f16725e 3 months ago History
1 contributor
607 lines | 16.65kb
package PredictionEngine;

use strict;
use warnings;
use DBI;
use HTTP::Tiny;
use JSON::XS;
use Math::Round;
use Config::Simple;
use Time::Piece;

=head1 NAME

PredictionEngine - AI-powered HDD failure prediction for autoSMART

=head1 DESCRIPTION

This module integrates with OpenAI's API to analyze SMART data trends and predict
HDD failures. It processes historical SMART data, generates feature vectors,
and uses GPT models for intelligent failure prediction.

=head1 SYNOPSIS

    use PredictionEngine;
    
    my $predictor = PredictionEngine->new(
        db_config     => '/path/to/database.conf',
        openai_config => '/path/to/openai.conf'
    );
    
    # Predict failure for specific drive
    my $prediction = $predictor->predict_failure('/dev/sda');
    
    # Analyze all drives
    my $results = $predictor->analyze_all_drives();

=cut

# Constructor.
#
# Named arguments (all optional):
#   db_config     - path to the database config file
#                   (default '/etc/autosmart/database.conf')
#   openai_config - path to the OpenAI config file
#                   (default '/etc/autosmart/openai.conf')
#   debug         - true value enables _log output (default 0)
#
# The OpenAI settings are loaded and the database connection is opened
# before the object is returned, so construction dies if either step dies.
sub new {
    my ($class, %args) = @_;

    my $self = bless {}, $class;

    # Caller-supplied settings, falling back to the system-wide defaults.
    $self->{db_config}     = $args{db_config}     || '/etc/autosmart/database.conf';
    $self->{openai_config} = $args{openai_config} || '/etc/autosmart/openai.conf';
    $self->{debug}         = $args{debug}         || 0;

    # Placeholders that _connect_database / _load_config fill in below.
    $self->{db_handle}  = undef;
    $self->{openai_key} = '';
    $self->{model}      = 'gpt-4';

    $self->{http_client} = HTTP::Tiny->new(timeout => 30);

    $self->_load_config();
    $self->_connect_database();

    return $self;
}

=head2 _load_config

Load OpenAI configuration

=cut

# Read the OpenAI settings from the file named in $self->{openai_config}.
#
# Populates openai_key, model, max_tokens and temperature on the object.
# Dies if the file cannot be loaded or if 'openai.api_key' is missing/empty.
sub _load_config {
    my $self = shift;

    my $cfg = Config::Simple->new($self->{openai_config})
        or die "Cannot load OpenAI config: $self->{openai_config}";

    $self->{openai_key} = $cfg->param('openai.api_key')
        or die "OpenAI API key not configured";

    $self->{model}      = $cfg->param('openai.model') || 'gpt-4';
    $self->{max_tokens} = $cfg->param('openai.max_tokens') || 1000;

    # A temperature of 0 is a legitimate setting (most deterministic output),
    # so only fall back to the default when the key is absent or empty;
    # a plain "|| 0.3" would silently replace a configured 0.
    my $temperature = $cfg->param('openai.temperature');
    $self->{temperature} =
        (defined $temperature && length $temperature) ? $temperature : 0.3;

    $self->_log("OpenAI configuration loaded (model: $self->{model})");
}

=head2 _connect_database

Establish PostgreSQL database connection

=cut

# Open the PostgreSQL connection described by the file in
# $self->{db_config} and keep the handle in $self->{db_handle}.
#
# Reads database.database / host / port / username / password from the
# config.  Dies if the config cannot be loaded or the connection fails.
sub _connect_database {
    my $self = shift;

    my $cfg = Config::Simple->new($self->{db_config});
    die "Cannot load database config: $self->{db_config}" unless $cfg;

    # Location of the server, then the credentials, in the order the DSN
    # template and DBI->connect expect them.
    my @location = map { $cfg->param("database.$_") } qw(database host port);
    my $dsn      = sprintf("DBI:Pg:database=%s;host=%s;port=%s", @location);

    my @credentials = map { $cfg->param("database.$_") } qw(username password);

    my %attributes = (
        RaiseError     => 1,
        AutoCommit     => 1,
        pg_enable_utf8 => 1,
    );

    my $dbh = DBI->connect($dsn, @credentials, \%attributes)
        or die "Database connection failed: $DBI::errstr";
    $self->{db_handle} = $dbh;

    $self->_log("Database connection established");
}

=head2 get_drive_smart_history

Retrieve SMART data history for a drive

=cut

# Fetch the SMART readings recorded for one drive over a recent window.
#
# Arguments:
#   $device_path - device to look up (matched against smart_readings.device_path)
#   $days_back   - size of the window in days (default 90)
#
# Returns an array reference of row hashes, oldest first.  Each row carries
# timestamp, temperature, model_name, serial_number, size_gb and a
# 'parameters' hash decoded from the parameters_json column.
# Dies on database errors (the handle is opened with RaiseError).
sub get_drive_smart_history {
    my ($self, $device_path, $days_back) = @_;

    $days_back ||= 90;  # Default 3 months

    # PostgreSQL does not accept a bind parameter inside "INTERVAL ? DAY"
    # (that spelling is MySQL syntax), so the window is built by multiplying
    # a one-day interval by the bound number of days.
    my $sql = q{
        SELECT 
            sr.timestamp,
            sr.temperature,
            sr.parameters_json,
            hi.model_name,
            hi.serial_number,
            hi.size_gb
        FROM smart_readings sr
        JOIN hdd_inventory hi ON sr.device_path = hi.device_path
        WHERE sr.device_path = ?
        AND sr.timestamp >= NOW() - (CAST(? AS integer) * INTERVAL '1 day')
        ORDER BY sr.timestamp ASC
    };

    my $sth = $self->{db_handle}->prepare($sql);
    $sth->execute($device_path, $days_back);

    my @history = ();
    while (my $row = $sth->fetchrow_hashref()) {
        my $raw_json = delete $row->{parameters_json};

        # One NULL or corrupt JSON cell must not abort the whole history:
        # keep the reading (its temperature is still usable) with an empty
        # parameter set and note the problem in the debug log.
        my $parameters = eval { decode_json($raw_json) };
        unless (ref $parameters eq 'HASH') {
            $self->_log("Skipping unreadable SMART parameters for $device_path");
            $parameters = {};
        }

        $row->{parameters} = $parameters;
        push @history, $row;
    }

    return \@history;
}

=head2 analyze_smart_trends

Analyze SMART parameter trends for patterns

=cut

# Summarise how the critical SMART parameters and the temperature of a
# drive evolved over its reading history.
#
# Arguments:
#   $history - array reference of readings, oldest first; each reading is a
#              hash with 'timestamp', 'temperature' and a 'parameters' hash
#              of { name => { raw_value => ... } }
#
# Returns a hash reference keyed by parameter name (plus 'temperature').
# Returns an empty hash when fewer than 5 readings are supplied.  A
# parameter (or the temperature) is only reported when at least 3 usable
# values exist for it.
sub analyze_smart_trends {
    my ($self, $history) = @_;

    return {} unless @$history >= 5;  # Need minimum data points

    my $trends = {};
    my @critical_params = qw(
        Reallocated_Sector_Ct
        Spin_Retry_Count
        Reallocated_Event_Count
        Current_Pending_Sector
        Offline_Uncorrectable
        UDMA_CRC_Error_Count
        Raw_Read_Error_Rate
    );

    # Analyze each critical parameter
    foreach my $param_name (@critical_params) {
        my @values     = ();
        my @timestamps = ();

        # Extract values for this parameter.  Readings that lack the
        # parameter, carry a malformed entry, or have no raw value are
        # skipped so that undefined values never reach the statistics.
        foreach my $reading (@$history) {
            my $parameters = $reading->{parameters};
            next unless ref $parameters eq 'HASH';

            my $entry = $parameters->{$param_name};
            next unless ref $entry eq 'HASH' && defined $entry->{raw_value};

            push @values,     $entry->{raw_value};
            push @timestamps, $reading->{timestamp};
        }

        next unless @values >= 3;

        # Calculate trend statistics
        my $trend_analysis = $self->_calculate_trend_stats(\@values, \@timestamps);

        $trends->{$param_name} = {
            current_value => $values[-1],
            min_value     => $trend_analysis->{min},
            max_value     => $trend_analysis->{max},
            slope         => $trend_analysis->{slope},
            volatility    => $trend_analysis->{volatility},
            data_points   => scalar(@values),
            concerning    => $self->_is_trend_concerning($param_name, $trend_analysis),
        };
    }

    # Analyze temperature trends.  Readings without a temperature are left
    # out, and the timestamps are taken from the same filtered readings so
    # both lists stay aligned.
    my @with_temperature = grep { defined $_->{temperature} } @$history;
    if (@with_temperature >= 3) {
        my @temperatures    = map { $_->{temperature} } @with_temperature;
        my @temp_timestamps = map { $_->{timestamp} } @with_temperature;
        my $temp_stats = $self->_calculate_trend_stats(\@temperatures, \@temp_timestamps);

        $trends->{temperature} = {
            current_temp  => $temperatures[-1],
            avg_temp      => $temp_stats->{mean},
            max_temp      => $temp_stats->{max},
            slope         => $temp_stats->{slope},
            # 1/0 like the per-parameter flag, rather than 1/''.
            concerning    => ($temp_stats->{max} > 60 || $temp_stats->{slope} > 0.1) ? 1 : 0,
        };
    }

    return $trends;
}

=head2 _calculate_trend_stats

Calculate statistical metrics for trend analysis

=cut

# Compute summary statistics for a series of numeric readings.
#
# Arguments:
#   $values     - array reference of numbers, in chronological order
#   $timestamps - accepted for symmetry with the callers but not used; the
#                 slope is measured per reading (x = position in the list)
#
# Returns a hash reference with min, max, mean, variance (sample variance,
# n-1 divisor), volatility (square root of the variance) and slope
# (least-squares gradient).  Returns an empty hash for fewer than 2 values.
sub _calculate_trend_stats {
    my ($self, $values, $timestamps) = @_;

    my $count = scalar @$values;
    return {} if $count < 2;

    # Range and total in a single pass.
    my ($lowest, $highest) = ($values->[0], $values->[0]);
    my $total = 0;
    for my $reading (@$values) {
        $total += $reading;
        if ($reading < $lowest)  { $lowest  = $reading }
        if ($reading > $highest) { $highest = $reading }
    }
    my $average = $total / $count;

    # Sample variance: squared deviations over (n - 1).
    my $squared_deviation = 0;
    $squared_deviation += ($_ - $average) ** 2 for @$values;
    my $sample_variance = $squared_deviation / ($count - 1);

    # Ordinary least squares with the list position as the x coordinate.
    my ($sx, $sy, $sxy, $sxx) = (0, 0, 0, 0);
    my $position = 0;
    for my $y (@$values) {
        $sx  += $position;
        $sy  += $y;
        $sxy += $position * $y;
        $sxx += $position * $position;
        $position++;
    }

    my $denominator = $count * $sxx - $sx * $sx;
    my $gradient = $denominator != 0
        ? ($count * $sxy - $sx * $sy) / $denominator
        : 0;

    return {
        min        => $lowest,
        max        => $highest,
        mean       => $average,
        variance   => $sample_variance,
        volatility => sqrt($sample_variance),
        slope      => $gradient,
    };
}

=head2 _is_trend_concerning

Determine if a SMART parameter trend is concerning

=cut

# Decide whether the statistics of one SMART parameter look worrying.
#
# Arguments:
#   $param_name - SMART attribute name
#   $stats      - hash reference with max, slope, volatility and mean
#
# Returns 1 when the trend is concerning, 0 otherwise.
sub _is_trend_concerning {
    my ($self, $param_name, $stats) = @_;

    # Highest tolerated raw value for the attributes that should not grow.
    my %ceiling_for = (
        'Reallocated_Sector_Ct'   => 0,
        'Reallocated_Event_Count' => 0,
        'Current_Pending_Sector'  => 0,
        'Offline_Uncorrectable'   => 0,
        'Spin_Retry_Count'        => 10,
    );

    if (exists $ceiling_for{$param_name}) {
        # Either already above the ceiling, or non-zero and still climbing.
        return 1
            if $stats->{max} > $ceiling_for{$param_name}
            || ($stats->{slope} > 0.1 && $stats->{max} > 0);
    }

    # For any attribute, swings larger than half the mean are a warning sign.
    return ($stats->{volatility} > ($stats->{mean} * 0.5) && $stats->{mean} > 0)
        ? 1
        : 0;
}

=head2 predict_failure

Generate AI-powered failure prediction for a drive

=cut

# Produce (and persist) a failure prediction for one drive.
#
# Arguments:
#   $device_path - device to analyse
#   $days_back   - how many days of history to consider (default 90)
#
# Returns a prediction hash reference.  With fewer than 5 readings an
# 'insufficient_data' result is returned straight away: no API call is
# made and nothing is stored.
sub predict_failure {
    my ($self, $device_path, $days_back) = @_;

    $days_back = 90 unless $days_back;

    my $readings = $self->get_drive_smart_history($device_path, $days_back);

    # Too little history to say anything meaningful.
    if (@$readings < 5) {
        my %no_data = (
            device_path => $device_path,
            prediction  => 'insufficient_data',
            confidence  => 0,
            risk_level  => 'unknown',
            message     => 'Insufficient historical data for prediction',
        );
        return \%no_data;
    }

    # Trends -> prompt -> model reply -> structured result.
    my $trend_summary = $self->analyze_smart_trends($readings);
    my $prompt_text   = $self->_generate_prediction_prompt($device_path, $readings, $trend_summary);
    my $model_reply   = $self->_call_openai_api($prompt_text);
    my $outcome       = $self->_parse_prediction_response($model_reply, $device_path);

    $self->_store_prediction($outcome);

    return $outcome;
}

=head2 _generate_prediction_prompt

Generate detailed prompt for OpenAI API

=cut

# Build the text prompt that describes one drive to the model.
#
# Arguments:
#   $device_path - device being analysed
#   $history     - array reference of readings; the first one supplies the
#                  model name, serial number and size
#   $trends      - hash reference as produced by analyze_smart_trends
#
# Returns the prompt as a single string.
sub _generate_prediction_prompt {
    my ($self, $device_path, $history, $trends) = @_;

    # The degree sign is spelled as an escape so the prompt contains the one
    # character U+00B0.  A literal sign in this file (which does not enable
    # "use utf8") is two raw bytes, and the JSON encoder would then encode
    # each byte separately, sending a garbled unit to the API.
    my $degrees_c = "\x{B0}C";

    my $drive_info = $history->[0];  # Basic drive info from first record

    my $prompt = "You are an expert HDD failure prediction system analyzing SMART data.\n\n";

    $prompt .= "DRIVE INFORMATION:\n";
    $prompt .= "- Device: $device_path\n";
    $prompt .= "- Model: " . ($drive_info->{model_name} || 'Unknown') . "\n";
    $prompt .= "- Serial: " . ($drive_info->{serial_number} || 'Unknown') . "\n";
    $prompt .= "- Size: " . ($drive_info->{size_gb} || 'Unknown') . " GB\n";
    $prompt .= "- Data Points: " . scalar(@$history) . " readings\n\n";

    $prompt .= "CRITICAL SMART PARAMETER ANALYSIS:\n";

    # Sorted so the same trends always produce the same prompt text.
    foreach my $param_name (sort keys %$trends) {
        next if $param_name eq 'temperature';  # reported in its own section

        my $trend = $trends->{$param_name};
        $prompt .= "- $param_name:\n";
        $prompt .= "  * Current: $trend->{current_value}\n";
        $prompt .= "  * Range: $trend->{min_value} - $trend->{max_value}\n";
        $prompt .= "  * Slope: " . sprintf("%.4f", $trend->{slope}) . "\n";
        $prompt .= "  * Volatility: " . sprintf("%.2f", $trend->{volatility}) . "\n";
        $prompt .= "  * Concerning: " . ($trend->{concerning} ? 'YES' : 'No') . "\n";
    }

    if (exists $trends->{temperature}) {
        my $temp = $trends->{temperature};
        $prompt .= "\nTEMPERATURE ANALYSIS:\n";
        $prompt .= "- Current: $temp->{current_temp}$degrees_c\n";
        $prompt .= "- Average: " . sprintf("%.1f", $temp->{avg_temp}) . "$degrees_c\n";
        $prompt .= "- Maximum: $temp->{max_temp}$degrees_c\n";
        $prompt .= "- Trend: " . sprintf("%.3f", $temp->{slope}) . "$degrees_c per reading\n";
    }

    $prompt .= "\nPLEASE ANALYZE THIS DATA AND PROVIDE:\n";
    $prompt .= "1. Overall failure risk assessment (LOW/MODERATE/HIGH/CRITICAL)\n";
    $prompt .= "2. Confidence level (0-100%)\n";
    $prompt .= "3. Estimated time to failure (if applicable)\n";
    $prompt .= "4. Key concerning indicators\n";
    $prompt .= "5. Recommended actions\n\n";

    $prompt .= "Format your response as JSON with fields: risk_level, confidence, time_to_failure_days, concerns, recommendations, reasoning\n";

    return $prompt;
}

=head2 _call_openai_api

Make API call to OpenAI

=cut

# Send a prompt to the OpenAI chat-completions endpoint.
#
# Arguments:
#   $prompt - user message text
#
# Returns the text content of the first choice in the reply.
# Dies when the HTTP request fails, when the reply is not valid JSON, or
# when the reply carries no message content.
sub _call_openai_api {
    my ($self, $prompt) = @_;

    my $payload = {
        model => $self->{model},
        messages => [
            {
                role => 'system',
                content => 'You are an expert HDD failure prediction system with deep knowledge of SMART parameters and drive reliability patterns.'
            },
            {
                role => 'user',
                content => $prompt
            }
        ],
        # Values read from the config file are strings, and the JSON encoder
        # would emit them as quoted strings ("1000"), which the API rejects
        # for numeric fields.  Numifying forces them out as JSON numbers.
        max_tokens  => int($self->{max_tokens}),
        temperature => $self->{temperature} + 0,
    };

    my $response = $self->{http_client}->post(
        'https://api.openai.com/v1/chat/completions',
        {
            headers => {
                'Authorization' => "Bearer $self->{openai_key}",
                'Content-Type'  => 'application/json',
            },
            content => encode_json($payload)
        }
    );

    unless ($response->{success}) {
        # HTTP::Tiny reports transport-level failures (DNS, TLS, timeout)
        # as pseudo-status 599 with the explanation in the content field.
        my $detail = '';
        if ($response->{status} == 599 && defined $response->{content}) {
            ($detail = ": $response->{content}") =~ s/\s+\z//;
        }
        die "OpenAI API call failed: $response->{status} $response->{reason}$detail";
    }

    my $result = eval { decode_json($response->{content}) };
    die "OpenAI API returned a response that is not valid JSON: $@"
        if $@;

    # Walk the expected structure step by step so an unexpected shape gives
    # a clear error instead of an undefined return value.
    my $choices = ref $result  eq 'HASH'  ? $result->{choices} : undef;
    my $first   = ref $choices eq 'ARRAY' ? $choices->[0]      : undef;
    my $message = ref $first   eq 'HASH'  ? $first->{message}  : undef;
    my $content = ref $message eq 'HASH'  ? $message->{content} : undef;

    die "OpenAI API response contained no message content"
        unless defined $content;

    return $content;
}

=head2 _parse_prediction_response

Parse OpenAI response into structured prediction

=cut

# Turn the model's reply into a structured prediction.
#
# Arguments:
#   $ai_response - reply text from the model (may be JSON, JSON wrapped in a
#                  Markdown code fence, or free text)
#   $device_path - device the reply refers to
#
# Returns a hash reference with device_path, timestamp (epoch seconds),
# prediction ('success' when JSON was parsed, 'text_response' otherwise),
# confidence, risk_level and message (the raw reply), plus
# time_to_failure_days / concerns / recommendations / reasoning when the
# JSON reply supplied them.
sub _parse_prediction_response {
    my ($self, $ai_response, $device_path) = @_;

    my $prediction = {
        device_path => $device_path,
        timestamp   => time(),
        prediction  => 'unknown',
        confidence  => 0,
        risk_level  => 'unknown',
        message     => $ai_response,
    };

    my $text = defined $ai_response ? $ai_response : '';

    # Models frequently wrap JSON in a Markdown code fence; parse the fenced
    # part when there is one.
    my $json_text = $text;
    if ($text =~ m{```(?:json)?\s*(.*?)\s*```}si) {
        $json_text = $1;
    }

    # The reply is already a decoded character string, so it is parsed with
    # a decoder that does not expect UTF-8 bytes (decode_json does, and
    # would reject or mangle non-ASCII characters here).
    my $parsed = eval { JSON::XS->new->decode($json_text) };

    if (ref $parsed eq 'HASH') {
        my $risk = $parsed->{risk_level};
        $prediction->{risk_level} = lc($risk)
            if defined $risk && !ref $risk && length $risk;

        # Accept both 85 and "85%"; keep only the numeric part.
        my $confidence = $parsed->{confidence};
        if (defined $confidence && !ref $confidence
            && $confidence =~ /(\d+(?:\.\d+)?)/) {
            $prediction->{confidence} = $1 + 0;
        }

        # "defined" rather than truth: 0 days to failure is a real answer.
        $prediction->{time_to_failure_days} = $parsed->{time_to_failure_days}
            if defined $parsed->{time_to_failure_days};

        $prediction->{concerns}        = $parsed->{concerns}        if $parsed->{concerns};
        $prediction->{recommendations} = $parsed->{recommendations} if $parsed->{recommendations};
        $prediction->{reasoning}       = $parsed->{reasoning}       if $parsed->{reasoning};

        $prediction->{prediction} = 'success';
        return $prediction;
    }

    $self->_log("Failed to parse AI response as JSON, using raw text");
    $prediction->{prediction} = 'text_response';

    # Try to extract basic info from text
    if ($text =~ /risk.*?:.*?(low|moderate|high|critical)/i) {
        $prediction->{risk_level} = lc($1);
    }

    if ($text =~ /confidence.*?:.*?(\d+)/i) {
        $prediction->{confidence} = $1;
    }

    return $prediction;
}

=head2 _store_prediction

Store prediction results in database

=cut

# Insert one prediction into the predictions table.
#
# Arguments:
#   $prediction - hash reference as built by _parse_prediction_response;
#                 its 'message' value goes into the raw_response column and
#                 its 'timestamp' (epoch seconds) through to_timestamp()
#
# NOTE(review): concerns / recommendations are bound as-is; if the model
# returned them as lists, how they are stored depends on the column types,
# which are not visible here - confirm against the schema.
sub _store_prediction {
    my ($self, $prediction) = @_;

    my $insert_sql = q{
        INSERT INTO predictions 
        (device_path, timestamp, risk_level, confidence, time_to_failure_days,
         concerns, recommendations, reasoning, raw_response)
        VALUES (?, to_timestamp(?), ?, ?, ?, ?, ?, ?, ?)
    };

    # Prediction keys in the same order as the placeholders above.
    my @fields = qw(
        device_path timestamp risk_level confidence time_to_failure_days
        concerns recommendations reasoning message
    );
    my @bind_values = @{$prediction}{@fields};

    $self->{db_handle}->do($insert_sql, undef, @bind_values);
}

=head2 analyze_all_drives

Run predictions for all active drives

=cut

# Run predict_failure for every drive marked 'active' in hdd_inventory.
#
# Returns an array reference with one result hash per drive, ordered by
# device path.  A drive whose prediction throws does not abort the run: it
# is reported as a result with prediction => 'error' and the error text in
# 'message', and the remaining drives are still processed.
# Dies only if the list of drives itself cannot be read.
sub analyze_all_drives {
    my $self = shift;

    my $sql = q{
        SELECT device_path
        FROM hdd_inventory 
        WHERE status = 'active'
        ORDER BY device_path
    };

    # Read the complete list up front so the per-drive queries and inserts
    # never run while this statement is still being iterated.
    my $device_paths = $self->{db_handle}->selectcol_arrayref($sql) || [];

    my @results    = ();
    my $last_index = $#{$device_paths};

    for my $index (0 .. $last_index) {
        my $device_path = $device_paths->[$index];

        my $prediction = eval { $self->predict_failure($device_path) };
        my $error      = $@;

        unless (ref $prediction eq 'HASH') {
            $error = 'no prediction returned' unless length $error;
            $error =~ s/\s+\z//;
            $self->_log("Prediction failed for $device_path: $error");

            $prediction = {
                device_path => $device_path,
                prediction  => 'error',
                confidence  => 0,
                risk_level  => 'unknown',
                message     => "Prediction failed: $error",
            };
        }

        push @results, $prediction;

        # Rate limiting - small delay between API calls.  Nothing to wait
        # for after the last drive, or when no API call was made because
        # the drive had too little history.
        my $outcome = defined $prediction->{prediction} ? $prediction->{prediction} : '';
        sleep(1) if $index < $last_index && $outcome ne 'insufficient_data';
    }

    return \@results;
}

=head2 _log

Internal logging method

=cut

# Print a timestamped diagnostic line to standard output.
# Nothing is printed unless the object was created with a true 'debug'.
sub _log {
    my ($self, $message) = @_;

    my $now = localtime();
    $self->{debug} and print "[$now] PredictionEngine: $message\n";
}

=head2 DESTROY

Cleanup database connection

=cut

# Destructor: close the database connection if one was opened.
sub DESTROY {
    my ($self) = @_;

    my $dbh = $self->{db_handle} or return;
    $dbh->disconnect();
}

1;

__END__

=head1 AUTHOR

AutoSMART Development Team

=head1 LICENSE

This software is part of the autoSMART project.

=cut