1 contributor
package PredictionEngine;
use strict;
use warnings;
use DBI;
use HTTP::Tiny;
use JSON::XS;
use Math::Round;
use Config::Simple;
use Time::Piece;
=head1 NAME
PredictionEngine - AI-powered HDD failure prediction for autoSMART
=head1 DESCRIPTION
This module integrates with OpenAI's API to analyze SMART data trends and predict
HDD failures. It processes historical SMART data, generates feature vectors,
and uses GPT models for intelligent failure prediction.
=head1 SYNOPSIS
use PredictionEngine;
my $predictor = PredictionEngine->new(
db_config => '/path/to/database.conf',
openai_config => '/path/to/openai.conf'
);
# Predict failure for specific drive
my $prediction = $predictor->predict_failure('/dev/sda');
# Analyze all drives
my $results = $predictor->analyze_all_drives();
=cut
=head2 new

Constructor. Accepts the optional named arguments C<db_config>,
C<openai_config> and C<debug>. A missing or false value falls back to
F</etc/autosmart/database.conf>, F</etc/autosmart/openai.conf> and C<0>
respectively.

The OpenAI configuration is loaded and the database connection is opened
before the object is returned, so an exception from either step propagates
to the caller.

=cut

sub new {
    my $class = shift;
    my %args  = @_;

    my $self = bless {}, $class;

    # Caller-supplied settings, with their fallbacks.
    $self->{db_config}     = $args{db_config}     || '/etc/autosmart/database.conf';
    $self->{openai_config} = $args{openai_config} || '/etc/autosmart/openai.conf';
    $self->{debug}         = $args{debug}         || 0;

    # Internal state; key and model are overwritten by _load_config().
    $self->{db_handle}   = undef;
    $self->{openai_key}  = '';
    $self->{model}       = 'gpt-4';
    $self->{http_client} = HTTP::Tiny->new(timeout => 30);

    $self->_load_config();
    $self->_connect_database();

    return $self;
}
=head2 _load_config

Load the OpenAI settings from the file named by C<< $self->{openai_config} >>.

Reads C<openai.api_key> (required), C<openai.model> (default C<gpt-4>),
C<openai.max_tokens> (default 1000) and C<openai.temperature> (default 0.3)
and stores them on the object. Dies if the file cannot be loaded or the
API key is missing.

=cut

sub _load_config {
    my $self = shift;

    my $cfg = Config::Simple->new($self->{openai_config})
        or die "Cannot load OpenAI config: $self->{openai_config}";

    $self->{openai_key} = $cfg->param('openai.api_key')
        or die "OpenAI API key not configured";

    $self->{model}      = $cfg->param('openai.model')      || 'gpt-4';
    $self->{max_tokens} = $cfg->param('openai.max_tokens') || 1000;

    # A temperature of 0 is a legitimate setting, so only fall back to the
    # default when the key is absent or empty, not merely false.
    my $temperature = $cfg->param('openai.temperature');
    $self->{temperature} =
        (defined $temperature && $temperature ne '') ? $temperature : 0.3;

    $self->_log("OpenAI configuration loaded (model: $self->{model})");
}
=head2 _connect_database

Establish the PostgreSQL database connection.

Reads C<database.database>, C<database.host>, C<database.port>,
C<database.username> and C<database.password> from the file named by
C<< $self->{db_config} >> and stores the resulting handle in
C<< $self->{db_handle} >>. DSN components that are not configured are left
out of the DSN instead of being passed as empty values. Dies if the
configuration cannot be loaded or the connection fails.

=cut

sub _connect_database {
    my $self = shift;

    my $cfg = Config::Simple->new($self->{db_config})
        or die "Cannot load database config: $self->{db_config}";

    # Config::Simple's param() is context-sensitive: in list context a
    # missing key yields an empty list and a value containing commas yields
    # several elements. Either would silently shift the positional arguments
    # below (e.g. the password ending up in the username slot), so every
    # setting is fetched in scalar context and collapsed to one string.
    my $setting = sub {
        my ($key) = @_;
        my $value = $cfg->param("database.$key");
        $value = join(',', @$value) if ref $value eq 'ARRAY';
        return $value;
    };

    my @dsn_parts;
    for my $key (qw(database host port)) {
        my $value = $setting->($key);
        push @dsn_parts, "$key=$value" if defined $value && length $value;
    }
    my $dsn = 'DBI:Pg:' . join(';', @dsn_parts);

    my $username = $setting->('username');
    my $password = $setting->('password');

    $self->{db_handle} = DBI->connect(
        $dsn,
        $username,
        $password,
        {
            RaiseError     => 1,
            AutoCommit     => 1,
            pg_enable_utf8 => 1,
        }
    ) or die "Database connection failed: $DBI::errstr";

    $self->_log("Database connection established");
}
=head2 get_drive_smart_history

Retrieve the SMART reading history for a drive.

    my $history = $engine->get_drive_smart_history($device_path, $days_back);

C<$days_back> defaults to 90 and is truncated to a whole number of days.
Returns a reference to an array of row hashes ordered by ascending
timestamp. In each row the C<parameters_json> column is replaced by a
C<parameters> key holding the decoded structure; a row whose JSON is NULL,
malformed or not an object gets an empty hash so that one bad reading does
not abort the whole query. Database errors propagate as exceptions.

=cut

sub get_drive_smart_history {
    my ($self, $device_path, $days_back) = @_;
    $days_back ||= 90;    # Default 3 months

    # "INTERVAL ? DAY" is MySQL syntax and is rejected by PostgreSQL, which
    # this module connects to; multiply a one-day interval by the bound
    # day count instead.
    my $sql = q{
SELECT
sr.timestamp,
sr.temperature,
sr.parameters_json,
hi.model_name,
hi.serial_number,
hi.size_gb
FROM smart_readings sr
JOIN hdd_inventory hi ON sr.device_path = hi.device_path
WHERE sr.device_path = ?
AND sr.timestamp >= NOW() - (CAST(? AS integer) * INTERVAL '1 day')
ORDER BY sr.timestamp ASC
};

    my $sth = $self->{db_handle}->prepare($sql);
    $sth->execute($device_path, int($days_back));

    my @history;
    while (my $row = $sth->fetchrow_hashref()) {
        my $json = delete $row->{parameters_json};

        # NOTE(review): the connection sets pg_enable_utf8, so this text may
        # be a character string while decode_json expects UTF-8 octets --
        # confirm the stored JSON is ASCII-only.
        my $parameters = defined $json ? eval { decode_json($json) } : undef;
        unless (ref $parameters eq 'HASH') {
            $self->_log("Ignoring unparsable SMART parameters for $device_path");
            $parameters = {};
        }

        $row->{parameters} = $parameters;
        push @history, $row;
    }

    return \@history;
}
=head2 analyze_smart_trends

Analyze SMART parameter trends for patterns.

    my $trends = $engine->analyze_smart_trends($history);

C<$history> is an array reference of readings as produced by
C<get_drive_smart_history>. Returns a hash reference keyed by parameter
name (plus C<temperature>), or an empty hash reference when fewer than five
readings are supplied. A parameter, or the temperature, is only reported
when at least three usable (defined) values exist; readings with an
undefined value are skipped rather than being counted as zero.

=cut

sub analyze_smart_trends {
    my ($self, $history) = @_;

    # Need minimum data points
    return {} unless ref $history eq 'ARRAY' && @$history >= 5;

    my %trends;
    my @critical_params = qw(
        Reallocated_Sector_Ct
        Spin_Retry_Count
        Reallocated_Event_Count
        Current_Pending_Sector
        Offline_Uncorrectable
        UDMA_CRC_Error_Count
        Raw_Read_Error_Rate
    );

    # Analyze each critical parameter
    for my $param_name (@critical_params) {
        my (@values, @timestamps);

        # Extract values for this parameter, keeping values and timestamps
        # aligned and ignoring readings that carry no usable raw value.
        for my $reading (@$history) {
            my $parameters = $reading->{parameters};
            next unless ref $parameters eq 'HASH';

            my $entry = $parameters->{$param_name};
            next unless ref $entry eq 'HASH' && defined $entry->{raw_value};

            push @values,     $entry->{raw_value};
            push @timestamps, $reading->{timestamp};
        }
        next unless @values >= 3;

        # Calculate trend statistics
        my $stats = $self->_calculate_trend_stats(\@values, \@timestamps);
        $trends{$param_name} = {
            current_value => $values[-1],
            min_value     => $stats->{min},
            max_value     => $stats->{max},
            slope         => $stats->{slope},
            volatility    => $stats->{volatility},
            data_points   => scalar(@values),
            concerning    => $self->_is_trend_concerning($param_name, $stats),
        };
    }

    # Analyze temperature trends. An undefined temperature would be treated
    # as 0 by the arithmetic and drag min/mean/slope down, so drop those.
    my @with_temp    = grep { defined $_->{temperature} } @$history;
    my @temperatures = map { $_->{temperature} } @with_temp;
    if (@temperatures >= 3) {
        my @temp_timestamps = map { $_->{timestamp} } @with_temp;
        my $temp_stats = $self->_calculate_trend_stats(\@temperatures, \@temp_timestamps);
        $trends{temperature} = {
            current_temp => $temperatures[-1],
            avg_temp     => $temp_stats->{mean},
            max_temp     => $temp_stats->{max},
            slope        => $temp_stats->{slope},
            concerning   => ($temp_stats->{max} > 60 || $temp_stats->{slope} > 0.1),
        };
    }

    return \%trends;
}
=head2 _calculate_trend_stats

Calculate statistical metrics for trend analysis.

    my $stats = $engine->_calculate_trend_stats(\@values, \@timestamps);

Returns a hash reference with the keys C<min>, C<max>, C<mean>, C<variance>
(sample variance, divided by n-1), C<volatility> (square root of the
variance) and C<slope>, or an empty hash reference when fewer than two
values are given. The slope is a least-squares fit against the position of
each value in the list, so it is expressed per reading; the C<$timestamps>
argument is accepted but not used in the calculation.

=cut

sub _calculate_trend_stats {
    my ($self, $values, $timestamps) = @_;

    my $count = @$values;
    return {} if $count < 2;

    # First pass: total and extremes.
    my $total = 0;
    my ($lowest, $highest) = ($values->[0], $values->[0]);
    for my $sample (@$values) {
        $total += $sample;
        $lowest  = $sample if $sample < $lowest;
        $highest = $sample if $sample > $highest;
    }
    my $mean = $total / $count;

    # Second pass: sample variance around the mean.
    my $variance = 0;
    $variance += ($_ - $mean) ** 2 for @$values;
    $variance /= ($count - 1);

    # Least-squares slope, using the list index as the x coordinate.
    my ($sum_x, $sum_y, $sum_xy, $sum_xx) = (0, 0, 0, 0);
    my $position = 0;
    for my $sample (@$values) {
        $sum_x  += $position;
        $sum_y  += $sample;
        $sum_xy += $position * $sample;
        $sum_xx += $position * $position;
        $position++;
    }
    my $denominator = $count * $sum_xx - $sum_x * $sum_x;
    my $slope = $denominator != 0
        ? ($count * $sum_xy - $sum_x * $sum_y) / $denominator
        : 0;

    return {
        min        => $lowest,
        max        => $highest,
        mean       => $mean,
        variance   => $variance,
        volatility => sqrt($variance),
        slope      => $slope,
    };
}
=head2 _is_trend_concerning

Determine if a SMART parameter trend is concerning.

    my $flag = $engine->_is_trend_concerning($param_name, $stats);

C<$stats> is a hash reference with the keys C<max>, C<slope>, C<volatility>
and C<mean>. Returns 1 when the parameter has a configured ceiling and
either its maximum exceeds that ceiling or it is rising (slope above 0.1)
from a non-zero maximum. For any parameter, also returns 1 when the
volatility exceeds half of a positive mean. Returns 0 otherwise.

=cut

sub _is_trend_concerning {
    my ($self, $param_name, $stats) = @_;

    # Highest maximum tolerated for counters that should never increase.
    my %ceiling_for = (
        'Reallocated_Sector_Ct'   => 0,
        'Reallocated_Event_Count' => 0,
        'Current_Pending_Sector'  => 0,
        'Offline_Uncorrectable'   => 0,
        'Spin_Retry_Count'        => 10,
    );

    if (exists $ceiling_for{$param_name}) {
        my $breached = $stats->{max} > $ceiling_for{$param_name}
            || ($stats->{slope} > 0.1 && $stats->{max} > 0);
        return 1 if $breached;
    }

    # High volatility relative to the mean is concerning for any parameter.
    my $erratic = $stats->{volatility} > ($stats->{mean} * 0.5)
        && $stats->{mean} > 0;

    return $erratic ? 1 : 0;
}
=head2 predict_failure

Generate an AI-powered failure prediction for a drive.

    my $prediction = $engine->predict_failure($device_path, $days_back);

C<$days_back> defaults to 90. When fewer than five readings are available
an C<insufficient_data> result is returned and nothing is sent to the API
or stored. Otherwise the trends are analysed, a prompt is built and sent
to OpenAI, and the parsed prediction is stored in the database and
returned. Exceptions raised by any of those steps propagate to the caller.

=cut

sub predict_failure {
    my ($self, $device_path, $days_back) = @_;
    $days_back = 90 unless $days_back;

    my $history = $self->get_drive_smart_history($device_path, $days_back);

    # Too little history: report that instead of asking for a prediction.
    if (@$history < 5) {
        my %insufficient = (
            device_path => $device_path,
            prediction  => 'insufficient_data',
            confidence  => 0,
            risk_level  => 'unknown',
            message     => 'Insufficient historical data for prediction',
        );
        return \%insufficient;
    }

    # History -> trends -> prompt -> model reply -> structured prediction.
    my $trends     = $self->analyze_smart_trends($history);
    my $prompt     = $self->_generate_prediction_prompt($device_path, $history, $trends);
    my $reply      = $self->_call_openai_api($prompt);
    my $prediction = $self->_parse_prediction_response($reply, $device_path);

    $self->_store_prediction($prediction);

    return $prediction;
}
=head2 _generate_prediction_prompt

Generate the detailed prompt for the OpenAI API.

    my $prompt = $engine->_generate_prediction_prompt($device_path, $history, $trends);

Drive details (model, serial, size) are taken from the first history
record and shown as C<Unknown> when missing or false. Every entry of
C<$trends> other than C<temperature> is listed as a SMART parameter,
sorted by name; the temperature section is only emitted when
C<$trends> has a C<temperature> entry. Returns the prompt as a character
string.

=cut

sub _generate_prediction_prompt {
    my ($self, $device_path, $history, $trends) = @_;

    # Basic drive info from first record (empty hash if there is none).
    my $drive_info = $history->[0] || {};

    # The degree sign is written as a code point rather than a literal:
    # this file does not "use utf8", so a literal would be two bytes that
    # encode_json later encodes again, sending mojibake to the API.
    my $deg = "\x{B0}";

    my $prompt = "You are an expert HDD failure prediction system analyzing SMART data.\n\n";

    $prompt .= "DRIVE INFORMATION:\n";
    $prompt .= "- Device: $device_path\n";
    $prompt .= "- Model: " . ($drive_info->{model_name} || 'Unknown') . "\n";
    $prompt .= "- Serial: " . ($drive_info->{serial_number} || 'Unknown') . "\n";
    $prompt .= "- Size: " . ($drive_info->{size_gb} || 'Unknown') . " GB\n";
    $prompt .= "- Data Points: " . scalar(@$history) . " readings\n\n";

    $prompt .= "CRITICAL SMART PARAMETER ANALYSIS:\n";
    foreach my $param_name (sort keys %$trends) {
        next if $param_name eq 'temperature';
        my $trend = $trends->{$param_name};
        $prompt .= "- $param_name:\n";
        $prompt .= " * Current: $trend->{current_value}\n";
        $prompt .= " * Range: $trend->{min_value} - $trend->{max_value}\n";
        $prompt .= " * Slope: " . sprintf("%.4f", $trend->{slope}) . "\n";
        $prompt .= " * Volatility: " . sprintf("%.2f", $trend->{volatility}) . "\n";
        $prompt .= " * Concerning: " . ($trend->{concerning} ? 'YES' : 'No') . "\n";
    }

    if (exists $trends->{temperature}) {
        my $temp = $trends->{temperature};
        $prompt .= "\nTEMPERATURE ANALYSIS:\n";
        $prompt .= "- Current: $temp->{current_temp}${deg}C\n";
        $prompt .= "- Average: " . sprintf("%.1f", $temp->{avg_temp}) . "${deg}C\n";
        $prompt .= "- Maximum: $temp->{max_temp}${deg}C\n";
        $prompt .= "- Trend: " . sprintf("%.3f", $temp->{slope}) . "${deg}C per reading\n";
    }

    $prompt .= "\nPLEASE ANALYZE THIS DATA AND PROVIDE:\n";
    $prompt .= "1. Overall failure risk assessment (LOW/MODERATE/HIGH/CRITICAL)\n";
    $prompt .= "2. Confidence level (0-100%)\n";
    $prompt .= "3. Estimated time to failure (if applicable)\n";
    $prompt .= "4. Key concerning indicators\n";
    $prompt .= "5. Recommended actions\n\n";
    $prompt .= "Format your response as JSON with fields: risk_level, confidence, time_to_failure_days, concerns, recommendations, reasoning\n";

    return $prompt;
}
=head2 _call_openai_api

Make the chat-completion API call to OpenAI.

    my $reply_text = $engine->_call_openai_api($prompt);

Sends C<$prompt> as the user message using the configured model,
C<max_tokens> and C<temperature>, and returns the text of the first
choice. Dies when the HTTP request fails, when the body is not a JSON
object, or when the reply carries no message content.

=cut

sub _call_openai_api {
    my ($self, $prompt) = @_;

    my $max_tokens  = defined $self->{max_tokens}  ? $self->{max_tokens}  : 1000;
    my $temperature = defined $self->{temperature} ? $self->{temperature} : 0.3;

    my $payload = {
        model    => $self->{model},
        messages => [
            {
                role    => 'system',
                content => 'You are an expert HDD failure prediction system with deep knowledge of SMART parameters and drive reliability patterns.'
            },
            {
                role    => 'user',
                content => $prompt
            }
        ],
        # Values read from the config file are strings, and the JSON encoder
        # emits string scalars as JSON strings ("1000"), which the API does
        # not accept for numeric fields. Numify them explicitly.
        max_tokens  => int($max_tokens),
        temperature => $temperature + 0,
    };

    my $response = $self->{http_client}->post(
        'https://api.openai.com/v1/chat/completions',
        {
            headers => {
                'Authorization' => "Bearer $self->{openai_key}",
                'Content-Type'  => 'application/json',
            },
            content => encode_json($payload)
        }
    );

    unless ($response->{success}) {
        my $message = "OpenAI API call failed: $response->{status} $response->{reason}";

        # The body usually says why (the API's error JSON, or HTTP::Tiny's
        # own exception text for its internal 599 status), so include a
        # bounded, single-line excerpt of it.
        my $detail = defined $response->{content} ? $response->{content} : '';
        $detail =~ s/\s+/ /g;
        $detail =~ s/\A //;
        $detail =~ s/ \z//;
        $detail = substr($detail, 0, 500) if length $detail > 500;
        $message .= " - $detail" if length $detail;

        die $message;
    }

    my $result = eval { decode_json($response->{content}) };
    die "OpenAI API returned a body that is not a JSON object"
        unless ref $result eq 'HASH';

    # Walk the structure defensively so a reply without choices produces a
    # clear error instead of an undefined "prediction".
    my $choices = $result->{choices};
    my $first   = ref $choices eq 'ARRAY' ? $choices->[0] : undef;
    my $message = ref $first eq 'HASH' ? $first->{message} : undef;
    my $content = ref $message eq 'HASH' ? $message->{content} : undef;
    die "OpenAI API response contained no message content"
        unless defined $content;

    return $content;
}
=head2 _parse_prediction_response

Parse the OpenAI response into a structured prediction.

    my $prediction = $engine->_parse_prediction_response($ai_response, $device_path);

Returns a hash reference that always contains C<device_path>, C<timestamp>
(epoch seconds), C<prediction>, C<confidence>, C<risk_level> and C<message>
(the raw response). When a JSON object can be found in the response, even
if it is wrapped in a Markdown code fence or surrounded by prose,
C<prediction> is C<success> and the C<risk_level>, C<confidence>,
C<time_to_failure_days>, C<concerns>, C<recommendations> and C<reasoning>
fields are copied from it. Otherwise C<prediction> is C<text_response> and
the risk level and confidence are extracted from the text on a best-effort
basis.

=cut

sub _parse_prediction_response {
    my ($self, $ai_response, $device_path) = @_;

    my $prediction = {
        device_path => $device_path,
        timestamp   => time(),
        prediction  => 'unknown',
        confidence  => 0,
        risk_level  => 'unknown',
        message     => $ai_response,
    };

    my $text = defined $ai_response ? $ai_response : '';

    # Try to parse JSON response
    my $parsed = eval {
        # Models frequently wrap the object in ```json fences or add a
        # sentence around it, so take everything from the first "{" to the
        # last "}" rather than requiring the whole reply to be JSON.
        my ($candidate) = $text =~ /(\{.*\})/s;
        die "no JSON object in response\n" unless defined $candidate;

        # decode_json expects UTF-8 octets; a reply that was itself decoded
        # from JSON is a character string and would croak on non-ASCII text.
        utf8::encode($candidate) if utf8::is_utf8($candidate);

        decode_json($candidate);
    };

    if (ref $parsed eq 'HASH') {
        $prediction->{risk_level} = lc($parsed->{risk_level})
            if $parsed->{risk_level} && !ref $parsed->{risk_level};

        # Accept "85" as well as "85%"; anything non-numeric is kept as sent.
        my $confidence = $parsed->{confidence};
        if (defined $confidence && !ref $confidence) {
            $prediction->{confidence} =
                $confidence =~ /(\d+(?:\.\d+)?)/ ? $1 + 0 : $confidence;
        }

        # 0 days is a meaningful estimate, so test for defined, not truth.
        $prediction->{time_to_failure_days} = $parsed->{time_to_failure_days}
            if defined $parsed->{time_to_failure_days};

        for my $field (qw(concerns recommendations reasoning)) {
            $prediction->{$field} = $parsed->{$field} if $parsed->{$field};
        }

        $prediction->{prediction} = 'success';
        return $prediction;
    }

    $self->_log("Failed to parse AI response as JSON, using raw text");
    $prediction->{prediction} = 'text_response';

    # Try to extract basic info from text
    if ($text =~ /risk.*?:.*?(low|moderate|high|critical)/i) {
        $prediction->{risk_level} = lc($1);
    }
    if ($text =~ /confidence.*?:.*?(\d+)/i) {
        $prediction->{confidence} = $1;
    }

    return $prediction;
}
=head2 _store_prediction

Store prediction results in the database.

    $engine->_store_prediction($prediction);

Inserts one row into the C<predictions> table from the given prediction
hash reference; the C<message> entry is written to the C<raw_response>
column and the C<timestamp> entry is passed through C<to_timestamp()>.
Entries missing from the hash are bound as undef.

NOTE(review): C<concerns> and C<recommendations> are bound exactly as
parsed, which may be array references. Confirm that the column types
accept that.

=cut

sub _store_prediction {
    my ($self, $prediction) = @_;

    # Hash keys in the same order as the column list of the statement.
    my @fields = qw(
        device_path timestamp risk_level confidence time_to_failure_days
        concerns recommendations reasoning message
    );
    my @bind_values = @{$prediction}{@fields};

    my $insert = q{
INSERT INTO predictions
(device_path, timestamp, risk_level, confidence, time_to_failure_days,
concerns, recommendations, reasoning, raw_response)
VALUES (?, to_timestamp(?), ?, ?, ?, ?, ?, ?, ?)
};

    $self->{db_handle}->do($insert, undef, @bind_values);
}
=head2 analyze_all_drives

Run predictions for all active drives.

    my $results = $engine->analyze_all_drives();

Returns a reference to an array with one prediction hash per drive whose
inventory status is C<active>, ordered by device path. A drive whose
prediction raises an exception does not abort the run: it is reported as
an entry with C<< prediction => 'error' >> and the error text in
C<message>, and processing continues with the next drive. Errors from the
inventory query itself still propagate.

=cut

sub analyze_all_drives {
    my $self = shift;

    my $sql = q{
SELECT device_path, model_name, serial_number
FROM hdd_inventory
WHERE status = 'active'
ORDER BY device_path
};

    my $sth = $self->{db_handle}->prepare($sql);
    $sth->execute();

    # Read the whole drive list first so the pause below can be skipped
    # after the final drive.
    my @device_paths;
    while (my $row = $sth->fetchrow_hashref()) {
        push @device_paths, $row->{device_path};
    }

    my @results;
    for my $index (0 .. $#device_paths) {
        my $device_path = $device_paths[$index];

        my $prediction;
        my $succeeded = eval {
            $prediction = $self->predict_failure($device_path);
            1;
        };
        unless ($succeeded) {
            my $error = defined $@ && length $@ ? $@ : 'unknown error';
            $error =~ s/\s+\z//;
            $self->_log("Prediction failed for $device_path: $error");
            $prediction = {
                device_path => $device_path,
                prediction  => 'error',
                confidence  => 0,
                risk_level  => 'unknown',
                message     => "Prediction failed: $error",
            };
        }
        push @results, $prediction;

        # Rate limiting - small delay between API calls
        sleep(1) if $index < $#device_paths;
    }

    return \@results;
}
=head2 _log

Internal logging method.

    $engine->_log($message);

Prints C<$message> to the currently selected output handle, prefixed with
the local time and the module name, but only when the object's C<debug>
flag is true.

=cut

sub _log {
    my $self = shift;
    my ($message) = @_;

    my $stamp = scalar(localtime());

    $self->{debug}
        and print "[$stamp] PredictionEngine: $message\n";
}
=head2 DESTROY

Cleanup: disconnects the database handle, if one is set, when the object
is destroyed.

=cut

sub DESTROY {
    my ($self) = @_;

    # Nothing to release if the connection was never established.
    my $dbh = $self->{db_handle} or return;

    $dbh->disconnect();
}
1;
__END__
=head1 AUTHOR
AutoSMART Development Team
=head1 LICENSE
This software is part of the autoSMART project.
=cut