#!/usr/bin/perl

use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin/../lib";

use Getopt::Long;
use JSON::XS;
use POSIX qw(strftime);
use Scalar::Util qw(looks_like_number);

use PredictionEngine;

=head1 NAME

autosmart-predictor.pl - AI-powered HDD failure prediction for autoSMART

=head1 SYNOPSIS

    autosmart-predictor.pl [OPTIONS]

=head1 OPTIONS

    --config-dir DIR      Configuration directory (default: /etc/autosmart)
    --device PATH         Analyze specific device only
    --all                 Analyze all active drives
    --days-back N         Days of history to analyze (default: 90)
    --output FORMAT       Output format: text, json, csv (default: text)
    --risk-level LEVEL    Show only drives with risk >= LEVEL
                          (low, moderate, high, critical)
    --quiet               Quiet mode - only output results
    --debug               Enable debug logging
    --help                Show this help

=head1 DESCRIPTION

This script uses AI (OpenAI GPT) to analyze SMART data trends and predict
HDD failures. It processes historical SMART data stored by the collector
and generates intelligent predictions with confidence levels.

Every error path exits with status 1, as documented in the help text.

=cut

# Known risk levels, ordered from least to most severe.  This single table
# drives option validation, filtering and the summary, so the three cannot
# drift apart.
my %RISK_ORDER = (
    low      => 1,
    moderate => 2,
    high     => 3,
    critical => 4,
);

# Configuration (command-line defaults)
my $config_dir      = '/etc/autosmart';
my $specific_device = '';
my $analyze_all     = 0;
my $days_back       = 90;
my $output_format   = 'text';
my $min_risk_level  = '';
my $quiet           = 0;
my $debug           = 0;
my $help            = 0;

GetOptions(
    'config-dir=s' => \$config_dir,
    'device=s'     => \$specific_device,
    'all'          => \$analyze_all,
    'days-back=i'  => \$days_back,
    'output=s'     => \$output_format,
    'risk-level=s' => \$min_risk_level,
    'quiet'        => \$quiet,
    'debug'        => \$debug,
    'help'         => \$help,
) or fatal("Error parsing command line arguments\n");

if ($help) {
    print_help();
    exit 0;
}

# Validate options
unless ($specific_device || $analyze_all) {
    fatal("Must specify either --device PATH or --all\n");
}

if ($specific_device && $analyze_all) {
    fatal("Cannot specify both --device and --all\n");
}

unless ($output_format =~ /\A(?:text|json|csv)\z/) {
    fatal("Invalid output format: $output_format (must be text, json, or csv)\n");
}

if ($min_risk_level && !exists $RISK_ORDER{$min_risk_level}) {
    fatal("Invalid risk level: $min_risk_level (must be low, moderate, high, or critical)\n");
}

# Validate configuration directory
unless (-d $config_dir) {
    fatal("Configuration directory not found: $config_dir\n");
}

my $db_config     = "$config_dir/database.conf";
my $openai_config = "$config_dir/openai.conf";

unless (-f $db_config && -f $openai_config) {
    fatal("Required configuration files not found in $config_dir\n");
}

# Run the engine inside eval so that any exception it throws is reported
# through fatal() and yields the documented exit status of 1.
my @predictions;
my $completed = eval {
    my $predictor = PredictionEngine->new(
        db_config     => $db_config,
        openai_config => $openai_config,
        debug         => $debug,
    );

    log_message("autoSMART predictor starting...") unless $quiet;

    if ($specific_device) {
        # Analyze specific device
        log_message("Analyzing device: $specific_device") unless $quiet;
        my $prediction = $predictor->predict_failure($specific_device, $days_back);

        # Skip an undefined result instead of pushing it and failing later.
        push @predictions, $prediction if defined $prediction;
    }
    else {
        # Analyze all active drives
        log_message("Analyzing all active drives...") unless $quiet;

        # NOTE(review): --days-back is not forwarded to analyze_all_drives(),
        # although the help examples combine --all with --days-back.  The
        # method's signature is not visible from this script - confirm
        # whether it accepts a day count before passing one.
        my $all = $predictor->analyze_all_drives();
        @predictions = @$all if defined $all;
    }

    1;
};

unless ($completed) {
    my $error = (defined $@ && length "$@") ? "$@" : 'unknown error';
    $error =~ s/\s+\z//;
    fatal("Prediction failed: $error\n");
}

# Filter predictions by minimum risk level if specified
if ($min_risk_level) {
    @predictions = filter_by_risk_level(\@predictions, $min_risk_level);
}

# Output results
output_predictions(\@predictions, $output_format);

log_message("Analysis complete") unless $quiet;

exit 0;

=head2 fatal

Print an error message to STDERR and exit with status 1.

A plain C<die> would exit with the current errno or 255, which does not
match the exit codes documented in the help text.  A trailing newline is
appended when the message lacks one.

=cut

sub fatal {
    my ($message) = @_;

    $message = 'unknown error' unless defined $message && length $message;
    $message .= "\n" unless $message =~ /\n\z/;

    print STDERR $message;
    exit 1;
}

=head2 _risk_key

Map a raw risk level to a key of the risk table.

The value is lower-cased; anything undefined or not in the table is
returned as C<unknown>.

=cut

sub _risk_key {
    my ($level) = @_;

    return 'unknown' unless defined $level;

    my $key = lc $level;
    return exists $RISK_ORDER{$key} ? $key : 'unknown';
}

=head2 filter_by_risk_level

Filter predictions by minimum risk level.

Takes an array reference of prediction hashes and a minimum level name.
Returns the predictions whose C<risk_level> is a known level at or above
the minimum.  Predictions with a missing or unrecognised level are dropped.
An unrecognised minimum is treated as C<low>.

=cut

sub filter_by_risk_level {
    my ($predictions, $min_level) = @_;

    my $min_key   = _risk_key($min_level);
    my $min_order = $min_key eq 'unknown' ? 1 : $RISK_ORDER{$min_key};

    return grep {
        my $key = _risk_key($_->{risk_level});
        $key ne 'unknown' && $RISK_ORDER{$key} >= $min_order;
    } @$predictions;
}

=head2 output_predictions

Output predictions in the specified format.

Dispatches to C<output_json>, C<output_csv> or C<output_text>; any format
other than C<json> or C<csv> falls through to text.

=cut

sub output_predictions {
    my ($predictions, $format) = @_;

    if ($format eq 'json') {
        output_json($predictions);
    }
    elsif ($format eq 'csv') {
        output_csv($predictions);
    }
    else {
        output_text($predictions);
    }

    return;
}

=head2 output_json

Output predictions as pretty-printed JSON.

The document has two keys: C<timestamp> (the current epoch time) and
C<predictions>.  Keys are emitted in canonical (sorted) order so repeated
runs produce diffable output.

=cut

sub output_json {
    my ($predictions) = @_;

    my $json = JSON::XS->new->pretty->canonical->encode({
        timestamp   => time(),
        predictions => $predictions,
    });

    print $json;
    return;
}

=head2 output_csv

Output predictions as CSV: one header row, then one row per prediction.

Every field is passed through C<escape_csv>.  Undefined fields become
empty, while a legitimate C<0> (for example zero confidence) is kept.

=cut

sub output_csv {
    my ($predictions) = @_;

    my @columns = qw(
        device_path timestamp risk_level confidence
        time_to_failure_days concerns recommendations
    );

    # CSV header
    print join(',', @columns) . "\n";

    foreach my $pred (@$predictions) {
        print join(',', map { escape_csv($pred->{$_}) } @columns) . "\n";
    }

    return;
}

=head2 output_text

Output predictions as a human-readable report, followed by a summary.

The summary counts drives per risk level and prints the average of all
positive numeric confidence values.  Levels outside the known set are
counted as C<unknown>, so the per-level counts add up to the total.

=cut

sub output_text {
    my ($predictions) = @_;

    unless (@$predictions) {
        print "No predictions available.\n";
        return;
    }

    print "\n" . "=" x 80 . "\n";
    print "autoSMART HDD Failure Prediction Report\n";
    print "Generated: " . strftime("%Y-%m-%d %H:%M:%S", localtime()) . "\n";
    print "=" x 80 . "\n\n";

    foreach my $pred (@$predictions) {
        print_prediction_text($pred);
        print "-" x 80 . "\n";
    }

    # Summary statistics
    my %risk_counts;
    my $total_confidence = 0;
    my $confidence_count = 0;

    foreach my $pred (@$predictions) {
        $risk_counts{ _risk_key($pred->{risk_level}) }++;

        my $confidence = $pred->{confidence};
        if (looks_like_number($confidence) && $confidence > 0) {
            $total_confidence += $confidence;
            $confidence_count++;
        }
    }

    print "\nSUMMARY:\n";
    print "Total drives analyzed: " . scalar(@$predictions) . "\n";

    foreach my $level (qw(critical high moderate low unknown)) {
        next unless $risk_counts{$level};
        printf("%-10s risk: %d drives\n", ucfirst($level), $risk_counts{$level});
    }

    if ($confidence_count > 0) {
        printf("Average confidence: %.1f%%\n", $total_confidence / $confidence_count);
    }

    print "\n";
    return;
}

=head2 print_prediction_text

Print a single prediction in text format.

A prediction whose C<prediction> field is C<insufficient_data> prints only
its status and message.  Otherwise the risk level, confidence, estimated
time to failure, concerns and recommendations are printed when present.
The AI reasoning is shown only when C<--debug> is set.

=cut

sub print_prediction_text {
    my ($pred) = @_;

    my $device = defined $pred->{device_path} ? $pred->{device_path} : 'unknown';
    print "DEVICE: $device\n";

    my $kind = defined $pred->{prediction} ? $pred->{prediction} : '';
    if ($kind eq 'insufficient_data') {
        my $message = defined $pred->{message} ? $pred->{message} : '';
        print "STATUS: Insufficient data for analysis\n";
        print "MESSAGE: $message\n";
        return;
    }

    print "RISK LEVEL: " . format_risk_level($pred->{risk_level}) . "\n";

    if (defined $pred->{confidence}) {
        print "CONFIDENCE: $pred->{confidence}%\n";
    }

    my $days = $pred->{time_to_failure_days};
    if (looks_like_number($days) && $days > 0) {
        print "ESTIMATED TIME TO FAILURE: $days days\n";
    }

    if ($pred->{concerns}) {
        print "CONCERNS:\n";
        print format_text_block($pred->{concerns}, " ");
    }

    if ($pred->{recommendations}) {
        print "RECOMMENDATIONS:\n";
        print format_text_block($pred->{recommendations}, " ");
    }

    if ($pred->{reasoning} && $debug) {
        print "AI REASONING:\n";
        print format_text_block($pred->{reasoning}, " ");
    }

    # NOTE(review): timestamp is assumed to be epoch seconds - confirm
    # against PredictionEngine.  A missing value is skipped and a
    # non-numeric one is printed verbatim, rather than being rendered as a
    # date in 1970.
    my $stamp = $pred->{timestamp};
    if (defined $stamp && length $stamp) {
        my $when = looks_like_number($stamp)
            ? strftime("%Y-%m-%d %H:%M:%S", localtime($stamp))
            : $stamp;
        print "ANALYZED: $when\n";
    }

    print "\n";
    return;
}

=head2 format_risk_level

Format a risk level in upper case, with ANSI color when STDOUT is a
terminal.

An undefined or empty level is shown as C<UNKNOWN>.  Levels without a
color entry are returned uncolored.

=cut

sub format_risk_level {
    my $level = shift || 'unknown';

    # Simple color codes (won't work in all terminals)
    my %colors = (
        'critical' => "\033[1;31m",    # Bold red
        'high'     => "\033[0;31m",    # Red
        'moderate' => "\033[0;33m",    # Yellow
        'low'      => "\033[0;32m",    # Green
        'unknown'  => "\033[0;37m",    # Gray
    );
    my $reset = "\033[0m";

    my $color = $colors{ lc $level };

    # Only use colors if output is to a terminal
    return uc($level) unless $color && -t STDOUT;
    return $color . uc($level) . $reset;
}

=head2 format_text_block

Format multi-line text with indentation.

Splits the text on newlines, prefixes every line with the indent string
and returns the result with a trailing newline.  Returns an empty string
for undefined or empty text.

=cut

sub format_text_block {
    my ($text, $indent) = @_;

    return '' unless defined $text && length $text;
    $indent = '' unless defined $indent;

    my @lines = split /\n/, $text;
    return join("\n", map { "$indent$_" } @lines) . "\n";
}

=head2 escape_csv

Escape CSV field content.

Undefined input becomes an empty field.  A field containing a comma,
double quote, carriage return or newline is wrapped in double quotes, with
embedded quotes doubled.  Other values, including C<0>, are returned
unchanged.

=cut

sub escape_csv {
    my ($field) = @_;

    $field = '' unless defined $field;

    # Escape quotes and wrap in quotes if contains comma/quote/CR/newline
    if ($field =~ /[",\r\n]/) {
        $field =~ s/"/""/g;
        $field = "\"$field\"";
    }

    return $field;
}

=head2 log_message

Log a status message with a timestamp to STDERR, so it never mixes with
the report written to STDOUT.

=cut

sub log_message {
    my ($message) = @_;

    my $timestamp = strftime("%Y-%m-%d %H:%M:%S", localtime());
    print STDERR "[$timestamp] autosmart-predictor: $message\n";
    return;
}

=head2 print_help

Display help information on STDOUT.

=cut

sub print_help {
    print <<'EOF';
autoSMART AI Predictor v1.0

USAGE:
    autosmart-predictor.pl [OPTIONS]

OPTIONS:
    --config-dir DIR      Configuration directory (default: /etc/autosmart)
    --device PATH         Analyze specific device only (e.g., /dev/sda)
    --all                 Analyze all active drives in inventory
    --days-back N         Days of SMART history to analyze (default: 90)
    --output FORMAT       Output format: text, json, csv (default: text)
    --risk-level LEVEL    Show only drives with risk >= LEVEL
                          (low, moderate, high, critical)
    --quiet               Quiet mode - suppress status messages
    --debug               Enable debug logging and show AI reasoning
    --help                Show this help message

EXAMPLES:
    # Analyze specific drive
    autosmart-predictor.pl --device /dev/sda

    # Analyze all drives
    autosmart-predictor.pl --all

    # Analyze with 30 days of history
    autosmart-predictor.pl --all --days-back 30

    # Show only high/critical risk drives
    autosmart-predictor.pl --all --risk-level high

    # Output as JSON
    autosmart-predictor.pl --all --output json

    # Quiet CSV output for scripts
    autosmart-predictor.pl --all --output csv --quiet

RISK LEVELS:
    LOW         No immediate concerns detected
    MODERATE    Some parameters showing minor degradation
    HIGH        Multiple concerning trends detected
    CRITICAL    Immediate action required - failure likely soon

OUTPUT FORMATS:
    text        Human-readable report (default)
    json        Machine-readable JSON format
    csv         Comma-separated values for spreadsheets

CONFIGURATION:
    Required configuration files in /etc/autosmart/:
    - database.conf    PostgreSQL connection settings
    - openai.conf      OpenAI API configuration

    The predictor requires historical SMART data collected by
    autosmart-collector.pl to generate meaningful predictions.

AI INTEGRATION:
    This tool uses OpenAI's GPT models to analyze SMART parameter trends
    and generate intelligent failure predictions. Ensure your OpenAI API
    key is properly configured in openai.conf.

EXIT CODES:
    0    Success
    1    Error (configuration, API failure, etc.)

EOF
    return;
}

__END__

=head1 AUTHOR

AutoSMART Development Team

=head1 LICENSE

This software is part of the autoSMART project.

=cut