Madagascar / projects / autoSMART / scripts / smart-collector-daemon.pl
f16725e 3 months ago History
1 contributor
384 lines | 14.134kb
#!/usr/bin/perl
use strict;
use warnings;
use DBI;
use JSON;
use File::Slurp;
use Getopt::Long;
use POSIX qw(strftime);
use Time::HiRes qw(sleep);

# autoSMART Collector Daemon
# Version: 1.0
# Description: Automated SMART data collection daemon

# --- Runtime options -------------------------------------------------------
# $debug starts from the AUTOSMART_DEBUG environment variable ('true' turns it
# on) and may also be switched on with --debug.
my $config_file;
my $debug = 0;
if (defined $ENV{AUTOSMART_DEBUG} && $ENV{AUTOSMART_DEBUG} eq 'true') {
    $debug = 1;
}
# Parsed from the command line; not consulted anywhere else in this file.
my $foreground = 0;

unless (
    GetOptions(
        'config=s'   => \$config_file,
        'debug'      => \$debug,
        'foreground' => \$foreground,
    )
) {
    die "Usage: $0 --config <file> [--debug] [--foreground]\n";
}

# When AUTOSMART_DEBUG is present in the environment it has the final say,
# overriding whatever --debug set above: 'true' enables, anything else
# disables.
my $env_debug = $ENV{AUTOSMART_DEBUG};
if (defined $env_debug) {
    $debug = ($env_debug eq 'true') ? 1 : 0;
    log_message(
        $debug
            ? "AUTOSMART_DEBUG enabled via /etc/default/autonas or environment"
            : "AUTOSMART_DEBUG disabled via /etc/default/autonas or environment"
    );
}

if (!$config_file) {
    die "Configuration file required\n";
}
if (!-f $config_file) {
    die "Configuration file not found: $config_file\n";
}

# Load configuration; the node id falls back to the short hostname when the
# config file does not provide one.
my $config = load_config($config_file);
my $node_id = $config->{node}{id};
$node_id = `hostname -s` unless $node_id;
chomp $node_id;

log_message("Starting autoSMART collector daemon on node: $node_id");
log_message("Configuration loaded from: $config_file");

# Main collection loop
#
# Every $scan_interval seconds one collection cycle is run.  A cycle is
# flagged as a forced full scan when at least $full_scan_interval seconds have
# passed since the last full scan that completed without a fatal error.
my $last_full_scan = 0;
my $scan_interval = $config->{node}{scan_interval} || 300;
my $full_scan_interval = $config->{collection}{full_scan_interval} || 3600;

# The config parser hands back raw strings.  A non-numeric value would make
# sleep() return immediately (busy loop) and a negative one would reach
# sleep() outside the eval below, so fall back to the defaults instead.
if ($scan_interval !~ /\A\d+(?:\.\d+)?\z/ || $scan_interval <= 0) {
    log_message("WARNING: invalid scan_interval '$scan_interval', using 300 seconds");
    $scan_interval = 300;
}
if ($full_scan_interval !~ /\A\d+(?:\.\d+)?\z/ || $full_scan_interval <= 0) {
    log_message("WARNING: invalid full_scan_interval '$full_scan_interval', using 3600 seconds");
    $full_scan_interval = 3600;
}

while (1) {
    my $current_time = time();
    my $force_full = ($current_time - $last_full_scan) >= $full_scan_interval;

    log_message("Performing full SMART scan (forced)") if $force_full;

    # The trailing 1 lets us detect failure from eval's return value instead
    # of relying on $@ alone.
    my $cycle_ok = eval {
        collect_smart_data($force_full);
        1;
    };

    if ($cycle_ok) {
        # Only count the full scan as done once the cycle finished, so a
        # failed cycle (e.g. database unreachable) is retried as a full scan.
        $last_full_scan = $current_time if $force_full;
    } else {
        log_message("ERROR: Collection failed: " . ($@ || 'unknown error'));
    }

    log_message("Sleeping for $scan_interval seconds...") if $debug;
    sleep($scan_interval);
}

# Run one collection cycle: connect to the database, discover candidate
# disk device nodes under /dev and hand each block device to process_device().
#
# Parameters:
#   $force_full - true when this cycle is a forced full scan; passed through
#                 to process_device() unchanged
#
# Dies when the database connection cannot be established.  Errors raised
# while processing a single device are logged and do not stop the cycle.
sub collect_smart_data {
    my ($force_full) = @_;
    
    log_message("[DEBUG] Starting data collection cycle, force_full=" . ($force_full ? 'true' : 'false')) if $debug;
    
    # Connect to database
    my $dsn = "DBI:Pg:host=$config->{database}{host};dbname=$config->{database}{database}";
    log_message("[DEBUG] Connecting to database: $dsn") if $debug;
    
    my $dbh = DBI->connect($dsn, $config->{database}{user}, $config->{database}{password}, 
                          {RaiseError => 1, AutoCommit => 1}) 
        or die "Database connection failed: $DBI::errstr";
    
    log_message("✓ Database connected") if $debug;
    
    # Test database connectivity (debug aid only; a failure here is logged
    # and does not abort the cycle)
    if ($debug) {
        my $test_ok = eval {
            my ($count) = $dbh->selectrow_array("SELECT COUNT(*) FROM hdd_inventory");
            log_message("[DEBUG] Database test: found $count HDDs in inventory");
            
            my ($presence_count) = $dbh->selectrow_array("SELECT COUNT(*) FROM hdd_presence WHERE is_current = TRUE");
            log_message("[DEBUG] Database test: found $presence_count current HDD presence records");
            1;
        };
        if (!$test_ok) {
            log_message("[DEBUG] Database test failed: $@");
        }
    }
    
    # Scan for devices.  Glob broadly and filter by name so that whole-disk
    # nodes with multi-letter suffixes (sdaa, sdab, ...) and multi-digit NVMe
    # controller/namespace numbers are included, while partition nodes
    # (sda1, nvme0n1p1) are left out.
    my @devices = grep { m{\A/dev/sd[a-z]+\z} } glob('/dev/sd*');
    push @devices, grep { m{\A/dev/nvme\d+n\d+\z} } glob('/dev/nvme*');
    
    log_message("[DEBUG] Found " . scalar(@devices) . " potential devices: " . join(', ', @devices)) if $debug;
    
    foreach my $device (@devices) {
        if (-b $device) {
            log_message("[DEBUG] Processing block device: $device") if $debug;
        } else {
            log_message("[DEBUG] Skipping non-block device: $device") if $debug;
            next;
        }
        
        # Isolate each device: one failing disk must not prevent the
        # remaining disks from being collected.
        my $device_ok = eval {
            process_device($dbh, $device, $force_full);
            1;
        };
        
        if (!$device_ok) {
            log_message("ERROR processing device $device: " . ($@ || 'unknown error'));
        }
    }
    
    $dbh->disconnect();
    log_message("Collection cycle complete") if $debug;
}

# Read SMART information for one device with smartctl and store it as a row
# in smart_readings.
#
# Parameters:
#   $dbh        - connected DBI database handle
#   $device     - device node path (for example /dev/sda)
#   $force_full - true to record the reading with type 'full' and to skip the
#                 "identical reading within the last hour" check
#
# Returns nothing.  The device is skipped (with a debug log line) when
# smartctl produces no output, reports SMART as unavailable, or when no
# model, serial number or attribute could be parsed.  Database errors are not
# caught here and reach the caller.
sub process_device {
    my ($dbh, $device, $force_full) = @_;
    
    log_message("[DEBUG] process_device: Processing $device") if $debug;
    
    # Get SMART data.  $device is expected to come from this daemon's own
    # scan of /dev, which is why it is interpolated into the command line.
    my $smartctl_cmd = "smartctl -A -i -H $device 2>&1";
    log_message("[DEBUG] Running: $smartctl_cmd") if $debug;
    my @smart_output = `$smartctl_cmd`;
    # $? is -1 when the command could not be started; shifting that would
    # yield a meaningless number, so report -1 as-is.
    my $exit_code = ($? == -1) ? -1 : ($? >> 8);
    
    if (!@smart_output) {
        log_message("[DEBUG] No SMART output for $device") if $debug;
        return;
    }
    
    log_message("[DEBUG] Got " . scalar(@smart_output) . " lines of SMART output from $device (exit code: $exit_code)") if $debug;
    
    # Check if smartctl indicates the device doesn't support SMART
    my $smart_output_text = join('', @smart_output);
    if ($smart_output_text =~ /SMART support is.*Unavailable|Device does not support SMART|No such device/) {
        log_message("[DEBUG] Device $device does not support SMART or is not accessible") if $debug;
        return;
    }
    
    my ($model, $serial, $temp, %smart_params);
    
    # NOTE(review): only "Device Model:" / "Serial Number:" and the tabular
    # attribute layout are recognised below; presumably NVMe output uses a
    # different layout and will be skipped as "missing critical data" --
    # confirm against real smartctl output.
    foreach my $line (@smart_output) {
        chomp $line;
        
        if ($line =~ /Device Model:\s+(.+)/) {
            $model = $1;
            log_message("[DEBUG] Found model: $model") if $debug;
        } elsif ($line =~ /Serial Number:\s+(.+)/) {
            $serial = $1;
            log_message("[DEBUG] Found serial: $serial") if $debug;
        } elsif ($line =~ /^\s*(\d+)\s+(.+?)\s+0x\w+\s+\d+\s+\d+\s+\d+\s+\S+\s+\S+\s+\S+\s+(\d+)/) {
            # Attribute row: ID ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
            # A single \S+-based pattern is enough: the former \w+-based
            # variant matched a strict subset of these lines.
            my ($name, $raw) = ($2, $3);
            $name =~ s/\s+/_/g;
            $smart_params{$name} = $raw;
            
            # Only the first few attributes are echoed to keep the log short
            if ($debug && scalar(keys %smart_params) <= 5) {
                log_message("[DEBUG] SMART param: $name = $raw");
            }
            
            # Any temperature-like attribute may supply the temperature; a
            # later non-zero value replaces an earlier one.
            if ($name =~ /Temperature|Temp/i) {
                $temp = $raw if (!defined $temp || $raw > 0);
            }
        }
    }
    
    if (!$model || !$serial) {
        log_message("[DEBUG] Missing critical data for $device - model: " . ($model || 'NULL') . ", serial: " . ($serial || 'NULL')) if $debug;
        return;
    }
    
    if (!%smart_params) {
        log_message("[DEBUG] No SMART parameters found for $device") if $debug;
        return;
    }
    
    log_message("[DEBUG] Parsed device data - Model: $model, Serial: $serial, Temperature: " . ($temp || 'NULL') . ", Parameters: " . scalar(keys %smart_params)) if $debug;
    
    log_message("Processing: $model ($serial) @ $device") if $debug;
    
    # Get or create HDD inventory entry
    my $hdd_id = get_or_create_hdd($dbh, $serial, $model, $device);
    
    # Canonical (sorted-key) encoding makes the JSON text for identical
    # parameters identical from run to run.
    my $params_json = JSON->new->utf8->canonical->encode(\%smart_params);
    
    # Config values are plain strings, so "false", "no" and "off" would be
    # truthy in Perl; treat them as disabled explicitly.
    my $store_unchanged = $config->{node}{store_unchanged};
    if (defined $store_unchanged && $store_unchanged =~ /\A(?:false|no|off|0)\z/i) {
        $store_unchanged = 0;
    }
    
    if (!$force_full && !$store_unchanged) {
        # Check for recent identical reading
        my ($recent_id) = $dbh->selectrow_array("
            SELECT id FROM smart_readings 
            WHERE hdd_id = ? AND parameters_json = ? 
            AND timestamp > NOW() - INTERVAL '1 hour'
            LIMIT 1
        ", undef, $hdd_id, $params_json);
        
        if (defined $recent_id) {
            log_message("  Skipping unchanged parameters") if $debug;
            return;
        }
    }
    
    # Store SMART reading
    my $reading_type = $force_full ? 'full' : 'differential';
    
    my $sth = $dbh->prepare("
        INSERT INTO smart_readings (hdd_id, serial_number, device_path, node_id, timestamp, temperature, parameters_json, reading_type)
        VALUES (?, ?, ?, ?, NOW(), ?, ?::jsonb, ?)
        RETURNING id
    ");
    
    # An unknown temperature is stored as 0
    my $reading_id = $dbh->selectrow_array($sth, undef, $hdd_id, $serial, $device, $node_id, $temp || 0, $params_json, $reading_type);
    
    log_message("  ✓ SMART reading stored (ID: $reading_id, temp: " . ($temp || 0) . "°C, type: $reading_type)") if $debug;
}

# Look up a drive by serial number in hdd_inventory, creating the inventory
# row when it does not exist, and bring hdd_presence up to date so that the
# drive has exactly one current presence record, on this node.
#
# Parameters:
#   $dbh         - connected DBI database handle
#   $serial      - drive serial number
#   $model       - drive model name (used only when a new row is created)
#   $device_path - device node the drive was seen at
#
# Returns the hdd_inventory id of the drive.
#
# All statements run inside one transaction so a failure part-way through
# cannot leave inventory and presence records disagreeing.  When the handle
# is already inside a transaction (AutoCommit off) the caller's transaction
# is used and left open.  Database errors are re-thrown after rollback.
sub get_or_create_hdd {
    my ($dbh, $serial, $model, $device_path) = @_;
    
    log_message("[DEBUG] get_or_create_hdd: serial=$serial, model=$model, device=$device_path, node=$node_id") if $debug;
    
    my $own_txn = $dbh->{AutoCommit} ? 1 : 0;
    $dbh->begin_work if $own_txn;
    
    my $hdd_id;
    my $ok = eval {
        $hdd_id = _sync_hdd_records($dbh, $serial, $model, $device_path);
        $dbh->commit if $own_txn;
        1;
    };
    
    if (!$ok) {
        # Save the error first: the rollback's own eval would clobber $@
        my $error = $@ || 'unknown error';
        eval { $dbh->rollback } if $own_txn;
        die $error;
    }
    
    return $hdd_id;
}

# Perform the inventory/presence statements for get_or_create_hdd() and
# return the hdd_inventory id.  Expects to run inside a transaction.
sub _sync_hdd_records {
    my ($dbh, $serial, $model, $device_path) = @_;
    
    # Check if HDD exists
    my ($hdd_id) = $dbh->selectrow_array(
        "SELECT id FROM hdd_inventory WHERE serial_number = ?", undef, $serial);
    
    log_message("[DEBUG] HDD lookup result: hdd_id=" . ($hdd_id || 'NULL') . " for serial=$serial") if $debug;
    
    if ($hdd_id) {
        log_message("[DEBUG] Found existing HDD with id=$hdd_id, updating location and presence") if $debug;
        
        # Update current location in inventory
        $dbh->do("UPDATE hdd_inventory SET current_device_path = ?, current_node_id = ?, last_seen = NOW() 
                  WHERE id = ?", undef, $device_path, $node_id, $hdd_id);
        log_message("[DEBUG] Updated hdd_inventory location for hdd_id=$hdd_id") if $debug;
        
        # Mark current presence records on other nodes as historic.
        # do() returns "0E0" for zero rows; "+ 0" turns that into a plain 0
        # for the log line.
        my $affected_rows = $dbh->do("UPDATE hdd_presence SET is_current = FALSE WHERE serial_number = ? AND is_current = TRUE AND node <> ?", undef, $serial, $node_id) + 0;
        log_message("[DEBUG] Marked $affected_rows historic hdd_presence records for serial=$serial") if $debug;
        
        # Check if there is already a current presence for this serial/node
        my ($presence_id) = $dbh->selectrow_array(
            "SELECT id FROM hdd_presence WHERE serial_number = ? AND node = ? AND is_current = TRUE",
            undef, $serial, $node_id);
        
        if ($presence_id) {
            log_message("[DEBUG] Found existing presence record id=$presence_id, updating data_end") if $debug;
            # Extend the existing presence interval to now
            $dbh->do("UPDATE hdd_presence SET data_end = NOW() WHERE id = ?", undef, $presence_id);
            log_message("[DEBUG] Updated data_end for presence_id=$presence_id") if $debug;
        } else {
            log_message("[DEBUG] No existing presence for serial=$serial node=$node_id, creating new record") if $debug;
            $dbh->do("UPDATE hdd_presence SET is_current = FALSE WHERE serial_number = ? AND is_current = TRUE", undef, $serial);
            my $new_presence_id = _insert_current_presence($dbh, $serial);
            log_message("[DEBUG] Created new hdd_presence record with id=$new_presence_id for serial=$serial node=$node_id") if $debug;
        }
        
        return $hdd_id;
    }
    
    # Create new HDD entry
    log_message("[DEBUG] Creating new HDD entry for serial=$serial model=$model") if $debug;
    my ($new_id) = $dbh->selectrow_array("
        INSERT INTO hdd_inventory (serial_number, model_name, current_device_path, current_node_id, 
                                   first_seen, last_seen)
        VALUES (?, ?, ?, ?, NOW(), NOW())
        RETURNING id
    ", undef, $serial, $model, $device_path, $node_id);
    log_message("[DEBUG] Created new HDD inventory entry with id=$new_id") if $debug;
    
    # Mark all previous hdd_presence as historic for this serial
    my $affected_rows = $dbh->do("UPDATE hdd_presence SET is_current = FALSE WHERE serial_number = ? AND is_current = TRUE", undef, $serial) + 0;
    log_message("[DEBUG] Marked $affected_rows historic hdd_presence records for new serial=$serial") if $debug;
    
    # Create new presence record
    my $new_presence_id = _insert_current_presence($dbh, $serial);
    log_message("[DEBUG] Created new hdd_presence record with id=$new_presence_id for new serial=$serial node=$node_id") if $debug;
    
    return $new_id;
}

# Insert a current hdd_presence row for $serial on this node and return its
# id.  Uses RETURNING id, like the other inserts in this file, rather than
# last_insert_id().
sub _insert_current_presence {
    my ($dbh, $serial) = @_;
    
    my ($presence_id) = $dbh->selectrow_array(
        "INSERT INTO hdd_presence (serial_number, node, data_start, data_end, is_current) VALUES (?, ?, NOW(), NOW(), TRUE) RETURNING id",
        undef, $serial, $node_id);
    
    return $presence_id;
}

# Parse the daemon's configuration file into a two-level hash.
#
# The format is a small YAML-like subset:
#   - "name:" on a line of its own starts a section
#   - "key: value" assigns a value within the current section
#   - blank lines and lines starting with '#' are ignored
# Leading/trailing whitespace is ignored, so indentation carries no meaning.
# A value wrapped in matching single or double quotes has the quotes removed.
#
# Parameters:
#   $file - path of the configuration file
#
# Returns a hash reference of the form { section => { key => value } }; all
# values are strings.  Dies when the file cannot be opened.
sub load_config {
    my ($file) = @_;
    
    open(my $fh, '<', $file)
        or die "Cannot read configuration file $file: $!\n";
    my $content = do { local $/; <$fh> };
    close($fh);
    $content = '' unless defined $content;   # empty file
    
    my %config;
    
    # Simple YAML-like parser
    my $current_section;
    foreach my $line (split /\n/, $content) {
        $line =~ s/^\s+|\s+$//g;
        next if $line =~ /^#/ || $line eq '';
        
        if ($line =~ /^(\w+):$/) {
            $current_section = $1;
        } elsif ($line =~ /^(\w+):\s*(.+)$/) {
            my ($key, $value) = ($1, $2);
            
            # A key before the first section header has nowhere to go;
            # skip it rather than filing it under an undefined hash key.
            if (!defined $current_section) {
                warn "Ignoring '$key' in $file: setting appears before any section\n";
                next;
            }
            
            # "value" / 'value' -> value
            $value =~ s/^(["'])(.*)\1$/$2/;
            
            $config{$current_section}{$key} = $value;
        }
    }
    
    return \%config;
}

# Print one log line, prefixed with a local-time timestamp, to the currently
# selected output handle (STDOUT unless changed with select()).
#
# Parameters:
#   $message - text to log; an undefined value is logged as an empty message
#
# Returns the result of print (true on success).
sub log_message {
    my ($message) = @_;
    $message = '' unless defined $message;
    my $timestamp = strftime("%Y-%m-%d %H:%M:%S", localtime);
    # Autoflush for this print: when output goes to a pipe or file it is
    # block-buffered by default, which would delay a long-running daemon's
    # log lines until the buffer fills.
    local $| = 1;
    print "[$timestamp] $message\n";
}

__END__

=head1 NAME

smart-collector-daemon.pl - autoSMART SMART Data Collection Daemon

=head1 SYNOPSIS

smart-collector-daemon.pl --config <config_file> [--debug] [--foreground]

=head1 DESCRIPTION

Automated daemon for collecting SMART data from storage devices and storing
in PostgreSQL database with differential storage optimization.

=head1 OPTIONS

=over 4

=item --config <file>

Configuration file path (required)

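=item --debug

Enable debug logging. If the AUTOSMART_DEBUG environment variable is set, it
takes precedence over this option: the value C<true> enables debug logging and
any other value disables it.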

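=item --foreground

Accepted on the command line. The script does not fork or detach itself, so it
runs in the foreground whether or not this option is given.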

=back

=head1 AUTHOR

autoSMART v1.0 - Hardware-based HDD tracking system

=cut