f16725e 3 months ago History
1 contributor
802 lines | 22.864kb
package SmartCollector;

use strict;
use warnings;
use DBI;
use JSON::XS;
use Time::HiRes qw(time);
use File::Slurp;
use Config::Simple;
use Digest::SHA qw(sha256_hex);

=head1 NAME

SmartCollector - SMART data collection module for autoSMART

=head1 DESCRIPTION

This module handles the collection of SMART data from HDDs identified in Madagascar inventory,
processes the data, and stores it in PostgreSQL for long-term analysis and AI predictions.

=head1 SYNOPSIS

    use SmartCollector;
    
    my $collector = SmartCollector->new(
        cluster_config => '/etc/pve/autoSMART/cluster.conf',
        local_config   => '/etc/default/autosmart',
    );
    
    # Collect data from all monitored drives
    $collector->collect_all();
    
    # Collect data from a specific drive
    my $smart_data = $collector->collect_smart_data('/dev/sda');

=cut

sub new {
    my ($class, %args) = @_;

    # Constructor. Named arguments (all optional):
    #   cluster_config - path to the cluster-wide configuration file
    #   local_config   - path to the node-local settings file
    #   debug          - numeric debug level (default 0)
    #   node_id        - node identifier (default: output of `hostname`)
    # Loads both configuration files and connects to the database before
    # returning the object; each of those steps may die.

    # Resolve the node id in scalar context BEFORE building the hash.
    # Backticks placed directly in the hash constructor run in list context,
    # so an empty/failed `hostname` would yield an empty list and silently
    # shift every following key/value pair.
    my $node_id = $args{node_id};
    if (!$node_id) {
        $node_id = `hostname`;
        $node_id = '' unless defined $node_id;
    }
    chomp $node_id;

    my $self = {
        cluster_config => $args{cluster_config} || '/etc/pve/autoSMART/cluster.conf',
        local_config   => $args{local_config} || '/etc/default/autosmart',
        debug          => $args{debug} || 0,
        node_id        => $node_id,
        smart_params   => {},
        db_handle      => undef,
        local_settings => {},
    };

    bless $self, $class;

    # Local settings first: the cluster loader falls back to them.
    $self->_load_local_config();
    $self->_load_cluster_config();
    $self->_connect_database();

    return $self;
}

=head2 _load_local_config

Load local node-specific configuration from /etc/default/autosmart

=cut

sub _load_local_config {
    my $self = shift;

    # Reads KEY=VALUE lines from $self->{local_config} into
    # $self->{local_settings}. A missing file is not an error: the method
    # returns without touching any state. Dies if the file exists but
    # cannot be opened.
    return unless -f $self->{local_config};

    open my $fh, '<', $self->{local_config}
        or die "Cannot read local config: $self->{local_config}: $!";

    while (my $line = <$fh>) {
        chomp $line;

        # Skip comments and blank lines
        next if $line =~ /^\s*#/ || $line =~ /^\s*$/;

        if ($line =~ /^(\w+)=(.+)$/) {
            my ($key, $value) = ($1, $2);
            $value =~ s/\s+$//;          # Trailing blanks/CR would defeat quote removal
            $value =~ s/^["']|["']$//g;  # Remove quotes
            $self->{local_settings}->{$key} = $value;
        }
    }

    close $fh;

    # Apply debug settings. The key is optional, so compare against a
    # defined value to avoid an "uninitialized" warning on every start-up.
    my $debug_enabled = $self->{local_settings}->{AUTOSMART_DEBUG_ENABLED};
    if (defined $debug_enabled && $debug_enabled eq 'true') {
        $self->{debug} = $self->{local_settings}->{AUTOSMART_DEBUG_LEVEL} || 1;
    }

    $self->_log("Loaded local configuration from $self->{local_config}");
}

=head2 _load_cluster_config

Load cluster-wide configuration from Proxmox shared storage

=cut

sub _load_cluster_config {
    my $self = shift;

    # Reads cluster-wide settings from $self->{cluster_config} and fills in
    # collection_interval, collection_timeout, madagascar_inventory,
    # cluster_name, cluster_nodes and smart_params on the object.
    # Dies if the file is missing or cannot be parsed.
    unless (-f $self->{cluster_config}) {
        die "Cluster configuration not found: $self->{cluster_config}";
    }

    my $cfg = Config::Simple->new($self->{cluster_config})
        or die "Cannot load cluster config file: $self->{cluster_config}";

    # Config::Simple may hand back a comma-separated value either as one
    # string or already split (list / array ref, depending on context).
    # Normalise every shape to a flat list of fields.
    my $flatten = sub {
        return map  { split /,/, $_ }
               grep { defined $_ }
               map  { ref($_) eq 'ARRAY' ? @$_ : $_ } @_;
    };

    # Load monitoring settings (cluster value, then local override, then default)
    $self->{collection_interval} = $cfg->param('cluster.collection_interval')
        || $self->{local_settings}->{AUTOSMART_COLLECTION_INTERVAL} || 300;
    $self->{collection_timeout} = $cfg->param('cluster.collection_timeout')
        || $self->{local_settings}->{AUTOSMART_COLLECTION_TIMEOUT} || 30;
    $self->{madagascar_inventory} = $cfg->param('madagascar.inventory_path');

    # Load cluster information
    $self->{cluster_name} = $cfg->param('cluster.cluster_name');

    my @nodes;
    foreach my $entry ($flatten->($cfg->param('cluster.nodes'))) {
        my $node = $entry;
        $node =~ s/^\s+|\s+$//g;
        push @nodes, $node if length $node;
    }
    $self->{cluster_nodes} = \@nodes;

    # Load SMART parameters from cluster config.
    # NOTE(review): Config::Simple documents param(-block => NAME) as
    # returning a hash ref of the block; a plain list of key names is also
    # accepted here in case an installed version behaves that way.
    my @block = $cfg->param(-block => 'smart_parameters');
    my %raw_for;
    if (@block == 1 && ref($block[0]) eq 'HASH') {
        %raw_for = %{ $block[0] };
    } else {
        foreach my $key (grep { defined $_ && !ref $_ } @block) {
            $raw_for{$key} = [ $cfg->param("smart_parameters.$key") ];
        }
    }

    foreach my $key (sort keys %raw_for) {
        # Expected layout: threshold,weight,enabled,description
        my ($threshold, $weight, $enabled, @rest) = $flatten->($raw_for{$key});
        foreach my $field ($threshold, $weight, $enabled) {
            $field =~ s/^\s+|\s+$//g if defined $field;
        }

        # Only enabled parameters are kept; short or malformed entries are skipped.
        next unless defined $enabled && $enabled eq 'true';

        $self->{smart_params}->{$key} = {
            threshold   => $threshold,
            weight      => $weight,
            enabled     => 1,
            description => (@rest ? join(',', @rest) : undef),
        };
    }

    my $cluster_label = defined $self->{cluster_name} ? $self->{cluster_name} : '';
    $self->_log("Loaded cluster configuration: $cluster_label (" .
                scalar(keys %{$self->{smart_params}}) . " SMART parameters)");
}

=head2 _connect_database

Establish PostgreSQL database connection using cluster configuration

=cut

sub _connect_database {
    my $self = shift;

    # Opens the PostgreSQL connection described by the [database] section of
    # the cluster configuration, stores the handle in $self->{db_handle} and
    # registers this node. Dies if the configuration cannot be read or the
    # connection fails (RaiseError is enabled).
    my $cfg = Config::Simple->new($self->{cluster_config})
        or die "Cannot load cluster config for database: $self->{cluster_config}";

    # Always fetch settings in scalar context: in an argument list a value
    # containing commas would expand to several arguments and shift the
    # parameters of DBI->connect.
    my $setting = sub {
        my ($name) = @_;
        my $value = $cfg->param("database.$name");
        return ref($value) eq 'ARRAY' ? join(',', @$value) : $value;
    };

    my $timeout = $setting->('connection_timeout') || 30;
    $timeout = 30 unless $timeout =~ /\A\d+\z/;

    # Leave unset settings out of the DSN instead of interpolating undef.
    my @dsn_parts;
    foreach my $name (qw(database host port)) {
        my $value = $setting->($name);
        push @dsn_parts, "$name=$value" if defined $value && length $value;
    }

    # The timeout travels in the DSN; an unknown key in the attribute hash
    # would not be acted on by DBI.
    # NOTE(review): assumes DBD::Pg passes connect_timeout through to libpq
    # - confirm against the installed driver.
    push @dsn_parts, "connect_timeout=$timeout";

    my $dsn = 'DBI:Pg:' . join(';', @dsn_parts);

    $self->{db_handle} = DBI->connect(
        $dsn,
        scalar $setting->('username'),
        scalar $setting->('password'),
        {
            RaiseError => 1,
            AutoCommit => 1,
            pg_enable_utf8 => 1,
        }
    ) or die "Database connection failed: $DBI::errstr";

    # Register this node in the cluster
    $self->_register_node();

    $self->_log("Database connection established to cluster database");
}

=head2 get_madagascar_drives

Get list of HDDs from Madagascar inventory (cluster-shared)

=cut

sub get_madagascar_drives {
    my $self = shift;

    # Returns an array ref of hashes (device_path, serial, model, size_gb,
    # madagascar_id, node_id) for every HDD in the Madagascar inventory that
    # belongs to this node or carries no node id. Returns [] when no
    # inventory file is configured or present. Read and JSON parse errors
    # propagate to the caller.
    my $inventory_path = $self->{madagascar_inventory};
    return [] unless defined $inventory_path && -f $inventory_path;

    my $inventory_json = read_file($inventory_path);
    my $inventory = decode_json($inventory_json);

    my @drives = ();

    # Extract HDD information from Madagascar inventory
    if (ref($inventory) eq 'HASH' && ref($inventory->{storage}) eq 'ARRAY') {
        foreach my $storage (@{$inventory->{storage}}) {
            next unless ref($storage) eq 'HASH';

            # Only include drives for this node; entries without a node id
            # are accepted as well.
            my $owner = $storage->{node_id};
            next if $owner && $owner ne $self->{node_id};

            next unless defined $storage->{type} && $storage->{type} eq 'HDD';
            next unless $storage->{device_path};

            push @drives, {
                device_path => $storage->{device_path},
                serial      => $storage->{serial},
                model       => $storage->{model},
                size_gb     => $storage->{size_gb},
                madagascar_id => $storage->{id},
                node_id     => $self->{node_id},
            };
        }
    }

    $self->_log("Found " . @drives . " HDDs for node $self->{node_id} in Madagascar inventory");
    return \@drives;
}

=head2 collect_smart_data

Collect SMART data from a specific drive

=cut

sub collect_smart_data {
    my ($self, $device_path) = @_;

    # Runs smartctl against $device_path, decodes its JSON report and
    # returns the structure produced by _process_smart_data. Returns undef
    # when no device path is given or the output cannot be parsed.
    unless (defined $device_path && length $device_path) {
        $self->_log("Cannot collect SMART data: no device path given");
        return undef;
    }

    # The path is interpolated into a shell command, so neutralise embedded
    # single quotes (close quote, escaped quote, reopen quote).
    (my $quoted_path = $device_path) =~ s/'/'\\''/g;

    # -H and -i are requested in addition to -A because _process_smart_data
    # reads the health status and the model/serial/firmware fields.
    # NOTE(review): per smartctl(8) those sections are only emitted when
    # asked for - confirm with the deployed smartmontools version.
    my $cmd = "smartctl -H -i -A -f brief -j '$quoted_path' 2>/dev/null";
    my $output = `$cmd`;
    my $exit_code = $? >> 8;

    # smartctl's exit status is a bit mask that can be non-zero even when a
    # report was produced, so it is only logged, never treated as fatal.
    $self->_log("smartctl exited with status $exit_code for $device_path", 3)
        if $exit_code;

    # Parse smartctl JSON output
    my $smart_data = {};

    eval {
        $smart_data = decode_json($output);
    };

    if ($@) {
        $self->_log("Failed to parse SMART data for $device_path: $@");
        return undef;
    }

    return $self->_process_smart_data($smart_data, $device_path);
}

=head2 _process_smart_data

Process and normalize SMART data

=cut

sub _process_smart_data {
    my ($self, $raw_data, $device_path) = @_;

    # Normalises a decoded smartctl JSON report into a flat hash ref:
    #   device_path, timestamp, collection_ok (1/0), temperature,
    #   model_name, serial_number, firmware, parameters
    # Only attributes whose name is a key of $self->{smart_params} are kept.
    # Missing or malformed sections are tolerated and leave defaults in place.
    $raw_data = {} unless ref($raw_data) eq 'HASH';

    my $status = ref($raw_data->{smart_status}) eq 'HASH' ? $raw_data->{smart_status} : {};

    my $processed = {
        device_path    => $device_path,
        timestamp      => time(),
        # Collapse the JSON boolean to a plain 1/0
        collection_ok  => ($status->{passed} ? 1 : 0),
        temperature    => 0,
        parameters     => {},
    };

    # Extract device information. The "device" object is consulted first,
    # then the top-level keys.
    # NOTE(review): smartctl's JSON report is understood to place
    # model_name/serial_number/firmware_version at top level - confirm.
    my $device = ref($raw_data->{device}) eq 'HASH' ? $raw_data->{device} : {};
    my %source_key_for = (
        model_name    => 'model_name',
        serial_number => 'serial_number',
        firmware      => 'firmware_version',
    );
    foreach my $field (keys %source_key_for) {
        my $key = $source_key_for{$field};
        $processed->{$field} = $device->{$key} || $raw_data->{$key} || '';
    }

    # Extract temperature
    if (ref($raw_data->{temperature}) eq 'HASH') {
        $processed->{temperature} = $raw_data->{temperature}->{current} || 0;
    }

    # Extract SMART attributes
    my $table = ref($raw_data->{ata_smart_attributes}) eq 'HASH'
        ? $raw_data->{ata_smart_attributes}->{table}
        : undef;

    foreach my $attr (ref($table) eq 'ARRAY' ? @$table : ()) {
        next unless ref($attr) eq 'HASH';

        my $name = $attr->{name};

        # Only collect monitored parameters
        next unless defined $name && exists $self->{smart_params}->{$name};

        my $raw   = ref($attr->{raw})   eq 'HASH' ? $attr->{raw}   : {};
        my $flags = ref($attr->{flags}) eq 'HASH' ? $attr->{flags} : {};

        $processed->{parameters}->{$name} = {
            id          => $attr->{id},
            value       => $attr->{value},
            worst       => $attr->{worst},
            thresh      => $attr->{thresh},
            raw_value   => $raw->{value},
            when_failed => $attr->{when_failed} || '',
            flags       => $flags->{string} || '',
        };
    }

    return $processed;
}

=head2 store_smart_data

Store processed SMART data using hardware-based tracking with migration detection

=cut

sub store_smart_data {
    my ($self, $drive_info, $smart_data) = @_;

    # Persists one processed reading. Resolves the hardware record for the
    # drive, asks the differential-storage check whether the reading is
    # worth keeping, and inserts it if so. Returns 1 on success (including
    # the "nothing changed, skipped" case) and 0 if any step died.
    eval {
        # Resolve the hardware record first (handles migrations)
        my $hdd_id = $self->_detect_or_create_hdd($drive_info, $smart_data);

        # Ask the differential-storage logic what to do with this reading
        my $decision = $self->_should_store_reading($hdd_id, $smart_data);

        unless ($decision->{store}) {
            $self->_log("Skipped unchanged SMART data for HDD ID $hdd_id (Serial: $smart_data->{serial_number})", 3);
            return;    # leaves the eval block only
        }

        $self->_insert_smart_reading_differential($hdd_id, $drive_info, $smart_data, $decision);

        $self->_log("Stored SMART data for HDD ID $hdd_id (Serial: $smart_data->{serial_number}, Type: $decision->{type})", 2);
    };

    return 1 unless $@;

    $self->_log("ERROR storing SMART data: $@", 1);
    return 0;
}

=head2 _detect_or_create_hdd

Detect HDD migration or create new HDD record using hardware identifiers

=cut

sub _detect_or_create_hdd {
    my ($self, $drive_info, $smart_data) = @_;

    # Returns the hdd id for the drive described by the arguments. The
    # detect_hdd_migration database function is asked first; when it yields
    # NULL a new inventory record is created via _create_new_hdd.
    my $device_path = $drive_info->{device_path};
    my $model       = $smart_data->{model_name} || 'unknown';
    my $serial      = $smart_data->{serial_number} || 'unknown';

    my @lookup_args = (
        $serial,
        $model,
        $device_path,
        $self->{node_id},
        $drive_info->{slot} || undef,
    );

    # Call PostgreSQL function to detect migration
    my $lookup = $self->{db_handle}->prepare(q{
        SELECT detect_hdd_migration(?, ?, ?, ?, ?, 'collector')
    });
    $lookup->execute(@lookup_args);

    my ($known_id) = $lookup->fetchrow_array();

    if (defined $known_id) {
        $self->_log("HDD tracked: ID $known_id, Serial $serial", 3);
        return $known_id;
    }

    # NULL result: the drive has not been seen before
    my $new_id = $self->_create_new_hdd($drive_info, $smart_data);
    $self->_log("New HDD discovered: $serial ($model) at $device_path", 2);

    return $new_id;
}

=head2 _create_new_hdd

Create new HDD record with hardware-based identification

=cut

sub _create_new_hdd {
    my ($self, $drive_info, $smart_data) = @_;

    # Inserts a row into hdd_inventory for a drive seen for the first time,
    # records a discovery alert for it and returns the new row id.
    my $model = $smart_data->{model_name};

    # Bind values, in the column order of the INSERT below
    my @row = (
        $smart_data->{serial_number} || 'unknown',
        $model || 'unknown',
        $smart_data->{firmware} || '',
        $drive_info->{size_gb} || 0,
        $self->_extract_manufacturer($model),
        $drive_info->{device_path},
        $self->{node_id},
        $drive_info->{slot} || undef,
        $drive_info->{madagascar_id},
    );

    my $insert = $self->{db_handle}->prepare(q{
        INSERT INTO hdd_inventory 
        (serial_number, model_name, firmware, size_gb, manufacturer,
         current_device_path, current_node_id, current_slot,
         madagascar_id, first_seen, last_seen, status)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, NOW(), NOW(), 'active')
        RETURNING id
    });
    $insert->execute(@row);

    # RETURNING id hands back the generated key as the only column
    my ($new_id) = $insert->fetchrow_array();

    # Create discovery alert
    $self->_create_discovery_alert($new_id, $drive_info, $smart_data);

    return $new_id;
}

=head2 _extract_manufacturer

Extract manufacturer from model name

=cut

sub _extract_manufacturer {
    my ($self, $model_name) = @_;

    # Maps a drive model string to a manufacturer name. Known vendor
    # patterns are tried in the order listed; otherwise the first word of
    # the model is returned. Returns 'Unknown' for an empty/undefined model
    # or one that does not start with a word character.
    return 'Unknown' unless $model_name;

    # Common HDD manufacturer patterns. Kept as an ordered list of pairs:
    # a hash would stringify the regex keys and iterate them in random
    # order, making the result unpredictable for a model that matches more
    # than one pattern.
    my @manufacturer_patterns = (
        [ qr/^WD|Western\s*Digital/i => 'Western Digital' ],
        [ qr/^ST|Seagate/i           => 'Seagate' ],
        [ qr/^HGST|Hitachi/i         => 'HGST/Hitachi' ],
        [ qr/^TOSHIBA/i              => 'Toshiba' ],
        [ qr/^Samsung/i              => 'Samsung' ],
        [ qr/^Maxtor/i               => 'Maxtor' ],
        [ qr/^Fujitsu/i              => 'Fujitsu' ],
    );

    foreach my $entry (@manufacturer_patterns) {
        my ($pattern, $manufacturer) = @$entry;
        return $manufacturer if $model_name =~ $pattern;
    }

    # Extract first word as fallback
    if ($model_name =~ /^(\w+)/) {
        return $1;
    }

    return 'Unknown';
}

=head2 _create_discovery_alert

Create alert for new HDD discovery

=cut

sub _create_discovery_alert {
    my ($self, $hdd_id, $drive_info, $smart_data) = @_;

    # Writes an alert_history row of type 'discovery' describing a newly
    # found drive. Returns whatever the database handle's do() returns.
    my $serial      = $smart_data->{serial_number};
    my $device_path = $drive_info->{device_path};
    my $node_id     = $self->{node_id};

    # Human-readable summary; placeholders stand in for missing details
    my $message = sprintf(
        "New HDD discovered: %s (%s) at %s on node %s - Size: %s GB",
        $serial || 'unknown',
        $smart_data->{model_name} || 'unknown',
        $device_path,
        $node_id,
        $drive_info->{size_gb} || '?'
    );

    my @row = ($hdd_id, $serial, $device_path, $node_id, $message);

    $self->{db_handle}->do(q{
        INSERT INTO alert_history 
        (hdd_id, serial_number, device_path, node_id, alert_type, message)
        VALUES (?, ?, ?, ?, 'discovery', ?)
    }, undef, @row);
}

=head2 _should_store_reading

Check if SMART reading should be stored using differential storage logic

=cut

sub _should_store_reading {
    my ($self, $hdd_id, $smart_data) = @_;

    # Asks the should_store_smart_reading database function whether this
    # reading needs to be stored. Returns a hash ref with the keys store,
    # type, changes_detected, changed_parameters, previous_reading_id and
    # checksum; all but checksum are undef if the function returns no row.

    # Generate checksum of SMART parameters. The JSON must be canonical
    # (sorted keys): Perl's hash order differs between processes, so a
    # plain encode_json would give identical data different checksums on
    # different runs.
    my $parameters_json = JSON::XS->new->utf8->canonical(1)
        ->encode($smart_data->{parameters} || {});
    my $checksum = sha256_hex($parameters_json . ($smart_data->{temperature} || ''));

    # Call PostgreSQL function to determine if we should store this reading.
    # "SELECT * FROM f(...)" expands a composite/table result into the named
    # columns read below; "SELECT f(...)" yields a single column named after
    # the function, so none of those keys would exist.
    # NOTE(review): assumes the function returns a row with these column
    # names - confirm against the schema.
    my $sth = $self->{db_handle}->prepare(q{
        SELECT * FROM should_store_smart_reading(?, ?, ?, NOW())
    });

    $sth->execute($hdd_id, $parameters_json, $checksum);

    my $result = $sth->fetchrow_hashref() || {};
    $sth->finish();

    return {
        store => $result->{should_store},
        type => $result->{reading_type},
        changes_detected => $result->{changes_detected},
        changed_parameters => $result->{changed_parameters},
        previous_reading_id => $result->{previous_reading_id},
        checksum => $checksum
    };
}

=head2 _insert_smart_reading_differential

Insert SMART reading with differential storage information

=cut

sub _insert_smart_reading_differential {
    my ($self, $hdd_id, $drive_info, $smart_data, $storage_info) = @_;

    # Inserts one smart_readings row together with the differential-storage
    # metadata from $storage_info. For a 'differential' reading that lists
    # changed parameters, only those parameters are serialised; otherwise
    # the full parameter set is stored.
    my $full_set = $smart_data->{parameters};
    my $payload  = $full_set;

    if ($storage_info->{type} eq 'differential' && $storage_info->{changed_parameters}) {
        # changed_parameters is decoded as a JSON array of parameter names
        my $changed_names = decode_json($storage_info->{changed_parameters});
        my %subset = map { $_ => $full_set->{$_} } @$changed_names;
        $payload = \%subset;
    }

    # Bind values, in the column order of the INSERT below
    my @row = (
        $hdd_id,
        $smart_data->{serial_number},
        $drive_info->{device_path},
        $self->{node_id},
        $smart_data->{timestamp},
        $smart_data->{collection_ok},
        $smart_data->{temperature},
        encode_json($payload),
        $storage_info->{type},
        ($storage_info->{changes_detected} ? 'true' : 'false'),
        $storage_info->{changed_parameters},
        $storage_info->{previous_reading_id},
        $storage_info->{checksum},
    );

    $self->{db_handle}->do(q{
        INSERT INTO smart_readings
        (hdd_id, serial_number, device_path, node_id, timestamp, 
         collection_ok, temperature, parameters_json, reading_type,
         changes_detected, changed_parameters, previous_reading_id, checksum)
        VALUES (?, ?, ?, ?, to_timestamp(?), ?, ?, ?, ?, ?, ?, ?, ?)
    }, undef, @row);
}

=head2 _insert_smart_reading

Insert SMART reading linked to hardware ID (legacy method for compatibility)

=cut

sub _insert_smart_reading {
    my ($self, $hdd_id, $drive_info, $smart_data) = @_;

    # Legacy insert: writes one smart_readings row with the complete
    # parameter set and no differential-storage columns.
    my @row = (
        $hdd_id,
        $smart_data->{serial_number},
        $drive_info->{device_path},
        $self->{node_id},
        $smart_data->{timestamp},
        $smart_data->{collection_ok},
        $smart_data->{temperature},
        encode_json($smart_data->{parameters}),
    );

    $self->{db_handle}->do(q{
        INSERT INTO smart_readings
        (hdd_id, serial_number, device_path, node_id, timestamp, 
         collection_ok, temperature, parameters_json)
        VALUES (?, ?, ?, ?, to_timestamp(?), ?, ?, ?)
    }, undef, @row);
}

=head2 collect_all

Collect SMART data from all drives in Madagascar inventory

=cut

sub collect_all {
    my $self = shift;

    # Collects and stores SMART data for every drive returned by
    # get_madagascar_drives. Returns a hash ref with the keys successful,
    # failed, total and storage_stats (the result of
    # _get_recent_storage_stats).
    my $drives = $self->get_madagascar_drives();
    my $successful = 0;
    my $failed = 0;

    foreach my $drive (@$drives) {
        my $smart_data = $self->collect_smart_data($drive->{device_path});

        if ($smart_data && $self->store_smart_data($drive, $smart_data)) {
            $successful++;
        } else {
            $failed++;
            $self->_log("Failed to collect/store data for $drive->{device_path}");
        }

        # Small delay between drives to avoid overwhelming system
        select(undef, undef, undef, 0.1);
    }

    # Get storage statistics for this collection run
    my $stats = $self->_get_recent_storage_stats();
    $self->_log("Collection complete: $successful successful, $failed failed");

    # Not every counter is necessarily present in the statistics (nothing in
    # this module sets "skipped"), so substitute a placeholder rather than
    # interpolating undef into the log line.
    my %shown = map { $_ => (defined $stats->{$_} ? $stats->{$_} : 'n/a') }
                qw(baseline full differential skipped);
    $self->_log("Storage efficiency - Baseline: $shown{baseline}, Full: $shown{full}, Differential: $shown{differential}, Skipped: $shown{skipped}");

    return {
        successful => $successful,
        failed => $failed,
        total => scalar(@$drives),
        storage_stats => $stats
    };
}

=head2 _get_recent_storage_stats

Get statistics about storage efficiency from recent readings

=cut

sub _get_recent_storage_stats {
    my $self = shift;

    # Counts smart_readings rows of the last hour per reading_type.
    # Returns a hash ref with one counter per reading type found (baseline,
    # full and differential are always present), plus total and
    # efficiency_percent (share of baseline + differential rows).
    # The query has no node filter, so rows from every node are counted.
    my $sql = q{
        SELECT 
            reading_type,
            COUNT(*) as count
        FROM smart_readings 
        WHERE timestamp > NOW() - INTERVAL '1 hour'
        GROUP BY reading_type
        ORDER BY reading_type
    };

    my $sth = $self->{db_handle}->prepare($sql);
    $sth->execute();

    my $stats = {
        baseline => 0,
        full => 0,
        differential => 0,
        total => 0
    };

    while (my $row = $sth->fetchrow_hashref()) {
        # Rows written by _insert_smart_reading carry no reading_type, so
        # the column can come back NULL; bucket those rows under 'unknown'
        # instead of using undef as a hash key.
        my $type = defined $row->{reading_type} ? $row->{reading_type} : 'unknown';
        my $count = $row->{count} || 0;

        $stats->{$type} = $count;
        $stats->{total} += $count;
    }

    # Calculate efficiency percentage
    my $efficient_readings = $stats->{differential} + $stats->{baseline};
    my $efficiency_pct = $stats->{total} > 0 ?
        sprintf("%.1f", ($efficient_readings / $stats->{total}) * 100) : 0;

    $stats->{efficiency_percent} = $efficiency_pct;

    return $stats;
}

=head2 _log

Internal logging method with enhanced debug levels

=cut

sub _log {
    my ($self, $message, $level) = @_;

    # Emits $message when the object's debug level is at least $level
    # (default 1). Output goes to the currently selected filehandle, to
    # syslog when AUTOSMART_LOG_SYSLOG is 'true', and to
    # AUTOSMART_DEBUG_LOG_FILE when set and the debug level is >= 2.
    # Logging is best-effort: syslog and file failures are ignored.
    $level ||= 1;  # Default to basic level

    # Check if we should log based on debug level
    my $debug = $self->{debug} || 0;
    return unless $debug >= $level;

    # This method is called from error handlers; keep the eval below from
    # clobbering the caller's $@.
    local $@;

    my $settings = $self->{local_settings} || {};

    my $timestamp = scalar(localtime());
    my $node_id = $self->{node_id} || 'unknown';
    my $prefix = "[$timestamp] [$node_id] SmartCollector";

    if ($debug) {
        print "$prefix: $message\n";
    }

    # Also log to syslog if enabled (the setting is optional)
    if (($settings->{AUTOSMART_LOG_SYSLOG} || '') eq 'true') {
        eval {
            # Loaded at run time so the eval really guards the load; a
            # compile-time "use" would be executed regardless of the eval.
            require Sys::Syslog;
            my $facility = $settings->{AUTOSMART_LOG_FACILITY} || 'daemon';
            Sys::Syslog::openlog('autosmart', 'pid,ndelay', $facility);
            # '%s' keeps percent signs in the message from being treated
            # as format directives.
            Sys::Syslog::syslog('info', '%s', "SmartCollector[$node_id]: $message");
            Sys::Syslog::closelog();
        };
    }

    # Log to file if specified
    my $log_file = $settings->{AUTOSMART_DEBUG_LOG_FILE};
    if ($log_file && $debug >= 2) {
        # Skip quietly when the file cannot be opened
        if (open my $fh, '>>', $log_file) {
            print {$fh} "$prefix: $message\n";
            close $fh;
        }
    }
}

=head2 _register_node

Register this node in the cluster database

=cut

sub _register_node {
    my $self = shift;

    # Creates the cluster_nodes table if necessary and inserts or refreshes
    # the row for this node (hostname, first IP address, last_seen).
    # Failures are logged as warnings and never propagate to the caller.
    eval {
        # Create cluster_nodes table if it doesn't exist
        $self->{db_handle}->do(q{
            CREATE TABLE IF NOT EXISTS cluster_nodes (
                node_id VARCHAR(100) PRIMARY KEY,
                hostname VARCHAR(255),
                ip_address INET,
                last_seen TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                status VARCHAR(20) DEFAULT 'active',
                version VARCHAR(50),
                capabilities JSON,
                created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
            )
        });

        # Register/update this node
        my $hostname = `hostname -f`;
        $hostname = '' unless defined $hostname;
        chomp $hostname;

        # First address reported by `hostname -I`. When there is none, $ip
        # stays undef and is stored as NULL: an empty string is not a valid
        # INET value and would make the whole registration fail.
        my $address_list = `hostname -I`;
        my ($ip) = defined $address_list ? split(' ', $address_list) : ();

        $self->{db_handle}->do(q{
            INSERT INTO cluster_nodes 
            (node_id, hostname, ip_address, last_seen, status, version)
            VALUES (?, ?, ?, NOW(), 'active', '1.0')
            ON CONFLICT (node_id)
            DO UPDATE SET
                hostname = EXCLUDED.hostname,
                ip_address = EXCLUDED.ip_address,
                last_seen = NOW(),
                status = 'active'
        }, undef, $self->{node_id}, $hostname, $ip);

        $self->_log("Registered node $self->{node_id} in cluster", 2);
    };

    if ($@) {
        $self->_log("Warning: Failed to register node: $@", 1);
    }
}

=head2 DESTROY

Cleanup database connection

=cut

sub DESTROY {
    my ($self) = @_;

    # Close the database connection, if one was ever opened.
    my $dbh = $self->{db_handle};
    return unless $dbh;

    $dbh->disconnect();
}

1;

__END__

=head1 AUTHOR

AutoSMART Development Team

=head1 LICENSE

This software is part of the autoSMART project.

=cut