# 1 contributor
package SmartCollector;
use strict;
use warnings;
use DBI;
use JSON::XS;
use Time::HiRes qw(time);
use File::Slurp;
use Config::Simple;
use Digest::SHA qw(sha256_hex);
=head1 NAME
SmartCollector - SMART data collection module for autoSMART
=head1 DESCRIPTION
This module handles the collection of SMART data from HDDs identified in Madagascar inventory,
processes the data, and stores it in PostgreSQL for long-term analysis and AI predictions.
=head1 SYNOPSIS
use SmartCollector;
my $collector = SmartCollector->new(
cluster_config => '/etc/pve/autoSMART/cluster.conf',
local_config => '/etc/default/autosmart'
);
# Collect data from all monitored drives
$collector->collect_all();
# Collect (but do not store) data from a specific drive
my $smart_data = $collector->collect_smart_data('/dev/sda');
=cut
# Build a collector object, then load the node-local settings, the
# cluster-wide configuration and open the database connection, in that order.
#
# Named arguments (all optional):
#   cluster_config - path of the cluster configuration file
#   local_config   - path of the node-local settings file
#   debug          - initial debug level (0 when omitted)
#   node_id        - identifier of this node; the output of `hostname`
#                    is used when it is not supplied
#
# Returns the blessed object. Any exception raised by the three setup
# methods propagates to the caller.
sub new {
    my ($class, %args) = @_;

    # The external hostname command only runs when no node_id was given.
    my $node_id = $args{node_id} || `hostname`;
    chomp $node_id;

    my $self = bless {
        cluster_config => $args{cluster_config} || '/etc/pve/autoSMART/cluster.conf',
        local_config   => $args{local_config}   || '/etc/default/autosmart',
        debug          => $args{debug}          || 0,
        node_id        => $node_id,
        smart_params   => {},
        db_handle      => undef,
        local_settings => {},
    }, $class;

    # Setup steps run strictly in this order.
    for my $step (qw(_load_local_config _load_cluster_config _connect_database)) {
        $self->$step();
    }

    return $self;
}
=head2 _load_local_config
Load local node-specific configuration from /etc/default/autosmart
=cut
# Read KEY=VALUE pairs from the node-local settings file into
# $self->{local_settings}.
#
# Blank lines and lines starting with '#' are skipped. Trailing whitespace
# (including a CR from DOS line endings) is dropped and one surrounding
# quote character is removed from each end of the value.
#
# When AUTOSMART_DEBUG_ENABLED is 'true', $self->{debug} is replaced by
# AUTOSMART_DEBUG_LEVEL (1 when that is unset).
#
# Returns nothing useful. A missing file is not an error; an unreadable
# file is fatal.
sub _load_local_config {
    my $self = shift;

    my $path = $self->{local_config};
    return unless defined $path && -f $path;

    open my $fh, '<', $path
        or die "Cannot read local config: $path: $!";

    while (my $line = <$fh>) {
        chomp $line;
        next if $line =~ /^\s*#/ || $line =~ /^\s*$/;
        next unless $line =~ /^(\w+)=(.+)$/;

        my ($key, $value) = ($1, $2);
        $value =~ s/\s+$//;             # also removes a stray "\r"
        $value =~ s/^["']|["']$//g;     # Remove quotes
        $self->{local_settings}->{$key} = $value;
    }
    close $fh;

    # Apply debug settings; the key is optional, so compare against '' when
    # it is absent instead of triggering an "uninitialized" warning.
    my $settings = $self->{local_settings};
    if (($settings->{AUTOSMART_DEBUG_ENABLED} // '') eq 'true') {
        $self->{debug} = $settings->{AUTOSMART_DEBUG_LEVEL} || 1;
    }

    $self->_log("Loaded local configuration from $path");
}
=head2 _load_cluster_config
Load cluster-wide configuration from Proxmox shared storage
=cut
# Turn one Config::Simple value into a list of trimmed comma-separated
# fields. Config::Simple hands back an array reference when the raw value
# contained commas, and a plain string otherwise (e.g. when it was quoted);
# both shapes are accepted. $limit, when given, caps the number of fields so
# that the last one may itself contain commas.
sub _config_fields {
    my ($value, $limit) = @_;
    return () unless defined $value;

    my $joined = ref $value eq 'ARRAY'
        ? join(',', map { defined $_ ? $_ : '' } @$value)
        : "$value";

    my @fields = split /,/, $joined, $limit || 0;
    s/^\s+|\s+$//g for @fields;
    return @fields;
}

# Load the cluster-wide configuration file into the object.
#
# Sets collection_interval, collection_timeout (cluster value, then the
# node-local setting, then a built-in default), madagascar_inventory,
# cluster_name, cluster_nodes (array ref) and smart_params. Each entry of
# the [smart_parameters] block is "threshold,weight,enabled,description";
# only entries whose third field is 'true' are kept.
#
# Dies when the file is missing or cannot be parsed.
sub _load_cluster_config {
    my $self = shift;

    unless (-f $self->{cluster_config}) {
        die "Cluster configuration not found: $self->{cluster_config}";
    }
    my $cfg = Config::Simple->new($self->{cluster_config})
        or die "Cannot load cluster config file: $self->{cluster_config}";

    my $local = $self->{local_settings} || {};

    # Load monitoring settings
    $self->{collection_interval} = $cfg->param('cluster.collection_interval')
        || $local->{AUTOSMART_COLLECTION_INTERVAL} || 300;
    $self->{collection_timeout} = $cfg->param('cluster.collection_timeout')
        || $local->{AUTOSMART_COLLECTION_TIMEOUT} || 30;
    $self->{madagascar_inventory} = scalar $cfg->param('madagascar.inventory_path');

    # Load cluster information
    $self->{cluster_name}  = scalar $cfg->param('cluster.cluster_name');
    $self->{cluster_nodes} = [ _config_fields(scalar $cfg->param('cluster.nodes')) ];

    # Load SMART parameters from cluster config. param(-block => ...) yields
    # a hash reference of the block; a plain list of key names is accepted
    # as well in case the configuration object behaves that way.
    my @block = $cfg->param(-block => 'smart_parameters');
    my %raw_params;
    if (@block == 1 && ref $block[0] eq 'HASH') {
        %raw_params = %{ $block[0] };
    }
    else {
        for my $key (grep { defined $_ && !ref $_ } @block) {
            $raw_params{$key} = scalar $cfg->param("smart_parameters.$key");
        }
    }

    foreach my $key (sort keys %raw_params) {
        my ($threshold, $weight, $enabled, $description)
            = _config_fields($raw_params{$key}, 4);
        next unless defined $enabled && $enabled eq 'true';

        $self->{smart_params}->{$key} = {
            threshold   => $threshold,
            weight      => $weight,
            enabled     => 1,
            description => $description,
        };
    }

    my $cluster_name = defined $self->{cluster_name} ? $self->{cluster_name} : '';
    $self->_log("Loaded cluster configuration: $cluster_name (" .
        scalar(keys %{ $self->{smart_params} }) . " SMART parameters)");
}
=head2 _connect_database
Establish PostgreSQL database connection using cluster configuration
=cut
# Open the PostgreSQL connection described by the [database] block of the
# cluster configuration, store it in $self->{db_handle} and register this
# node.
#
# The connection timeout (database.connection_timeout, default 30 seconds)
# is sent as the connect_timeout DSN parameter rather than as a DBI handle
# attribute. DSN parts whose setting is missing or empty are left out.
#
# Dies when the configuration cannot be parsed or the connection fails.
sub _connect_database {
    my $self = shift;

    my $cfg = Config::Simple->new($self->{cluster_config})
        or die "Cannot load cluster config for database: $self->{cluster_config}";

    # Always read settings in scalar context: a missing key must not drop an
    # argument, and a value containing commas (returned as an array
    # reference) is joined back into one string.
    my $setting = sub {
        my ($name) = @_;
        my $value = $cfg->param("database.$name");
        return ref $value eq 'ARRAY' ? join(',', @$value) : $value;
    };

    my $timeout = $setting->('connection_timeout') || 30;

    my @dsn_parts;
    for my $part (
        [ database        => $setting->('database') ],
        [ host            => $setting->('host') ],
        [ port            => $setting->('port') ],
        [ connect_timeout => $timeout ],
    ) {
        my ($key, $value) = @$part;
        next unless defined $value && length $value;
        push @dsn_parts, "$key=$value";
    }
    my $dsn = 'DBI:Pg:' . join(';', @dsn_parts);

    $self->{db_handle} = DBI->connect(
        $dsn,
        $setting->('username'),
        $setting->('password'),
        {
            RaiseError     => 1,
            AutoCommit     => 1,
            pg_enable_utf8 => 1,
        }
    ) or die "Database connection failed: $DBI::errstr";

    # Register this node in the cluster
    $self->_register_node();
    $self->_log("Database connection established to cluster database");
}
=head2 get_madagascar_drives
Get list of HDDs from Madagascar inventory (cluster-shared)
=cut
# Return an array reference of HDD descriptions for this node, taken from
# the "storage" list of the Madagascar inventory JSON file.
#
# An entry is kept when its node_id matches this node or is empty, its type
# is 'HDD' and it has a device_path. Each result is a hash with
# device_path, serial, model, size_gb, madagascar_id and node_id.
#
# Returns an empty array reference when no inventory path is configured,
# the file is missing, or it cannot be read or parsed (the failure is
# logged instead of being thrown).
sub get_madagascar_drives {
    my $self = shift;

    my $path = $self->{madagascar_inventory};
    return [] unless defined $path && -f $path;

    my $inventory;
    my $loaded = eval {
        open my $fh, '<:raw', $path or die "cannot open: $!\n";
        local $/;                       # slurp the whole file
        my $inventory_json = <$fh>;
        close $fh;
        $inventory = decode_json($inventory_json);
        1;
    };
    unless ($loaded) {
        my $error = $@ || 'unknown error';
        $self->_log("Failed to read Madagascar inventory $path: $error");
        return [];
    }

    # Extract HDD information from Madagascar inventory
    my $entries = (ref $inventory eq 'HASH' && ref $inventory->{storage} eq 'ARRAY')
        ? $inventory->{storage}
        : [];

    my @drives;
    foreach my $storage (@$entries) {
        next unless ref $storage eq 'HASH';

        # Only include drives for this node (entries without a node are kept)
        my $owner = $storage->{node_id};
        next if $owner && $owner ne $self->{node_id};

        next unless defined $storage->{type} && $storage->{type} eq 'HDD';
        next unless $storage->{device_path};

        push @drives, {
            device_path   => $storage->{device_path},
            serial        => $storage->{serial},
            model         => $storage->{model},
            size_gb       => $storage->{size_gb},
            madagascar_id => $storage->{id},
            node_id       => $self->{node_id},
        };
    }

    $self->_log("Found " . @drives . " HDDs for node $self->{node_id} in Madagascar inventory");
    return \@drives;
}
=head2 collect_smart_data
Collect SMART data from a specific drive
=cut
# Run smartctl against one device and return the processed reading.
#
# smartctl is asked for identity (-i), health status (-H) and the attribute
# table (-A) in JSON form, so that the model/serial and smart_status fields
# read by _process_smart_data are present in the output.
#
# Parameters:
#   $device_path - device to query, e.g. /dev/sda
#
# Returns the hash reference built by _process_smart_data, or undef when no
# device path was given or the output is not a JSON object.
sub collect_smart_data {
    my ($self, $device_path) = @_;

    unless (defined $device_path && length $device_path) {
        $self->_log("Cannot collect SMART data: no device path given");
        return undef;
    }

    # Escape embedded single quotes so the path cannot terminate the quoted
    # shell word and inject further commands.
    (my $quoted_path = $device_path) =~ s/'/'\\''/g;
    my $cmd = "smartctl -i -H -A -f brief -j '$quoted_path' 2>/dev/null";
    my $output = `$cmd`;
    my $exit_code = $? == -1 ? -1 : $? >> 8;

    # Parse smartctl JSON output
    my $smart_data;
    my $parsed = eval {
        $smart_data = decode_json(defined $output ? $output : '');
        1;
    };
    unless ($parsed && ref $smart_data eq 'HASH') {
        my $reason = $parsed ? "unexpected JSON structure\n" : $@;
        $self->_log("Failed to parse SMART data for $device_path (smartctl exit code $exit_code): $reason");
        return undef;
    }

    return $self->_process_smart_data($smart_data, $device_path);
}
=head2 _process_smart_data
Process and normalize SMART data
=cut
# Reduce decoded smartctl JSON to the fields the collector stores.
#
# Parameters:
#   $raw_data    - hash reference decoded from `smartctl -j` output
#   $device_path - device the data was read from
#
# Returns a hash reference with device_path, timestamp, collection_ok
# (1 when smart_status.passed is true, else 0), temperature (0 when
# absent) and parameters (one entry per attribute whose name is a key of
# $self->{smart_params}). model_name, serial_number and firmware are added
# when the input carries identity data.
sub _process_smart_data {
    my ($self, $raw_data, $device_path) = @_;
    $raw_data = {} unless ref $raw_data eq 'HASH';

    my $status = ref $raw_data->{smart_status} eq 'HASH' ? $raw_data->{smart_status} : {};
    my $processed = {
        device_path   => $device_path,
        timestamp     => time(),
        collection_ok => ($status->{passed} ? 1 : 0),   # plain 1/0, never a JSON boolean object
        temperature   => 0,
        parameters    => {},
    };

    # Extract device information. smartctl reports model_name, serial_number
    # and firmware_version as top-level keys; the same keys inside "device"
    # are still honoured as a fallback.
    my $device = ref $raw_data->{device} eq 'HASH' ? $raw_data->{device} : {};
    my %source_key_for = (
        model_name    => 'model_name',
        serial_number => 'serial_number',
        firmware      => 'firmware_version',
    );
    if (exists $raw_data->{device} || grep { exists $raw_data->{$_} } values %source_key_for) {
        for my $field (keys %source_key_for) {
            my $source_key = $source_key_for{$field};
            $processed->{$field} = $raw_data->{$source_key} || $device->{$source_key} || '';
        }
    }

    # Extract temperature
    if (ref $raw_data->{temperature} eq 'HASH') {
        $processed->{temperature} = $raw_data->{temperature}->{current} || 0;
    }

    # Extract SMART attributes
    my $table = ref $raw_data->{ata_smart_attributes} eq 'HASH'
        ? $raw_data->{ata_smart_attributes}->{table}
        : undef;
    foreach my $attr (ref $table eq 'ARRAY' ? @$table : ()) {
        next unless ref $attr eq 'HASH' && defined $attr->{name};
        my $name = $attr->{name};

        # Only collect monitored parameters
        next unless exists $self->{smart_params}->{$name};

        my $raw   = ref $attr->{raw}   eq 'HASH' ? $attr->{raw}   : {};
        my $flags = ref $attr->{flags} eq 'HASH' ? $attr->{flags} : {};
        $processed->{parameters}->{$name} = {
            id          => $attr->{id},
            value       => $attr->{value},
            worst       => $attr->{worst},
            thresh      => $attr->{thresh},
            raw_value   => $raw->{value},
            when_failed => $attr->{when_failed} || '',
            flags       => $flags->{string} || '',
        };
    }

    return $processed;
}
=head2 store_smart_data
Store processed SMART data using hardware-based tracking with migration detection
=cut
# Persist one processed SMART reading.
#
# Parameters:
#   $drive_info - inventory entry of the drive
#   $smart_data - reading produced by collect_smart_data
#
# Returns 1 only when a collection event was actually written; returns 0
# when the insert produced no event id or any step raised an exception
# (the error is logged, not re-thrown).
sub store_smart_data {
    my ($self, $drive_info, $smart_data) = @_;

    my $stored = 0;
    my $completed = eval {
        # Detect/handle HDD migration first
        my $hdd_id = $self->_detect_or_create_hdd($drive_info, $smart_data);

        # SCHEMA v2: Store complete reading (no differential storage)
        my $event_id = $self->_insert_collection_event($hdd_id, $drive_info, $smart_data);
        if ($event_id) {
            my $serial = defined $smart_data->{serial_number} ? $smart_data->{serial_number} : '';
            $self->_log("Stored SMART data: HDD ID $hdd_id, Event ID $event_id (Serial: $serial)", 2);
            $stored = 1;
        }
        else {
            $self->_log("Failed to store SMART data for HDD ID $hdd_id", 1);
        }
        1;
    };

    unless ($completed) {
        my $error = $@ || 'unknown error';
        $self->_log("ERROR storing SMART data: $error", 1);
        return 0;
    }
    return $stored;
}
=head2 _detect_or_create_hdd
Detect HDD migration or create new HDD record using hardware identifiers
=cut
# Resolve the database id of a drive, creating the record when needed.
#
# The detect_hdd_migration() database function is called with serial,
# model, device path, node id and slot. When it yields no id the drive is
# treated as new and _create_new_hdd is called.
#
# Parameters:
#   $drive_info - inventory entry (device_path, optional slot)
#   $smart_data - processed reading (serial_number, model_name)
#
# Returns the HDD id. Database errors propagate to the caller.
sub _detect_or_create_hdd {
    my ($self, $drive_info, $smart_data) = @_;

    my $serial      = $smart_data->{serial_number} || 'unknown';
    my $model       = $smart_data->{model_name} || 'unknown';
    my $device_path = $drive_info->{device_path};

    # Slot 0 is a real slot; only a missing or empty value becomes NULL.
    my $slot = $drive_info->{slot};
    $slot = undef unless defined $slot && length $slot;

    # Call PostgreSQL function to detect migration. selectrow_array also
    # finishes the statement handle after the single row is read.
    my ($hdd_id) = $self->{db_handle}->selectrow_array(
        q{SELECT detect_hdd_migration(?, ?, ?, ?, ?, 'collector')},
        undef,
        $serial, $model, $device_path, $self->{node_id}, $slot,
    );

    # If NULL returned, this is a new HDD - create it
    if (!defined $hdd_id) {
        $hdd_id = $self->_create_new_hdd($drive_info, $smart_data);
        $self->_log("New HDD discovered: $serial ($model) at $device_path", 2);
    }
    else {
        $self->_log("HDD tracked: ID $hdd_id, Serial $serial", 3);
    }

    return $hdd_id;
}
=head2 _create_new_hdd
Create new HDD record with hardware-based identification
=cut
# Insert a new row into hdd_inventory and raise a discovery alert for it.
#
# Parameters:
#   $drive_info - inventory entry (size_gb, device_path, slot, madagascar_id)
#   $smart_data - processed reading (serial_number, model_name, firmware)
#
# Returns the id produced by the INSERT ... RETURNING statement. Database
# errors, including those from the alert insert, propagate to the caller.
sub _create_new_hdd {
    my ($self, $drive_info, $smart_data) = @_;

    my $sql = q{
        INSERT INTO hdd_inventory
        (serial_number, model_name, firmware, size_gb, manufacturer,
        current_device_path, current_node_id, current_slot,
        madagascar_id, first_seen, last_seen, status)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, NOW(), NOW(), 'active')
        RETURNING id
    };

    # Slot 0 is a real slot; only a missing or empty value becomes NULL.
    my $slot = $drive_info->{slot};
    $slot = undef unless defined $slot && length $slot;

    # selectrow_array reads the RETURNING row and finishes the statement.
    my ($hdd_id) = $self->{db_handle}->selectrow_array(
        $sql, undef,
        $smart_data->{serial_number} || 'unknown',
        $smart_data->{model_name} || 'unknown',
        $smart_data->{firmware} || '',
        $drive_info->{size_gb} || 0,
        scalar $self->_extract_manufacturer($smart_data->{model_name}),
        $drive_info->{device_path},
        $self->{node_id},
        $slot,
        $drive_info->{madagascar_id},
    );

    # Create discovery alert
    $self->_create_discovery_alert($hdd_id, $drive_info, $smart_data);

    return $hdd_id;
}
=head2 _extract_manufacturer
Extract manufacturer from model name
=cut
# Map a drive model string to a manufacturer name.
#
# Parameters:
#   $model_name - model string as reported by the drive
#
# Returns the manufacturer of the first matching pattern, otherwise the
# leading word of the model string, otherwise 'Unknown' (also for an empty
# or undefined model).
sub _extract_manufacturer {
    my ($self, $model_name) = @_;
    return 'Unknown' unless $model_name;

    # Common HDD manufacturer patterns. An ordered list keeps the patterns
    # as compiled regexes and makes the outcome deterministic when a model
    # string matches more than one of them.
    my @manufacturers = (
        [ qr/^WD|Western\s*Digital/i => 'Western Digital' ],
        [ qr/^ST|Seagate/i           => 'Seagate' ],
        [ qr/^HGST|Hitachi/i         => 'HGST/Hitachi' ],
        [ qr/^TOSHIBA/i              => 'Toshiba' ],
        [ qr/^Samsung/i              => 'Samsung' ],
        [ qr/^Maxtor/i               => 'Maxtor' ],
        [ qr/^Fujitsu/i              => 'Fujitsu' ],
    );

    foreach my $entry (@manufacturers) {
        my ($pattern, $manufacturer) = @$entry;
        return $manufacturer if $model_name =~ $pattern;
    }

    # Extract first word as fallback
    return $1 if $model_name =~ /^(\w+)/;

    return 'Unknown';
}
=head2 _create_discovery_alert
Create alert for new HDD discovery
=cut
# Record a 'discovery' row in alert_history for a newly created HDD.
#
# Parameters:
#   $hdd_id     - id of the new hdd_inventory row
#   $drive_info - inventory entry (device_path, size_gb)
#   $smart_data - processed reading (serial_number, model_name)
#
# Returns whatever the database handle's do() returns.
sub _create_discovery_alert {
    my ($self, $hdd_id, $drive_info, $smart_data) = @_;

    my $node        = $self->{node_id};
    my $device_path = $drive_info->{device_path};
    my $serial      = $smart_data->{serial_number};

    # The message uses placeholders for missing values; the serial column
    # itself receives the raw (possibly undefined) serial number.
    my @message_fields = (
        $serial || 'unknown',
        $smart_data->{model_name} || 'unknown',
        $device_path,
        $node,
        $drive_info->{size_gb} || '?',
    );
    my $message = sprintf(
        "New HDD discovered: %s (%s) at %s on node %s - Size: %s GB",
        @message_fields
    );

    my @bind = ($hdd_id, $serial, $device_path, $node, $message);
    return $self->{db_handle}->do(q{
INSERT INTO alert_history
(hdd_id, serial_number, device_path, node_id, alert_type, message)
VALUES (?, ?, ?, ?, 'discovery', ?)
}, undef, @bind);
}
=head2 _should_store_reading
Check if SMART reading should be stored using differential storage logic
=cut
# Ask the database whether a reading should be stored.
#
# The parameters are serialised as canonical (key-sorted) JSON so that
# identical readings always produce the same text and checksum regardless
# of Perl's hash ordering.
#
# Parameters:
#   $hdd_id     - id of the drive
#   $smart_data - processed reading (parameters, temperature)
#
# Returns a hash reference with store, type, changes_detected,
# changed_parameters, previous_reading_id (all undef when the function
# returned no row) and checksum.
sub _should_store_reading {
    my ($self, $hdd_id, $smart_data) = @_;

    # Generate checksum of SMART parameters
    my $parameters_json = JSON::XS->new->utf8->canonical(1)
        ->encode($smart_data->{parameters} || {});
    my $checksum = sha256_hex($parameters_json . ($smart_data->{temperature} || ''));

    # Call PostgreSQL function to determine if we should store this reading.
    # NOTE(review): "SELECT * FROM" expands a composite result into the
    # named columns read below; this assumes should_store_smart_reading()
    # returns those columns - confirm against the schema.
    my $result = $self->{db_handle}->selectrow_hashref(
        q{SELECT * FROM should_store_smart_reading(?, ?, ?, NOW())},
        undef,
        $hdd_id, $parameters_json, $checksum,
    ) || {};

    return {
        store               => $result->{should_store},
        type                => $result->{reading_type},
        changes_detected    => $result->{changes_detected},
        changed_parameters  => $result->{changed_parameters},
        previous_reading_id => $result->{previous_reading_id},
        checksum            => $checksum,
    };
}
=head2 _insert_smart_reading_differential
Insert SMART reading with differential storage information
=cut
# Insert a row into smart_readings together with its differential-storage
# metadata.
#
# Parameters:
#   $hdd_id       - id of the drive
#   $drive_info   - inventory entry (device_path)
#   $smart_data   - processed reading
#   $storage_info - hash with type, changes_detected, changed_parameters
#                   (JSON array text of parameter names), previous_reading_id
#                   and checksum
#
# For a 'differential' reading with a changed-parameter list only the
# listed parameters are serialised; otherwise all parameters are stored.
# Returns whatever the database handle's do() returns.
sub _insert_smart_reading_differential {
    my ($self, $hdd_id, $drive_info, $smart_data, $storage_info) = @_;

    my $all_params = $smart_data->{parameters};
    my $payload    = $all_params;    # baseline/full readings keep everything

    my $partial = $storage_info->{type} eq 'differential'
        && $storage_info->{changed_parameters};
    if ($partial) {
        # Keep only the parameters named in the changed list.
        my $changed_names = decode_json($storage_info->{changed_parameters});
        $payload = {};
        $payload->{$_} = $all_params->{$_} foreach @$changed_names;
    }

    my @bind = (
        $hdd_id,
        $smart_data->{serial_number},
        $drive_info->{device_path},
        $self->{node_id},
        $smart_data->{timestamp},
        $smart_data->{collection_ok},
        $smart_data->{temperature},
        encode_json($payload),
        $storage_info->{type},
        ($storage_info->{changes_detected} ? 'true' : 'false'),
        $storage_info->{changed_parameters},
        $storage_info->{previous_reading_id},
        $storage_info->{checksum},
    );

    return $self->{db_handle}->do(q{
INSERT INTO smart_readings
(hdd_id, serial_number, device_path, node_id, timestamp,
collection_ok, temperature, parameters_json, reading_type,
changes_detected, changed_parameters, previous_reading_id, checksum)
VALUES (?, ?, ?, ?, to_timestamp(?), ?, ?, ?, ?, ?, ?, ?, ?)
}, undef, @bind);
}
=head2 _insert_smart_reading
Insert SMART reading linked to hardware ID (legacy method for compatibility)
=cut
# Insert a complete reading into smart_readings (legacy path without
# differential-storage columns).
#
# Parameters:
#   $hdd_id     - id of the drive
#   $drive_info - inventory entry (device_path)
#   $smart_data - processed reading; its parameters are stored as JSON
#
# Returns whatever the database handle's do() returns.
sub _insert_smart_reading {
    my ($self, $hdd_id, $drive_info, $smart_data) = @_;

    my $dbh = $self->{db_handle};

    # Bind values in column order; the timestamp is converted by
    # to_timestamp() inside the statement.
    my @bind = (
        $hdd_id,
        $smart_data->{serial_number},
        $drive_info->{device_path},
        $self->{node_id},
        @{$smart_data}{qw(timestamp collection_ok temperature)},
        encode_json($smart_data->{parameters}),
    );

    return $dbh->do(q{
INSERT INTO smart_readings
(hdd_id, serial_number, device_path, node_id, timestamp,
collection_ok, temperature, parameters_json)
VALUES (?, ?, ?, ?, to_timestamp(?), ?, ?, ?)
}, undef, @bind);
}
=head2 collect_all
Collect SMART data from all drives in Madagascar inventory
=cut
# Collect and store SMART data for every drive returned by
# get_madagascar_drives, pausing 0.1 s between drives.
#
# Returns a hash reference with successful, failed, total and
# storage_stats. storage_stats always carries baseline, full, differential
# and skipped counters; they are zero when the statistics query fails,
# which is logged but does not abort the run.
sub collect_all {
    my $self = shift;

    my $drives = $self->get_madagascar_drives();
    my ($successful, $failed) = (0, 0);

    foreach my $drive (@$drives) {
        my $smart_data = $self->collect_smart_data($drive->{device_path});
        if ($smart_data && $self->store_smart_data($drive, $smart_data)) {
            $successful++;
        }
        else {
            $failed++;
            $self->_log("Failed to collect/store data for $drive->{device_path}");
        }

        # Small delay between drives to avoid overwhelming system
        select(undef, undef, undef, 0.1);
    }

    # Get storage statistics for this collection run. The collection itself
    # is already finished, so a statistics failure must not discard it.
    my $stats;
    my $have_stats = eval { $stats = $self->_get_recent_storage_stats(); 1 };
    unless ($have_stats) {
        my $error = $@ || 'unknown error';
        $self->_log("Warning: could not read storage statistics: $error");
    }
    $stats = {} unless ref $stats eq 'HASH';

    # Make sure every counter used in the log line below is defined.
    for my $counter (qw(baseline full differential skipped)) {
        $stats->{$counter} = 0 unless defined $stats->{$counter};
    }

    $self->_log("Collection complete: $successful successful, $failed failed");
    $self->_log("Storage efficiency - Baseline: $stats->{baseline}, Full: $stats->{full}, Differential: $stats->{differential}, Skipped: $stats->{skipped}");

    return {
        successful    => $successful,
        failed        => $failed,
        total         => scalar(@$drives),
        storage_stats => $stats,
    };
}
=head2 _get_recent_storage_stats
Get statistics about storage efficiency from recent readings
=cut
# Count the smart_readings rows of the last hour per reading_type.
#
# Returns a hash reference with one counter per reading type found
# (baseline, full, differential and skipped are always present), total,
# and efficiency_percent: the share of baseline plus differential readings
# in the total, formatted with one decimal (0 when there are no rows).
# Database errors propagate to the caller.
sub _get_recent_storage_stats {
    my $self = shift;

    my $sql = q{
        SELECT
        reading_type,
        COUNT(*) as count
        FROM smart_readings
        WHERE timestamp > NOW() - INTERVAL '1 hour'
        GROUP BY reading_type
        ORDER BY reading_type
    };

    # Slice => {} returns each row as a hash reference.
    my $rows = $self->{db_handle}->selectall_arrayref($sql, { Slice => {} }) || [];

    my $stats = {
        baseline     => 0,
        full         => 0,
        differential => 0,
        skipped      => 0,    # read by collect_all's summary line
        total        => 0,
    };

    foreach my $row (@$rows) {
        my $count = $row->{count} || 0;
        $stats->{total} += $count;

        # Rows without a reading type only count towards the total.
        next unless defined $row->{reading_type};
        $stats->{ $row->{reading_type} } = $count;
    }

    # Calculate efficiency percentage
    my $efficient_readings = $stats->{differential} + $stats->{baseline};
    $stats->{efficiency_percent} = $stats->{total} > 0
        ? sprintf("%.1f", ($efficient_readings / $stats->{total}) * 100)
        : 0;

    return $stats;
}
=head2 _log
Internal logging method with enhanced debug levels
=cut
# Emit a log message when the object's debug level is at least $level.
#
# Parameters:
#   $message - text to log
#   $level   - minimum debug level required (defaults to 1)
#
# The message is always printed to the currently selected output handle.
# It is also sent to syslog when AUTOSMART_LOG_SYSLOG is 'true', and
# appended to AUTOSMART_DEBUG_LOG_FILE when that is set and the debug level
# is 2 or higher. Syslog and file logging are best-effort: their failures
# are ignored and the caller's $@ is left untouched.
sub _log {
    my ($self, $message, $level) = @_;
    $level ||= 1;    # Default to basic level

    # Check if we should log based on debug level
    my $debug = $self->{debug} || 0;
    return unless $debug >= $level;

    local $@;        # the evals below must not disturb the caller's error

    $message = '' unless defined $message;
    my $settings  = $self->{local_settings} || {};
    my $timestamp = scalar(localtime());
    my $node_id   = $self->{node_id} || 'unknown';
    my $prefix    = "[$timestamp] [$node_id] SmartCollector";

    print "$prefix: $message\n";

    # Also log to syslog if enabled
    my $use_syslog = defined $settings->{AUTOSMART_LOG_SYSLOG}
        && $settings->{AUTOSMART_LOG_SYSLOG} eq 'true';
    if ($use_syslog) {
        eval {
            require Sys::Syslog;    # loaded on demand, inside the eval
            my $facility = $settings->{AUTOSMART_LOG_FACILITY} || 'daemon';
            Sys::Syslog::openlog('autosmart', 'pid,ndelay', $facility);
            # The text goes in as an argument of '%s' so that '%' characters
            # in the message are not interpreted as format directives.
            Sys::Syslog::syslog('info', '%s', "SmartCollector[$node_id]: $message");
            Sys::Syslog::closelog();
            1;
        };
    }

    # Log to file if specified
    my $log_file = $settings->{AUTOSMART_DEBUG_LOG_FILE};
    if ($log_file && $debug >= 2) {
        if (open my $fh, '>>', $log_file) {
            print {$fh} "$prefix: $message\n";
            close $fh;
        }
    }

    return;
}
=head2 _register_node
Register this node in the cluster database
=cut
# Create the cluster_nodes table when it is missing and insert or refresh
# the row of this node (node id, fully qualified hostname, first address
# reported by `hostname -I`).
#
# Registration is best-effort: any failure is logged as a warning and not
# re-thrown.
sub _register_node {
    my $self = shift;

    my $registered = eval {
        # Create cluster_nodes table if it doesn't exist
        $self->{db_handle}->do(q{
            CREATE TABLE IF NOT EXISTS cluster_nodes (
            node_id VARCHAR(100) PRIMARY KEY,
            hostname VARCHAR(255),
            ip_address INET,
            last_seen TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
            status VARCHAR(20) DEFAULT 'active',
            version VARCHAR(50),
            capabilities JSON,
            created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
            )
        });

        # Register/update this node
        my $hostname = `hostname -f`;
        $hostname = '' unless defined $hostname;
        chomp $hostname;
        $hostname = $self->{node_id} unless length $hostname;

        # First whitespace-separated address, or undef (stored as NULL) when
        # the command printed nothing; an empty string is not a valid INET.
        my $addresses = `hostname -I`;
        my ($ip) = split ' ', (defined $addresses ? $addresses : '');

        $self->{db_handle}->do(q{
            INSERT INTO cluster_nodes
            (node_id, hostname, ip_address, last_seen, status, version)
            VALUES (?, ?, ?, NOW(), 'active', '1.0')
            ON CONFLICT (node_id)
            DO UPDATE SET
            hostname = EXCLUDED.hostname,
            ip_address = EXCLUDED.ip_address,
            last_seen = NOW(),
            status = 'active'
        }, undef, $self->{node_id}, $hostname, $ip);

        $self->_log("Registered node $self->{node_id} in cluster", 2);
        1;
    };

    unless ($registered) {
        my $error = $@ || 'unknown error';
        $self->_log("Warning: Failed to register node: $error", 1);
    }
    return;
}
=head2 _insert_collection_event (SCHEMA v2)
SCHEMA v2: Insert complete SMART reading (no differential storage)
Calls PostgreSQL function: insert_collection_event()
=cut
# SCHEMA v2: store one complete reading through the insert_collection_event()
# database function.
#
# The parameters are serialised as canonical (key-sorted) JSON so that two
# identical readings always produce the same text and checksum, whatever
# order Perl happens to iterate the hash in.
#
# Parameters:
#   $hdd_id     - id of the drive
#   $drive_info - inventory entry (not used by this method)
#   $smart_data - processed reading (serial_number, temperature, parameters)
#
# Returns the value produced by the database function, or undef when the
# call fails (the error is logged, not re-thrown).
sub _insert_collection_event {
    my ($self, $hdd_id, $drive_info, $smart_data) = @_;

    # Build parameters JSON for database function
    my $params_json = JSON::XS->new->utf8->canonical(1)
        ->encode($smart_data->{parameters} || {});
    my $checksum = sha256_hex($params_json . ($smart_data->{temperature} || ''));

    # Call PostgreSQL function: insert_collection_event(hdd_id, serial, node, ts, temp, ok, checksum, params::JSONB)
    my $event_id;
    my $inserted = eval {
        # selectrow_array also finishes the statement handle.
        ($event_id) = $self->{db_handle}->selectrow_array(
            q{SELECT insert_collection_event(?, ?, ?, NOW(), ?, ?, ?, ?::jsonb)},
            undef,
            $hdd_id,
            $smart_data->{serial_number},
            $self->{node_id},
            $smart_data->{temperature} || undef,    # a temperature of 0 is stored as NULL
            1,                                      # collection_ok = true
            $checksum,
            $params_json,
        );
        1;
    };

    unless ($inserted) {
        my $error = $@ || 'unknown error';
        $self->_log("ERROR inserting collection event: $error", 1);
        return undef;
    }
    return $event_id;
}
=head2 DESTROY
Cleanup database connection
=cut
# Disconnect from the database when the object is destroyed.
#
# A destructor can run while another exception is being handled, so the
# error/status globals are localised and a failing disconnect (the handle
# is opened with RaiseError) is swallowed instead of being thrown.
sub DESTROY {
    my $self = shift;
    local ($@, $!, $?);

    my $dbh = delete $self->{db_handle};
    return unless $dbh;

    eval { $dbh->disconnect(); 1 };
    return;
}
1;
__END__
=head1 AUTHOR
AutoSMART Development Team
=head1 LICENSE
This software is part of the autoSMART project.
=cut