|
Bogdan Timofte
authored
3 months ago
|
1
|
#!/usr/bin/perl
|
|
|
2
|
use strict;
|
|
|
3
|
use warnings;
|
|
|
4
|
use DBI;
|
|
|
5
|
use JSON;
|
|
|
6
|
use File::Slurp;
|
|
|
7
|
use Getopt::Long;
|
|
|
8
|
use POSIX qw(strftime);
|
|
|
9
|
use Time::HiRes qw(sleep);
|
|
|
10
|
|
|
|
11
|
# autoSMART Collector Daemon
|
|
|
12
|
# Version: 1.0
|
|
|
13
|
# Description: Automated SMART data collection daemon
|
|
|
14
|
|
|
|
15
|
# ---------------------------------------------------------------------------
# Startup: command-line switches, debug flag, configuration file.
# ---------------------------------------------------------------------------

my $config_file;

# Initial debug state comes from the environment; only the literal string
# 'true' switches it on.
my $debug = 0;
if (defined $ENV{AUTOSMART_DEBUG} && $ENV{AUTOSMART_DEBUG} eq 'true') {
    $debug = 1;
}

# Parsed from the command line; nothing in this file reads it afterwards.
my $foreground = 0;

unless (
    GetOptions(
        'config=s'   => \$config_file,
        'debug'      => \$debug,
        'foreground' => \$foreground
    )
) {
    die "Usage: $0 --config <file> [--debug] [--foreground]\n";
}

# When AUTOSMART_DEBUG is present in the environment it has the final word,
# overriding whatever --debug set above.
if (defined $ENV{AUTOSMART_DEBUG}) {
    $debug = ($ENV{AUTOSMART_DEBUG} eq 'true') ? 1 : 0;
    log_message(
        $debug
            ? "AUTOSMART_DEBUG enabled via /etc/default/autonas or environment"
            : "AUTOSMART_DEBUG disabled via /etc/default/autonas or environment"
    );
}

if (!$config_file) {
    die "Configuration file required\n";
}
if (!-f $config_file) {
    die "Configuration file not found: $config_file\n";
}

# Load configuration
my $config = load_config($config_file);

# Node identity: configured id, otherwise the short hostname of this machine.
chomp(my $node_id = $config->{node}{id} || `hostname -s`);

log_message("Starting autoSMART collector daemon on node: $node_id");
log_message("Configuration loaded from: $config_file");
|
|
|
45
|
|
|
|
46
|
# Main collection loop
#
# Runs forever: every $scan_interval seconds one collection cycle is started.
# A cycle is flagged as a "full" scan once $full_scan_interval seconds have
# passed since the previous full scan. Failures inside a cycle are logged and
# never terminate the daemon.
my $last_full_scan = 0;
my $scan_interval = $config->{node}{scan_interval} || 300;
my $full_scan_interval = $config->{collection}{full_scan_interval} || 3600;

# Configuration values arrive as raw strings. A non-numeric interval would
# make sleep() return immediately and turn this loop into a busy loop, so
# anything that is not a positive number is replaced by the default.
for my $interval (
    [ \$scan_interval,      'node.scan_interval',            300 ],
    [ \$full_scan_interval, 'collection.full_scan_interval', 3600 ],
) {
    my ($value_ref, $label, $default) = @{$interval};
    next if ${$value_ref} =~ /\A\d+(?:\.\d+)?\z/ && ${$value_ref} > 0;

    log_message("WARNING: invalid $label '${$value_ref}', using default of $default seconds");
    ${$value_ref} = $default;
}

while (1) {
    # The trailing "1" lets the eval's own return value signal success, so a
    # failure is detected even if $@ is clobbered before it is inspected.
    my $cycle_ok = eval {
        my $current_time = time();
        my $force_full = ($current_time - $last_full_scan) >= $full_scan_interval;

        if ($force_full) {
            log_message("Performing full SMART scan (forced)");
            $last_full_scan = $current_time;
        }

        collect_smart_data($force_full);
        1;
    };

    if (!$cycle_ok) {
        my $error = $@ || 'unknown error';
        log_message("ERROR: Collection failed: $error");
    }

    log_message("Sleeping for $scan_interval seconds...") if $debug;
    sleep($scan_interval);
}
|
|
|
72
|
|
|
|
73
|
# Run one collection cycle: connect to PostgreSQL, enumerate candidate disk
# device nodes and hand each block device to process_device().
#
# Arguments:
#   $force_full - true when the caller scheduled a full scan; passed through
#                 to process_device() unchanged.
#
# Dies if the database connection cannot be established (the connection is
# opened with RaiseError). Errors for an individual device are caught and
# logged so the remaining devices are still processed.
sub collect_smart_data {
    my ($force_full) = @_;

    log_message("[DEBUG] Starting data collection cycle, force_full=" . ($force_full ? 'true' : 'false')) if $debug;

    # Connect to database
    my $dsn = "DBI:Pg:host=$config->{database}{host};dbname=$config->{database}{database}";
    log_message("[DEBUG] Connecting to database: $dsn") if $debug;

    my $dbh = DBI->connect($dsn, $config->{database}{user}, $config->{database}{password},
                           {RaiseError => 1, AutoCommit => 1})
        or die "Database connection failed: $DBI::errstr";

    log_message("✓ Database connected") if $debug;

    # Test database connectivity (debug only; a failure here is logged and
    # does not abort the cycle)
    if ($debug) {
        my $probe_ok = eval {
            my ($count) = $dbh->selectrow_array("SELECT COUNT(*) FROM hdd_inventory");
            log_message("[DEBUG] Database test: found $count HDDs in inventory");

            my ($presence_count) = $dbh->selectrow_array("SELECT COUNT(*) FROM hdd_presence WHERE is_current = TRUE");
            log_message("[DEBUG] Database test: found $presence_count current HDD presence records");
            1;
        };
        if (!$probe_ok) {
            log_message("[DEBUG] Database test failed: $@");
        }
    }

    # Scan for devices. The glob is deliberately wide and then filtered with
    # an anchored pattern so that whole-disk nodes with multi-letter or
    # multi-digit names (/dev/sdaa, /dev/nvme10n1) are included while
    # partitions (/dev/sda1, /dev/nvme0n1p1) are not.
    my @devices = grep { m{\A/dev/sd[a-z]+\z} } glob('/dev/sd*');
    push @devices, grep { m{\A/dev/nvme\d+n\d+\z} } glob('/dev/nvme*n*');

    log_message("[DEBUG] Found " . scalar(@devices) . " potential devices: " . join(', ', @devices)) if $debug;

    foreach my $device (@devices) {
        if (-b $device) {
            log_message("[DEBUG] Processing block device: $device") if $debug;
        } else {
            log_message("[DEBUG] Skipping non-block device: $device") if $debug;
            next;
        }

        # One failing device must not prevent the others from being read.
        my $device_ok = eval {
            process_device($dbh, $device, $force_full);
            1;
        };

        if (!$device_ok) {
            log_message("ERROR processing device $device: $@");
        }
    }

    $dbh->disconnect();
    log_message("Collection cycle complete") if $debug;
}
|
|
|
132
|
|
|
|
133
|
# Read SMART data from one device with smartctl, parse it and store it as a
# single collection event through the insert_collection_event() SQL function.
#
# Arguments:
#   $dbh        - connected DBI handle
#   $device     - device node path (e.g. /dev/sda)
#   $force_full - full-scan flag from the caller; accepted for interface
#                 compatibility, not consulted in this routine
#
# Returns nothing. The device is skipped (with a debug message) when smartctl
# produces no output, reports that SMART is unavailable, or when no model,
# serial number or attribute could be parsed. Database errors propagate to
# the caller as exceptions.
sub process_device {
    my ($dbh, $device, $force_full) = @_;

    log_message("[DEBUG] process_device: Processing $device") if $debug;

    # Get SMART data (stderr is merged into stdout so error text is seen too)
    my $smartctl_cmd = "smartctl -A -i -H $device 2>&1";
    log_message("[DEBUG] Running: $smartctl_cmd") if $debug;
    my @smart_output = `$smartctl_cmd`;
    my $exit_code = $? >> 8;

    if (!@smart_output) {
        log_message("[DEBUG] No SMART output for $device") if $debug;
        return;
    }

    log_message("[DEBUG] Got " . scalar(@smart_output) . " lines of SMART output from $device (exit code: $exit_code)") if $debug;

    # Check if smartctl indicates the device doesn't support SMART
    my $smart_output_text = join('', @smart_output);
    if ($smart_output_text =~ /SMART support is.*Unavailable|Device does not support SMART|No such device/) {
        log_message("[DEBUG] Device $device does not support SMART or is not accessible") if $debug;
        return;
    }

    my ($model, $serial, $temp, %smart_params);

    # Shared handling for one parsed attribute row: normalise the name,
    # remember the raw value and track the temperature. $format only labels
    # the debug line.
    my $record_attribute = sub {
        my ($name, $raw, $format) = @_;

        $name =~ s/\s+/_/g;
        $smart_params{$name} = $raw;

        # Only the first few attributes are echoed to keep debug logs short.
        if ($debug && scalar(keys %smart_params) <= 5) {
            log_message("[DEBUG] SMART param ($format format): $name = $raw");
        }

        # Several attributes can carry a temperature; the last positive
        # reading wins.
        if ($name =~ /Temperature|Temp/i) {
            $temp = $raw if (!defined $temp || $raw > 0);
        }
    };

    foreach my $line (@smart_output) {
        chomp $line;

        if ($line =~ /Device Model:\s+(.+?)\s*$/) {
            $model = $1;
            log_message("[DEBUG] Found model: $model") if $debug;
        } elsif ($line =~ /Serial Number:\s+(.+?)\s*$/) {
            $serial = $1;
            log_message("[DEBUG] Found serial: $serial") if $debug;
        } elsif ($line =~ /^\s*(\d+)\s+(.+?)\s+0x\w+\s+\d+\s+\d+\s+\d+\s+\w+\s+\w+\s+\w+\s+(\d+)/) {
            # Old format: ID ATTRIBUTE_NAME 0xXXXX DDD DDD DDD Pre-fail Always - RAW_VALUE
            my ($name, $raw) = ($2, $3);
            $record_attribute->($name, $raw, 'old');
        } elsif ($line =~ /^\s*(\d+)\s+(.+?)\s+0x\w+\s+\d+\s+\d+\s+\d+\s+\S+\s+\S+\s+\S+\s+(\d+)/) {
            # New format: ID ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
            my ($name, $raw) = ($2, $3);
            $record_attribute->($name, $raw, 'new');
        }
    }

    if (!$model || !$serial) {
        log_message("[DEBUG] Missing critical data for $device - model: " . ($model || 'NULL') . ", serial: " . ($serial || 'NULL')) if $debug;
        return;
    }

    if (!%smart_params) {
        log_message("[DEBUG] No SMART parameters found for $device") if $debug;
        return;
    }

    log_message("[DEBUG] Parsed device data - Model: $model, Serial: $serial, Temperature: " . ($temp || 'NULL') . ", Parameters: " . scalar(keys %smart_params)) if $debug;

    log_message("Processing: $model ($serial) @ $device") if $debug;

    # Get or create HDD inventory entry
    my $hdd_id = get_or_create_hdd($dbh, $serial, $model, $device);

    # SCHEMA v2: Store complete reading via PostgreSQL function.
    # The JSON is encoded with sorted keys: Perl hash order is not stable, so
    # without canonical encoding identical readings would produce different
    # checksums.
    my $params_json = JSON->new->utf8->canonical->encode(\%smart_params);

    # Digest::SHA is loaded here because the file's import list does not
    # bring sha256_hex into scope.
    require Digest::SHA;
    my $checksum = Digest::SHA::sha256_hex($params_json . ($temp || ''));

    my $sth = $dbh->prepare("
        SELECT insert_collection_event(?, ?, ?, NOW(), ?, ?, ?, ?::jsonb)
    ");

    my ($event_id) = $dbh->selectrow_array(
        $sth, undef,
        $hdd_id,
        $serial,
        $node_id,
        $temp || 0,
        1,  # collection_ok = true
        $checksum,
        $params_json
    );

    if ($event_id) {
        log_message("  ✓ SMART event stored (ID: $event_id, temp: " . ($temp || 0) . "°C, params: " . scalar(keys %smart_params) . ")") if $debug;
    } else {
        log_message("  ✗ Failed to store SMART event for $serial");
    }

    return;
}
|
|
|
242
|
|
|
|
243
|
# Look up the hdd_inventory row for a drive by serial number, creating it if
# it does not exist, and keep the hdd_presence history for this node current.
#
# Arguments:
#   $dbh         - connected DBI handle
#   $serial      - drive serial number (lookup key)
#   $model       - drive model name (stored only when a new row is created)
#   $device_path - device node the drive is currently seen at
#
# Returns the hdd_inventory id of the existing or newly created row.
# Database errors propagate as exceptions when the handle has RaiseError set.
sub get_or_create_hdd {
    my ($dbh, $serial, $model, $device_path) = @_;

    log_message("[DEBUG] get_or_create_hdd: serial=$serial, model=$model, device=$device_path, node=$node_id") if $debug;

    # Check if HDD exists
    my ($hdd_id) = $dbh->selectrow_array(
        "SELECT id FROM hdd_inventory WHERE serial_number = ?",
        undef, $serial
    );

    log_message("[DEBUG] HDD lookup result: hdd_id=" . ($hdd_id || 'NULL') . " for serial=$serial") if $debug;

    if ($hdd_id) {
        log_message("[DEBUG] Found existing HDD with id=$hdd_id, updating location and presence") if $debug;

        # Update current location in inventory
        $dbh->do(
            "UPDATE hdd_inventory SET current_device_path = ?, current_node_id = ?, last_seen = NOW()
             WHERE id = ?",
            undef, $device_path, $node_id, $hdd_id
        );
        log_message("[DEBUG] Updated hdd_inventory location for hdd_id=$hdd_id") if $debug;

        # Mark presence records held by other nodes as historic for this serial.
        # do() reports zero rows as the string "0E0"; "+ 0" turns that into a
        # plain number for the log line.
        my $affected_rows = $dbh->do(
            "UPDATE hdd_presence SET is_current = FALSE WHERE serial_number = ? AND is_current = TRUE AND node <> ?",
            undef, $serial, $node_id
        ) + 0;
        log_message("[DEBUG] Marked $affected_rows historic hdd_presence records for serial=$serial") if $debug;

        # Check if there is already a current presence for this serial/node
        my ($presence_id) = $dbh->selectrow_array(
            "SELECT id FROM hdd_presence WHERE serial_number = ? AND node = ? AND is_current = TRUE",
            undef, $serial, $node_id
        );

        if ($presence_id) {
            log_message("[DEBUG] Found existing presence record id=$presence_id, updating data_end") if $debug;
            # Update data_end
            $dbh->do("UPDATE hdd_presence SET data_end = NOW() WHERE id = ?", undef, $presence_id);
            log_message("[DEBUG] Updated data_end for presence_id=$presence_id") if $debug;
        } else {
            log_message("[DEBUG] No existing presence for serial=$serial node=$node_id, creating new record") if $debug;
            # Create new presence record
            my ($new_presence_id) = _open_presence_record($dbh, $serial);
            log_message("[DEBUG] Created new hdd_presence record with id=$new_presence_id for serial=$serial node=$node_id") if $debug;
        }

        return $hdd_id;
    }

    # Create new HDD entry
    log_message("[DEBUG] Creating new HDD entry for serial=$serial model=$model") if $debug;
    my ($new_id) = $dbh->selectrow_array(
        "INSERT INTO hdd_inventory (serial_number, model_name, current_device_path, current_node_id,
                                    first_seen, last_seen)
         VALUES (?, ?, ?, ?, NOW(), NOW())
         RETURNING id",
        undef, $serial, $model, $device_path, $node_id
    );
    log_message("[DEBUG] Created new HDD inventory entry with id=$new_id") if $debug;

    # Retire any stale presence rows for this serial and create the new one
    my ($new_presence_id, $affected_rows) = _open_presence_record($dbh, $serial);
    log_message("[DEBUG] Marked $affected_rows historic hdd_presence records for new serial=$serial") if $debug;
    log_message("[DEBUG] Created new hdd_presence record with id=$new_presence_id for new serial=$serial node=$node_id") if $debug;

    return $new_id;
}

# Start a new "current" hdd_presence row for $serial on this node.
# Every row still flagged is_current for the serial is marked historic first.
# Returns ($presence_id, $retired_count): the id of the inserted row and the
# number of rows that were marked historic.
sub _open_presence_record {
    my ($dbh, $serial) = @_;

    my $retired_count = $dbh->do(
        "UPDATE hdd_presence SET is_current = FALSE WHERE serial_number = ? AND is_current = TRUE",
        undef, $serial
    ) + 0;

    # RETURNING id mirrors the hdd_inventory insert and does not depend on
    # the driver being able to locate the table's sequence.
    my ($presence_id) = $dbh->selectrow_array(
        "INSERT INTO hdd_presence (serial_number, node, data_start, data_end, is_current) VALUES (?, ?, NOW(), NOW(), TRUE) RETURNING id",
        undef, $serial, $node_id
    );

    return ($presence_id, $retired_count);
}
|
|
|
311
|
|
|
|
312
|
# Parse the daemon's configuration file.
#
# The format is a small YAML-like subset, two levels deep:
#
#   section:
#     key: value
#
# Leading/trailing whitespace is ignored, as are blank lines and lines
# starting with '#'. A value wrapped in one matching pair of single or double
# quotes has the quotes removed. Indentation is not significant: a key
# belongs to the most recent section header.
#
# Arguments:
#   $file - path of the configuration file
#
# Returns a hash reference of the form { section => { key => value } }.
# Dies if the file cannot be opened.
sub load_config {
    my ($file) = @_;

    open(my $fh, '<', $file)
        or die "Cannot read configuration file '$file': $!\n";

    my %config;
    my $current_section;

    # Simple YAML-like parser
    while (my $line = <$fh>) {
        $line =~ s/^\s+|\s+$//g;
        next if $line eq '' || $line =~ /^#/;

        if ($line =~ /^(\w+):$/) {
            $current_section = $1;
            next;
        }

        next unless $line =~ /^(\w+):\s*(.+)$/;
        my ($key, $value) = ($1, $2);

        # A key outside any section has nowhere to go; using the undefined
        # section name as a hash key would create a bogus '' section.
        if (!defined $current_section) {
            warn "Ignoring '$key' in $file: no section header precedes it\n";
            next;
        }

        # "value" / 'value' -> value
        $value =~ s/^(["'])(.*)\1$/$2/;

        $config{$current_section}{$key} = $value;
    }

    close($fh);

    return \%config;
}
|
|
|
333
|
|
|
|
334
|
# Write one timestamped log line to the currently selected output handle
# (STDOUT unless the program selected another one).
#
# Arguments:
#   $message - text to log; a newline is appended
#
# Line format: "[YYYY-MM-DD HH:MM:SS] message", timestamp in local time.
sub log_message {
    my ($message) = @_;
    my $timestamp = strftime("%Y-%m-%d %H:%M:%S", localtime);

    # Flush every line: when output goes to a file or pipe it is block
    # buffered by default, which would delay log lines or lose them if the
    # daemon is killed.
    local $| = 1;
    print "[$timestamp] $message\n";

    return;
}
|
|
|
339
|
|
|
|
340
|
__END__
|
|
|
341
|
|
|
|
342
|
=head1 NAME
|
|
|
343
|
|
|
|
344
|
smart-collector-daemon.pl - autoSMART SMART Data Collection Daemon
|
|
|
345
|
|
|
|
346
|
=head1 SYNOPSIS
|
|
|
347
|
|
|
|
348
|
smart-collector-daemon.pl --config <config_file> [--debug] [--foreground]
|
|
|
349
|
|
|
|
350
|
=head1 DESCRIPTION
|
|
|
351
|
|
|
|
352
|
Automated daemon that collects SMART data from storage devices and stores it
in a PostgreSQL database with differential storage optimization.
|
|
|
354
|
|
|
|
355
|
=head1 OPTIONS
|
|
|
356
|
|
|
|
357
|
=over 4
|
|
|
358
|
|
|
|
359
|
=item --config <file>
|
|
|
360
|
|
|
|
361
|
Configuration file path (required)
|
|
|
362
|
|
|
|
363
|
=item --debug
|
|
|
364
|
|
|
|
365
|
Enable debug logging
|
|
|
366
|
|
|
|
367
|
=item --foreground
|
|
|
368
|
|
|
|
369
|
Run in foreground (don't daemonize)
|
|
|
370
|
|
|
|
371
|
=back
|
|
|
372
|
|
|
|
373
|
=head1 AUTHOR
|
|
|
374
|
|
|
|
375
|
autoSMART v1.0 - Hardware-based HDD tracking system
|
|
|
376
|
|
|
|
377
|
=cut
|