|
Bogdan Timofte
authored
3 months ago
|
1
|
#!/usr/bin/perl
|
|
|
2
|
|
|
|
3
|
use strict;
|
|
|
4
|
use warnings;
|
|
|
5
|
use FindBin qw($Bin);
|
|
|
6
|
use lib "$Bin/../lib";
|
|
|
7
|
|
|
|
8
|
use SmartCollector;
|
|
|
9
|
use Getopt::Long;
|
|
|
10
|
use POSIX qw(strftime);
|
|
|
11
|
|
|
|
12
|
=head1 NAME
|
|
|
13
|
|
|
|
14
|
autosmart-collector.pl - SMART data collection daemon for Proxmox cluster
|
|
|
15
|
|
|
|
16
|
=head1 SYNOPSIS
|
|
|
17
|
|
|
|
18
|
autosmart-collector.pl [OPTIONS]
|
|
|
19
|
|
|
|
20
|
=head1 OPTIONS
|
|
|
21
|
|
|
|
22
|
--cluster-config FILE Cluster configuration file (default: /etc/pve/autoSMART/cluster.conf)
|
|
|
23
|
--local-config FILE Local configuration file (default: /etc/default/autosmart)
|
|
|
24
|
--daemon Run as daemon
|
|
|
25
|
--once Run once and exit (for cron jobs)
|
|
|
26
|
--device PATH Collect from specific device only
|
|
|
27
|
--debug Enable debug logging
|
|
|
28
|
--help Show this help
|
|
|
29
|
|
|
|
30
|
=head1 DESCRIPTION
|
|
|
31
|
|
|
|
32
|
This script collects SMART data from HDDs in a Proxmox cluster environment.
|
|
|
33
|
Configuration is split between cluster-wide settings in /etc/pve/autoSMART/
|
|
|
34
|
and local node settings in /etc/default/autosmart.
|
|
|
35
|
|
|
|
36
|
=cut
|
|
|
37
|
|
|
|
38
|
# Configuration defaults; each of these can be overridden on the command line.
my $cluster_config  = '/etc/pve/autoSMART/cluster.conf';
my $local_config    = '/etc/default/autosmart';
my $daemon_mode     = 0;
my $run_once        = 0;
my $specific_device = '';
my $debug           = 0;
my $help            = 0;

GetOptions(
    'cluster-config=s' => \$cluster_config,
    'local-config=s'   => \$local_config,
    'daemon'           => \$daemon_mode,
    'once'             => \$run_once,
    'device=s'         => \$specific_device,
    'debug'            => \$debug,
    'help'             => \$help,
) or die "Error parsing command line arguments\n";

if ($help) {
    print_help();
    exit 0;
}

# Load local configuration for environment setup.
# load_local_config() returns an empty hash when the file does not exist;
# the existence check further down turns that case into a fatal error.
my %local_settings = load_local_config($local_config);

# Fall back to the debug settings from the local config when --debug was not
# given. The "|| ''" guard keeps the string comparison from operating on an
# undefined value (and warning) when the key is absent from the file.
unless ($debug) {
    my $debug_enabled = $local_settings{AUTOSMART_DEBUG_ENABLED} || '';
    $debug = ($debug_enabled eq 'true')
        ? ($local_settings{AUTOSMART_DEBUG_LEVEL} || 1)
        : 0;
}

# Validate configuration files
unless (-f $cluster_config) {
    die "Cluster configuration not found: $cluster_config\n";
}

unless (-f $local_config) {
    die "Local configuration not found: $local_config\n";
}

# Check for emergency stop: the mere presence of the stop file aborts the run.
if (-f ($local_settings{AUTOSMART_EMERGENCY_STOP_FILE} || '/etc/autosmart/EMERGENCY_STOP')) {
    die "Emergency stop file detected - autoSMART is disabled\n";
}

# Initialize collector with Proxmox cluster configuration
my $collector = SmartCollector->new(
    cluster_config => $cluster_config,
    local_config   => $local_config,
    debug          => $debug,
);

log_message("autoSMART collector starting for cluster node...");

# Mode selection, in order of precedence:
# --device, then --once, then --daemon, otherwise a single collection run.
if ($specific_device) {
    # Collect from specific device
    collect_specific_device($collector, $specific_device);
} elsif ($run_once) {
    # Single collection run
    run_collection_cycle($collector);
} elsif ($daemon_mode) {
    # Daemon mode
    run_daemon($collector, \%local_settings);
} else {
    # Default: single collection run
    run_collection_cycle($collector);
}

log_message("autoSMART collector finished");
|
|
|
109
|
|
|
|
110
|
=head2 load_local_config

    my %settings = load_local_config($config_file);

Parse a shell-style C<KEY=value> file (such as /etc/default/autosmart) into
a flat hash and return it as a list of key/value pairs.

Blank lines and lines starting with C<#> are skipped. Leading and trailing
whitespace (including a trailing carriage return) is ignored, an optional
leading C<export> keyword is accepted, and one pair of matching single or
double quotes around the value is removed. C<KEY=> yields an empty string.
Lines that do not look like an assignment are ignored.

Returns an empty list when C<$config_file> is not a regular file, and dies
when the file exists but cannot be opened.

=cut

sub load_local_config {
    my $config_file = shift;

    my %settings = ();

    return %settings unless -f $config_file;

    open my $fh, '<', $config_file
        or die "Cannot read local config: $config_file: $!";

    while (my $line = <$fh>) {
        # Trim both ends; this also removes the newline and any CR left by
        # files saved with DOS line endings.
        $line =~ s/^\s+|\s+$//g;
        next if $line eq '' || $line =~ /^#/;

        # Accept "KEY=value" and "export KEY=value"; the value may be empty.
        next unless $line =~ /^(?:export\s+)?(\w+)=(.*)$/;
        my ($key, $value) = ($1, $2);

        # Strip quotes only when the same quote character opens and closes
        # the value, so quotes that are part of the value are preserved.
        if ($value =~ /^(["'])(.*)\1$/) {
            $value = $2;
        }

        $settings{$key} = $value;
    }

    close $fh;

    return %settings;
}
|
|
|
141
|
|
|
|
142
|
=head2 collect_specific_device

    collect_specific_device($collector, $device_path);

Collect SMART data from the single device at C<$device_path> and hand it to
the collector for storage. Progress and errors are reported through
C<log_message>. The process exits with status 1 when either the collection
or the storage step reports failure.

=cut

sub collect_specific_device {
    my ($collector, $device_path) = @_;

    log_message("Collecting SMART data from $device_path");

    my $readings = $collector->collect_smart_data($device_path);
    if (!$readings) {
        log_message("ERROR: Failed to collect SMART data from $device_path");
        exit 1;
    }

    # Minimal drive record for storage: identity fields come from the SMART
    # readings when present, the size is recorded as 0, and the id is derived
    # from the device path given by the caller.
    my %drive = (
        device_path   => $device_path,
        serial        => $readings->{serial_number} || 'unknown',
        model         => $readings->{model_name} || 'unknown',
        size_gb       => 0,
        madagascar_id => "manual_$device_path",
    );

    unless ($collector->store_smart_data(\%drive, $readings)) {
        log_message("ERROR: Failed to store SMART data for $device_path");
        exit 1;
    }

    log_message("Successfully stored SMART data for $device_path");
}
|
|
|
176
|
|
|
|
177
|
=head2 run_collection_cycle

    run_collection_cycle($collector);
    my $ok = run_collection_cycle($collector, 0);

Execute one complete collection cycle via C<< $collector->collect_all() >>
and log a summary of the successful, failed and total counts.

The optional second argument C<$exit_on_failure> defaults to true: when any
collection failed, the process exits with status 1. Pass a false (but
defined) value to suppress the exit; the sub then returns 1 when nothing
failed and 0 otherwise. Long-running callers need this because C<exit>
is not trapped by C<eval>.

=cut

sub run_collection_cycle {
    my ($collector, $exit_on_failure) = @_;
    $exit_on_failure = 1 unless defined $exit_on_failure;

    log_message("Starting collection cycle");

    my $result = $collector->collect_all();

    log_message(sprintf(
        "Collection cycle complete: %d successful, %d failed, %d total",
        $result->{successful},
        $result->{failed},
        $result->{total}
    ));

    my $had_failures = $result->{failed} > 0 ? 1 : 0;

    # Exit with error code if any collections failed (one-shot / cron usage)
    exit 1 if $had_failures && $exit_on_failure;

    return $had_failures ? 0 : 1;
}

=head2 run_daemon

    run_daemon($collector, \%local_settings);

Run as daemon with periodic collection. One collection cycle is started
every C<$interval> seconds until SIGTERM or SIGINT is received; the cycle in
progress is allowed to finish before the loop ends.

The interval is taken from the C<AUTOSMART_COLLECTION_INTERVAL> entry of the
settings hash reference and falls back to 300 seconds when the entry is
missing or is not a positive integer. The hash reference is optional.

Failed collections and exceptions raised during a cycle are logged and do
not stop the daemon.

=cut

sub run_daemon {
    my ($collector, $local_settings) = @_;
    $local_settings ||= {};

    # Get collection interval from the local settings.
    # NOTE(review): the key name AUTOSMART_COLLECTION_INTERVAL follows the
    # naming of the other local settings but is an assumption - confirm it
    # against the shipped /etc/default/autosmart template.
    my $default_interval = 300;
    my $interval         = $local_settings->{AUTOSMART_COLLECTION_INTERVAL};

    if (!defined $interval || $interval eq '') {
        $interval = $default_interval;
    } elsif ($interval !~ /^[1-9]\d*$/) {
        log_message("WARNING: Invalid collection interval '$interval', using ${default_interval}s");
        $interval = $default_interval;
    }

    log_message("Running in daemon mode (interval: ${interval}s)");

    # Set up signal handlers for graceful shutdown
    my $running = 1;

    $SIG{TERM} = sub {
        log_message("Received SIGTERM, shutting down gracefully");
        $running = 0;
    };

    $SIG{INT} = sub {
        log_message("Received SIGINT, shutting down gracefully");
        $running = 0;
    };

    # Main daemon loop
    while ($running) {
        my $start_time = time();

        # Pass a false exit_on_failure flag: eval traps die but not exit, so
        # a failed collection must not be allowed to terminate the daemon.
        eval {
            run_collection_cycle($collector, 0);
        };

        if ($@) {
            log_message("ERROR in collection cycle: $@");
        }

        # Calculate sleep time to maintain interval
        my $elapsed    = time() - $start_time;
        my $sleep_time = $interval - $elapsed;

        if ($sleep_time > 0) {
            log_message("Sleeping for ${sleep_time}s until next collection");

            # Sleep in small chunks to allow signal handling
            while ($sleep_time > 0 && $running) {
                my $chunk = $sleep_time > 5 ? 5 : $sleep_time;
                sleep($chunk);
                $sleep_time -= $chunk;
            }
        } else {
            log_message("WARNING: Collection took longer than interval (${elapsed}s > ${interval}s)");
        }
    }

    log_message("Daemon shutdown complete");
}
|
|
|
263
|
|
|
|
264
|
=head2 log_message

    log_message($message);

Print C<$message> as a single line on the currently selected output handle,
prefixed with the local time (C<YYYY-MM-DD HH:MM:SS>) in square brackets and
the program tag C<autosmart-collector>.

=cut

sub log_message {
    my ($text) = @_;

    my @now   = localtime();
    my $stamp = strftime('%Y-%m-%d %H:%M:%S', @now);

    print '[' . $stamp . '] autosmart-collector: ' . $text . "\n";
}
|
|
|
276
|
|
|
|
277
|
=head2 print_help

    print_help();

Print the usage text to the currently selected output handle. The option
list mirrors the options registered with GetOptions at the top of the
script; keep the two in sync when adding or removing options.

=cut

sub print_help {
    # Single-quoted heredoc: the text is printed verbatim, nothing is
    # interpolated.
    print <<'EOF';
autoSMART Data Collector v1.0

USAGE:
    autosmart-collector.pl [OPTIONS]

OPTIONS:
    --cluster-config FILE  Cluster configuration file
                           (default: /etc/pve/autoSMART/cluster.conf)
    --local-config FILE    Local configuration file
                           (default: /etc/default/autosmart)
    --daemon               Run as daemon with periodic collection
    --once                 Run once and exit (useful for cron jobs)
    --device PATH          Collect from specific device only (e.g., /dev/sda)
    --debug                Enable debug logging
    --help                 Show this help message

EXAMPLES:
    # Run once (for cron jobs)
    autosmart-collector.pl --once

    # Run as daemon
    autosmart-collector.pl --daemon

    # Collect from specific device
    autosmart-collector.pl --device /dev/sda

    # Run with debug logging
    autosmart-collector.pl --debug --once

    # Use custom configuration files
    autosmart-collector.pl --cluster-config /opt/autosmart/cluster.conf \
                           --local-config /opt/autosmart/local.conf --once

CONFIGURATION:
    Configuration is split between two files:
      - /etc/pve/autoSMART/cluster.conf   Cluster-wide settings
      - /etc/default/autosmart            Local node settings

DAEMON MODE:
    In daemon mode, the collector runs continuously and collects data at
    a fixed interval (300 seconds by default).

    Send SIGTERM or SIGINT for graceful shutdown.

CRON MODE:
    Use --once flag for cron-based scheduling:

    # Collect every 5 minutes
    */5 * * * * /usr/local/bin/autosmart-collector.pl --once

EXIT CODES:
    0   Success
    1   Error (failed collections, missing config, etc.)

EOF
}
|
|
|
337
|
|
|
|
338
|
__END__
|
|
|
339
|
|
|
|
340
|
=head1 AUTHOR
|
|
|
341
|
|
|
|
342
|
AutoSMART Development Team
|
|
|
343
|
|
|
|
344
|
=head1 LICENSE
|
|
|
345
|
|
|
|
346
|
This software is part of the autoSMART project.
|
|
|
347
|
|
|
|
348
|
=cut
|