|
Bogdan Timofte
authored
3 months ago
|
1
|
# autoSMART SMART Parameters Configuration
|
|
|
2
|
# Defines which SMART parameters to monitor and their thresholds
|
|
|
3
|
|
|
|
4
|
[monitoring]
|
|
|
5
|
# Collection interval in seconds
|
|
|
6
|
# 300 seconds = 5 minutes
collection_interval = 300
|
|
|
7
|
# 30 seconds timeout per disk
collection_timeout = 30
|
|
|
8
|
|
|
|
9
|
# Madagascar integration
|
|
|
10
|
madagascar_inventory_file = /etc/madagascar/disk_inventory.json
|
|
|
11
|
madagascar_api_endpoint = http://madagascar.local/api/v1/disks
|
|
|
12
|
|
|
|
13
|
[smart_parameters]
|
|
|
14
|
# Format: parameter_name = threshold,weight,enabled,"description"
# (four comma-separated fields; the description is enclosed in double quotes)
|
|
|
15
|
|
|
|
16
|
# Critical parameters (high weight, immediate attention)
|
|
|
17
|
Raw_Read_Error_Rate = 100000,0.9,true,"Raw read error rate from disk surface"
|
|
|
18
|
Reallocated_Sector_Ct = 5,0.95,true,"Count of reallocated sectors"
|
|
|
19
|
Current_Pending_Sector = 1,0.9,true,"Count of sectors waiting for reallocation"
|
|
|
20
|
Offline_Uncorrectable = 1,0.95,true,"Count of uncorrectable sectors"
|
|
|
21
|
UDMA_CRC_Error_Count = 100,0.7,true,"Count of UDMA CRC errors"
|
|
|
22
|
|
|
|
23
|
# Important parameters (medium weight)
|
|
|
24
|
Spin_Retry_Count = 3,0.8,true,"Count of spin-up retry attempts"
|
|
|
25
|
End-to-End_Error = 1,0.8,true,"End-to-end error detection count"
|
|
|
26
|
Reported_Uncorrect = 1,0.85,true,"Count of uncorrectable errors reported"
|
|
|
27
|
High_Fly_Writes = 1,0.7,true,"Count of high fly write operations"
|
|
|
28
|
Airflow_Temperature_Cel = 50,0.6,true,"Temperature of airflow in Celsius"
|
|
|
29
|
|
|
|
30
|
# Monitoring parameters (lower weight, trending)
|
|
|
31
|
Temperature_Celsius = 55,0.6,true,"Drive temperature in Celsius"
|
|
|
32
|
Power_On_Hours = 43800,0.4,true,"Total power-on hours (5 years)"
|
|
|
33
|
Load_Cycle_Count = 300000,0.5,true,"Count of load/unload cycles"
|
|
|
34
|
Start_Stop_Count = 10000,0.4,true,"Count of start/stop cycles"
|
|
|
35
|
Power_Cycle_Count = 10000,0.4,true,"Count of power-on cycles"
|
|
|
36
|
|
|
|
37
|
# Performance parameters (informational)
|
|
|
38
|
Seek_Error_Rate = 100000,0.3,true,"Rate of seek errors"
|
|
|
39
|
Throughput_Performance = 80,0.3,true,"Overall throughput performance"
|
|
|
40
|
Spin_Up_Time = 10000,0.4,true,"Time required to spin up"
|
|
|
41
|
|
|
|
42
|
[thresholds]
|
|
|
43
|
# Global threshold multipliers
|
|
|
44
|
# Warning at 90% of threshold
temperature_warning = 0.9
|
|
|
45
|
# Critical at 100% of threshold
temperature_critical = 1.0
|
|
|
46
|
# Warning at 50% of threshold
sector_warning = 0.5
|
|
|
47
|
# Critical at 100% of threshold
sector_critical = 1.0
|
|
|
48
|
|
|
|
49
|
# Trend analysis
|
|
|
50
|
# 168 hours = 7 days for trend analysis
trend_window_hours = 168
|
|
|
51
|
# Standard deviations for anomaly
trend_deviation_threshold = 2.0
|
|
|
52
|
|
|
|
53
|
[exclusions]
|
|
|
54
|
# Disk models/serials to exclude from monitoring
|
|
|
55
|
# Comma-separated list inside double quotes.
# NOTE(review): some INI parsers (e.g. Python configparser) keep the quote
# characters as part of the value — confirm the consumer strips them.
exclude_models = "Virtual,QEMU,VMware"
|
|
|
56
|
exclude_serials = ""
|
|
|
57
|
# Exclude disks smaller than 8 GB
exclude_by_size_gb = 8
|