Newer Older
f16725e 3 months ago History
57 lines | 2.512kb
Bogdan Timofte authored 3 months ago
1
# autoSMART SMART Parameters Configuration
2
# Defines which SMART parameters to monitor and their thresholds
3

            
4
[monitoring]
# Collection timing and Madagascar integration settings.
# Comments are kept on their own lines: many INI parsers treat text after the
# value (including a trailing "# ...") as part of the value.

# Collection interval in seconds (300 = 5 minutes)
collection_interval = 300
# Timeout per disk in seconds
collection_timeout = 30

# Madagascar integration
# Path to the disk inventory file (JSON, per its extension)
madagascar_inventory_file = /etc/madagascar/disk_inventory.json
# API endpoint for disks
madagascar_api_endpoint = http://madagascar.local/api/v1/disks

[smart_parameters]
# One entry per SMART parameter to monitor.
# Format: parameter_name = threshold,weight,enabled,description
#   threshold   - numeric limit for the parameter
#   weight      - relative importance (values in this file range from 0.3 to 0.95)
#   enabled     - true/false
#   description - free text, double-quoted
# NOTE(review): whether the surrounding double quotes are stripped from the
# description depends on the consuming parser - confirm before changing them.

# Critical parameters (high weight, immediate attention)
Raw_Read_Error_Rate = 100000,0.9,true,"Raw read error rate from disk surface"
Reallocated_Sector_Ct = 5,0.95,true,"Count of reallocated sectors"
Current_Pending_Sector = 1,0.9,true,"Count of sectors waiting for reallocation"
Offline_Uncorrectable = 1,0.95,true,"Count of uncorrectable sectors"
UDMA_CRC_Error_Count = 100,0.7,true,"Count of UDMA CRC errors"

# Important parameters (medium weight)
Spin_Retry_Count = 3,0.8,true,"Count of spin-up retry attempts"
End-to-End_Error = 1,0.8,true,"End-to-end error detection count"
Reported_Uncorrect = 1,0.85,true,"Count of uncorrectable errors reported"
High_Fly_Writes = 1,0.7,true,"Count of high fly write operations"
Airflow_Temperature_Cel = 50,0.6,true,"Temperature of airflow in Celsius"

# Monitoring parameters (lower weight, trending)
Temperature_Celsius = 55,0.6,true,"Drive temperature in Celsius"
Power_On_Hours = 43800,0.4,true,"Total power-on hours (5 years)"
Load_Cycle_Count = 300000,0.5,true,"Count of load/unload cycles"
Start_Stop_Count = 10000,0.4,true,"Count of start/stop cycles"
Power_Cycle_Count = 10000,0.4,true,"Count of power-on cycles"

# Performance parameters (informational)
Seek_Error_Rate = 100000,0.3,true,"Rate of seek errors"
Throughput_Performance = 80,0.3,true,"Overall throughput performance"
Spin_Up_Time = 10000,0.4,true,"Time required to spin up"

[thresholds]
# Global multipliers applied to parameter thresholds, plus trend-analysis settings.
# Comments are kept on their own lines: many INI parsers treat text after the
# value (including a trailing "# ...") as part of the value.

# Global threshold multipliers
# Warning at 90% of threshold
temperature_warning = 0.9
# Critical at 100% of threshold
temperature_critical = 1.0
# Warning at 50% of threshold
sector_warning = 0.5
# Critical at 100% of threshold
sector_critical = 1.0

# Trend analysis
# Window for trend analysis in hours (168 = 7 days)
trend_window_hours = 168
# Standard deviations for anomaly
trend_deviation_threshold = 2.0

[exclusions]
# Disk models/serials to exclude from monitoring
# NOTE(review): the values below are double-quoted; some parsers strip the
# quotes and others keep them literally - confirm how the consumer reads them.
# Comma-separated list of models
exclude_models = "Virtual,QEMU,VMware"
# Serials to exclude (currently empty)
exclude_serials = ""
# Exclude disks smaller than 8GB
exclude_by_size_gb = 8