# f16725e 3 months ago History
# 1 contributor
# 57 lines | 2.512kb
# autoSMART SMART Parameters Configuration
# Defines which SMART parameters to monitor and their thresholds

[monitoring]
# Collection scheduling. Comments are kept on their own lines because not
# every INI reader strips trailing "# ..." text from a value.

# Collection interval in seconds (300 s = 5 minutes).
collection_interval = 300
# Timeout per disk in seconds.
collection_timeout = 30

# Madagascar integration: a local disk inventory file and an HTTP API endpoint.
# NOTE(review): which of the two sources takes precedence is not visible in
# this file - confirm against the collector.
madagascar_inventory_file = /etc/madagascar/disk_inventory.json
madagascar_api_endpoint = http://madagascar.local/api/v1/disks

[smart_parameters]
# One entry per monitored SMART parameter.
# Format: parameter_name = threshold,weight,enabled,description
#   threshold   - numeric limit for the parameter
#   weight      - relative importance; values in this file range from 0.3 to 0.95
#                 (NOTE(review): presumably a 0-1 scale - confirm)
#   enabled     - true/false switch for the parameter
#   description - double-quoted free text
# NOTE(review): parameter names look like smartctl attribute names
# (e.g. Reallocated_Sector_Ct) - confirm they must match the tool output exactly.
# NOTE(review): the values are comma-separated and the descriptions here contain
# no commas; whether the parser tolerates a comma inside the quotes is not
# visible in this file - confirm before adding one.

# Critical parameters (high weight, immediate attention)
# NOTE(review): UDMA_CRC_Error_Count has weight 0.7, lower than the 0.8-0.85
# weights used in the "Important" group below - confirm the grouping is intended.
Raw_Read_Error_Rate = 100000,0.9,true,"Raw read error rate from disk surface"
Reallocated_Sector_Ct = 5,0.95,true,"Count of reallocated sectors"
Current_Pending_Sector = 1,0.9,true,"Count of sectors waiting for reallocation"
Offline_Uncorrectable = 1,0.95,true,"Count of uncorrectable sectors"
UDMA_CRC_Error_Count = 100,0.7,true,"Count of UDMA CRC errors"

# Important parameters (medium weight)
# NOTE(review): Airflow_Temperature_Cel has the same weight (0.6) as
# Temperature_Celsius in the next group but a lower threshold (50 vs 55).
Spin_Retry_Count = 3,0.8,true,"Count of spin-up retry attempts"
End-to-End_Error = 1,0.8,true,"End-to-end error detection count"
Reported_Uncorrect = 1,0.85,true,"Count of uncorrectable errors reported"
High_Fly_Writes = 1,0.7,true,"Count of high fly write operations"
Airflow_Temperature_Cel = 50,0.6,true,"Temperature of airflow in Celsius"

# Monitoring parameters (lower weight, trending)
# Power_On_Hours threshold: 43800 h = 5 years x 8760 h.
Temperature_Celsius = 55,0.6,true,"Drive temperature in Celsius"
Power_On_Hours = 43800,0.4,true,"Total power-on hours (5 years)"
Load_Cycle_Count = 300000,0.5,true,"Count of load/unload cycles"
Start_Stop_Count = 10000,0.4,true,"Count of start/stop cycles"
Power_Cycle_Count = 10000,0.4,true,"Count of power-on cycles"

# Performance parameters (informational)
# NOTE(review): the unit of the Spin_Up_Time threshold and whether the
# Throughput_Performance threshold is an upper or a lower bound are not
# stated in this file - confirm.
Seek_Error_Rate = 100000,0.3,true,"Rate of seek errors"
Throughput_Performance = 80,0.3,true,"Overall throughput performance"
Spin_Up_Time = 10000,0.4,true,"Time required to spin up"

[thresholds]
# Global threshold multipliers, expressed as a fraction of a threshold value.
# Comments are kept on their own lines because not every INI reader strips
# trailing "# ..." text from a value.
# NOTE(review): presumably these scale the per-parameter thresholds of the
# temperature and sector parameters; which parameters fall into each class
# is not visible in this file - confirm.

# Temperature: warning at 90% of threshold, critical at 100% of threshold.
temperature_warning = 0.9
temperature_critical = 1.0
# Sectors: warning at 50% of threshold, critical at 100% of threshold.
sector_warning = 0.5
sector_critical = 1.0

# Trend analysis
# Window used for trend analysis, in hours (168 h = 7 days).
trend_window_hours = 168
# Deviation, in standard deviations, that counts as an anomaly.
trend_deviation_threshold = 2.0

[exclusions]
# Disk models/serials to exclude from monitoring.
# Comments are kept on their own lines because not every INI reader strips
# trailing "# ..." text from a value.
# exclude_models holds a comma-separated list inside one quoted string.
# NOTE(review): whether entries are matched exactly or as substrings is not
# visible in this file - confirm.
exclude_models = "Virtual,QEMU,VMware"
# NOTE(review): presumably the same list format as exclude_models; empty
# here - confirm.
exclude_serials = ""
# Exclude disks smaller than this size, in GB.
exclude_by_size_gb = 8