|
Bogdan Timofte
authored
3 months ago
|
1
|
# autoSMART Cluster Configuration
|
|
|
2
|
# Location: /etc/pve/autoSMART/cluster.conf
|
|
|
3
|
# This file is shared across all Proxmox cluster nodes
|
|
|
4
|
|
|
|
5
|
[cluster]
|
|
|
6
|
# Cluster identification: a display name, an identifier, and the member list.
|
|
|
7
|
cluster_name = proxmox-cluster-main
|
|
|
8
|
cluster_id = pve-cluster-001
|
|
|
9
|
# Comma-separated node names, written without spaces. The same names are used
# by primary_node and backup_nodes in [synchronization]; keep them in step.
nodes = node91,node92,node93
|
|
|
10
|
|
|
|
11
|
# Database configuration (one database shared by every node in the cluster)
|
|
|
12
|
[database]
|
|
|
13
|
# NOTE(review): fixed IP address; presumably the host of the primary node
# (node91) - confirm, and check what happens to this value on failover.
host = 192.168.2.91
|
|
|
14
|
# NOTE(review): 5432 is the conventional PostgreSQL port - confirm the backend.
port = 5432
|
|
|
15
|
database = autosmart_cluster
|
|
|
16
|
username = autosmart_cluster
|
|
|
17
|
# Placeholder value - replace before deployment.
# NOTE(review): stored in plain text in a file that the header says is shared
# across all cluster nodes; restrict read access or move the secret elsewhere.
password = cluster_secure_password_here
|
|
|
18
|
# NOTE(review): unit is not stated here; presumably seconds - confirm.
connection_timeout = 30
|
|
|
19
|
# NOTE(review): presumably the maximum number of pooled connections; whether
# the limit is per node or cluster-wide is not stated - confirm.
pool_size = 10
|
|
|
20
|
|
|
|
21
|
# OpenAI configuration (a single API key shared by every node)
|
|
|
22
|
[openai]
|
|
|
23
|
# Placeholder value - replace before deployment.
# NOTE(review): plain-text secret in a cluster-shared file; restrict read access.
api_key = your_cluster_openai_api_key_here
|
|
|
24
|
model = gpt-4
|
|
|
25
|
max_tokens = 1500
|
|
|
26
|
temperature = 0.3
|
|
|
27
|
# NOTE(review): unit is not stated here; presumably seconds between API
# requests - confirm.
rate_limit_delay = 2
|
|
|
28
|
|
|
|
29
|
# Madagascar inventory integration
|
|
|
30
|
[madagascar]
|
|
|
31
|
# JSON inventory file, kept in the same /etc/pve/autoSMART/ directory as this
# configuration file.
inventory_path = /etc/pve/autoSMART/madagascar_inventory.json
|
|
|
32
|
# NOTE(review): unit is not stated here; presumably seconds (3600 = 1 hour) -
# confirm.
update_interval = 3600
|
|
|
33
|
sync_across_nodes = true
|
|
|
34
|
|
|
|
35
|
# Cluster-wide SMART monitoring parameters
#
# Each key is a SMART attribute name and each value holds four comma-separated
# fields. NOTE(review): the field meanings are not stated in this file; judging
# by the group headings and the trailing text they look like
#   threshold,weight,enabled,description
# - confirm against the code that parses this section. The description is the
# last field and none of the entries below contains a comma inside it.
|
|
|
36
|
[smart_parameters]
|
|
|
37
|
# Critical parameters (high weight for AI analysis)
|
|
|
38
|
Reallocated_Sector_Ct = 1,10.0,true,Critical reallocated sectors
|
|
|
39
|
Reallocated_Event_Count = 1,9.0,true,Reallocation events
|
|
|
40
|
Current_Pending_Sector = 1,9.5,true,Pending sector reallocation
|
|
|
41
|
Offline_Uncorrectable = 1,10.0,true,Uncorrectable sectors
|
|
|
42
|
# NOTE(review): second field is 5.0, well below the 8.0-10.0 used by the other
# entries in this group - confirm this attribute belongs under "critical".
UDMA_CRC_Error_Count = 10,5.0,true,Communication errors
|
|
|
43
|
Spin_Retry_Count = 1,8.0,true,Spindle motor retries
|
|
|
44
|
|
|
|
|
45
|
# Important parameters (medium weight)
|
|
|
46
|
Raw_Read_Error_Rate = 100000,3.0,true,Raw read errors
|
|
|
47
|
Seek_Error_Rate = 100000,4.0,true,Seek operation errors
|
|
|
48
|
Load_Cycle_Count = 100000,2.0,true,Head load cycles
|
|
|
49
|
Power_On_Hours = 35000,2.0,true,Power-on time
|
|
|
50
|
Temperature_Celsius = 50,3.0,true,Operating temperature
|
|
|
51
|
|
|
|
|
52
|
# Monitoring parameters (low weight)
|
|
|
53
|
Start_Stop_Count = 10000,1.0,true,Start/stop cycles
|
|
|
54
|
Power_Cycle_Count = 10000,1.0,true,Power cycles
|
|
|
55
|
Command_Timeout = 100,2.0,true,Command timeouts
|
|
|
56
|
# NOTE(review): second field is 4.0, as high as the highest entry in the
# "medium weight" group - confirm this attribute belongs under "low weight".
High_Fly_Writes = 1,4.0,true,Head fly height issues
|
|
|
57
|
Airflow_Temperature_Cel = 45,1.5,true,Airflow temperature
|
|
|
58
|
|
|
|
59
|
# Cluster-wide alert settings: e-mail delivery first, then per-risk-level timing.
|
|
|
60
|
[alerts]
|
|
|
61
|
email_enabled = true
|
|
|
62
|
# Example host name - replace with the real SMTP server.
email_smtp_server = mail.domain.com
|
|
|
63
|
# NOTE(review): 587 is conventionally the mail-submission port used with
# STARTTLS; no TLS option is set in this section - confirm how the client
# negotiates encryption.
email_smtp_port = 587
|
|
|
64
|
email_username = autosmart@domain.com
|
|
|
65
|
# Placeholder value - replace before deployment.
# NOTE(review): plain-text secret in a cluster-shared file; restrict read access.
email_password = email_password_here
|
|
|
66
|
# Comma-separated recipient addresses, written without spaces.
email_recipients = admin@domain.com,ops@domain.com
|
|
|
67
|
email_critical_only = false
|
|
|
68
|
|
|
|
|
69
|
# Alert timing per risk level. Note the different units, which are carried in
# the key names: minutes for "high", hours for "moderate".
|
|
|
70
|
alert_critical_immediate = true
|
|
|
71
|
alert_high_delay_minutes = 30
|
|
|
72
|
alert_moderate_delay_hours = 4
|
|
|
73
|
alert_low_daily_summary = true
|
|
|
74
|
|
|
|
75
|
# Data retention (cluster-wide policy). Units are carried in the key names:
# the *_days keys are in days, cleanup_interval_hours is in hours.
|
|
|
76
|
[retention]
|
|
|
77
|
smart_readings_days = 365
|
|
|
78
|
predictions_days = 180
|
|
|
79
|
alerts_days = 90
|
|
|
80
|
cleanup_interval_hours = 24
|
|
|
81
|
|
|
|
82
|
# Cluster synchronization: node discovery, health checks and failover roles.
|
|
|
83
|
[synchronization]
|
|
|
84
|
# NOTE(review): unit is not stated here; presumably seconds - confirm.
node_discovery_interval = 300
|
|
|
85
|
# NOTE(review): unit is not stated here; presumably seconds - confirm.
health_check_interval = 60
|
|
|
86
|
failover_enabled = true
|
|
|
87
|
# Comma-separated node names, written without spaces. Together with
# primary_node these are the same three names listed under [cluster] nodes.
backup_nodes = node92,node93
|
|
|
88
|
primary_node = node91
|