Newer Older
f16725e 3 months ago History
88 lines | 2.636kb
Bogdan Timofte authored 3 months ago
1
# autoSMART Cluster Configuration
2
# Location: /etc/pve/autoSMART/cluster.conf
3
# This file is shared across all Proxmox cluster nodes
4

            
5
[cluster]
# Names the cluster and lists its nodes.
# NOTE(review): the bare numeric lines interleaved through this file (6, 7, 8, ...)
# look like line-number residue from a web code viewer rather than settings.
# They are not key = value pairs; confirm and strip them before deploying.
6
# Cluster identification
7
cluster_name = proxmox-cluster-main
8
cluster_id = pve-cluster-001
9
# Comma-separated node names. The same three names appear in primary_node and
# backup_nodes under [synchronization].
nodes = node91,node92,node93
10

            
11
# Database configuration (shared cluster database)
12
[database]
# Connection settings for the shared cluster database.
13
host = 192.168.2.91
14
# NOTE(review): 5432 is the PostgreSQL default port — presumably a PostgreSQL
# server; confirm.
port = 5432
15
database = autosmart_cluster
16
username = autosmart_cluster
17
# NOTE(review): placeholder value. The file header says this file is shared
# across all cluster nodes, so a real password stored here would be distributed
# to every node in plain text; confirm file permissions or keep the secret
# elsewhere.
password = cluster_secure_password_here
18
# Unit is not stated here — presumably seconds; TODO confirm.
connection_timeout = 30
19
# NOTE(review): unclear from this file whether the pool size applies per node
# or cluster-wide; verify against the consumer.
pool_size = 10
20

            
21
# OpenAI configuration (shared API key)
22
[openai]
# Settings for the OpenAI API. A single API key is shared by the cluster.
23
# NOTE(review): placeholder value; replace before use. The real key is a secret
# and this file is shared across all cluster nodes, so it would be stored in
# plain text on each of them.
api_key = your_cluster_openai_api_key_here
24
model = gpt-4
25
max_tokens = 1500
26
temperature = 0.3
27
# Unit is not stated here — presumably seconds between requests; TODO confirm.
rate_limit_delay = 2
28

            
29
# Madagascar inventory integration
30
[madagascar]
# Location and refresh settings for the Madagascar inventory file.
31
# The path points into /etc/pve/autoSMART/, the same directory the file header
# gives as this config's location.
inventory_path = /etc/pve/autoSMART/madagascar_inventory.json
32
# Unit is not stated here — presumably seconds (3600 s = 1 hour); TODO confirm.
update_interval = 3600
33
sync_across_nodes = true
34

            
35
# Cluster-wide SMART monitoring parameters
36
[smart_parameters]
# One entry per SMART attribute; each value holds four comma-separated fields:
# an integer, a decimal, a boolean, and free text.
# NOTE(review): the fields look like threshold, weight, enabled flag, description
# (the group comments mention "weight", which lines up with the second field) —
# confirm against the parser. Presumably the description must not contain a
# comma, since the comma is the field separator; verify.
37
# Critical parameters (high weight for AI analysis)
38
Reallocated_Sector_Ct = 1,10.0,true,Critical reallocated sectors
39
Reallocated_Event_Count = 1,9.0,true,Reallocation events
40
Current_Pending_Sector = 1,9.5,true,Pending sector reallocation
41
Offline_Uncorrectable = 1,10.0,true,Uncorrectable sectors
42
# NOTE(review): the second field is 5.0 here, well below the 8.0-10.0 used by
# the other entries in this group — confirm this is intended.
UDMA_CRC_Error_Count = 10,5.0,true,Communication errors
43
Spin_Retry_Count = 1,8.0,true,Spindle motor retries
44

            
45
# Important parameters (medium weight)
# Each value holds four comma-separated fields (presumably threshold, weight,
# enabled flag, description — TODO confirm against the parser).
46
Raw_Read_Error_Rate = 100000,3.0,true,Raw read errors
47
Seek_Error_Rate = 100000,4.0,true,Seek operation errors
48
Load_Cycle_Count = 100000,2.0,true,Head load cycles
49
# First field is presumably a limit in hours, per the key name; TODO confirm.
Power_On_Hours = 35000,2.0,true,Power-on time
50
# First field is presumably a limit in degrees Celsius, per the key name;
# TODO confirm.
Temperature_Celsius = 50,3.0,true,Operating temperature
51

            
52
# Monitoring parameters (low weight)
# Each value holds four comma-separated fields (presumably threshold, weight,
# enabled flag, description — TODO confirm against the parser).
53
Start_Stop_Count = 10000,1.0,true,Start/stop cycles
54
Power_Cycle_Count = 10000,1.0,true,Power cycles
55
Command_Timeout = 100,2.0,true,Command timeouts
56
# NOTE(review): the second field is 4.0 here, the highest in this "low weight"
# group and equal to or above every entry in the "medium weight" group —
# confirm the grouping or the value.
High_Fly_Writes = 1,4.0,true,Head fly height issues
57
Airflow_Temperature_Cel = 45,1.5,true,Airflow temperature
58

            
59
# Cluster-wide alert settings
60
[alerts]
# Email delivery settings for cluster-wide alerts.
61
email_enabled = true
62
# NOTE(review): mail.domain.com and the @domain.com addresses below look like
# placeholders — confirm they are replaced with real values.
email_smtp_server = mail.domain.com
63
# NOTE(review): 587 is the standard mail-submission port, normally used with
# STARTTLS. No TLS option is visible in this section — verify how the sender
# negotiates encryption.
email_smtp_port = 587
64
email_username = autosmart@domain.com
65
# NOTE(review): placeholder value. This file is shared across all cluster
# nodes, so a real password stored here would be in plain text on each node.
email_password = email_password_here
66
# Comma-separated list of recipient addresses.
email_recipients = admin@domain.com,ops@domain.com
67
email_critical_only = false
68

            
69
# Risk level alert thresholds
# These keys still belong to [alerts]: no new section header precedes them.
# The units of the two delays are carried in the key names (minutes, hours).
70
alert_critical_immediate = true
71
alert_high_delay_minutes = 30
72
alert_moderate_delay_hours = 4
73
alert_low_daily_summary = true
74

            
75
# Data retention (cluster-wide policy)
76
[retention]
# Cluster-wide data retention periods. Units are carried in the key names
# (days for the three record types, hours for the cleanup interval).
77
smart_readings_days = 365
78
predictions_days = 180
79
alerts_days = 90
80
# Presumably the period between cleanup runs; TODO confirm against the consumer.
cleanup_interval_hours = 24
81

            
82
# Cluster synchronization
83
[synchronization]
# Node discovery, health checking and failover settings.
84
# Unit is not stated here — presumably seconds; TODO confirm.
node_discovery_interval = 300
85
# Unit is not stated here — presumably seconds; TODO confirm.
health_check_interval = 60
86
failover_enabled = true
87
# Comma-separated node names. Together with primary_node these cover the three
# names listed in nodes under [cluster]; keep the two sections in step.
backup_nodes = node92,node93
88
primary_node = node91