1 contributor
-- autoSMART Database Schema - Fixed for PostgreSQL 15
-- This version removes problematic syntax and creates a working schema
-- Drop existing tables if they exist
DROP TABLE IF EXISTS smart_readings CASCADE;
DROP TABLE IF EXISTS predictions CASCADE;
DROP TABLE IF EXISTS alert_history CASCADE;
DROP TABLE IF EXISTS hdd_presence CASCADE;
DROP TABLE IF EXISTS hdd_inventory CASCADE;
-- Create required extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "btree_gin";
-- HDD Inventory Table (Hardware-based tracking)
CREATE TABLE hdd_inventory (
id SERIAL PRIMARY KEY,
serial_number VARCHAR(100) NOT NULL,
model_name VARCHAR(200) NOT NULL,
firmware VARCHAR(50),
size_gb INTEGER,
manufacturer VARCHAR(100),
current_device_path VARCHAR(50),
current_node_id VARCHAR(50),
current_slot VARCHAR(20),
madagascar_id VARCHAR(100),
first_seen TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
last_seen TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
status VARCHAR(20) DEFAULT 'active',
status_changed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
notes TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
-- Hardware identification constraint
CONSTRAINT unique_hardware_id UNIQUE (serial_number, model_name)
);
-- Create index for device path (but allow NULLs and duplicates)
CREATE INDEX idx_hdd_inventory_device_path ON hdd_inventory(current_device_path) WHERE current_device_path IS NOT NULL;
CREATE INDEX idx_hdd_inventory_node ON hdd_inventory(current_node_id);
CREATE INDEX idx_hdd_inventory_status ON hdd_inventory(status);
CREATE INDEX idx_hdd_inventory_last_seen ON hdd_inventory(last_seen);
-- HDD Presence Table (tracks HDD mobility across nodes)
CREATE TABLE hdd_presence (
id SERIAL PRIMARY KEY,
serial_number VARCHAR(64) NOT NULL,
node VARCHAR(64) NOT NULL,
data_start TIMESTAMP NOT NULL,
data_end TIMESTAMP NOT NULL,
is_current BOOLEAN NOT NULL DEFAULT TRUE
);
CREATE INDEX idx_hdd_presence_serial_current ON hdd_presence(serial_number, is_current);
CREATE INDEX idx_hdd_presence_node ON hdd_presence(node);
CREATE INDEX idx_hdd_presence_data_end ON hdd_presence(data_end DESC);
-- SMART Readings Table (with differential storage)
CREATE TABLE smart_readings (
id BIGSERIAL PRIMARY KEY,
hdd_id INTEGER REFERENCES hdd_inventory(id),
serial_number VARCHAR(100) NOT NULL,
device_path VARCHAR(50),
node_id VARCHAR(50),
timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
collection_ok BOOLEAN DEFAULT true,
temperature INTEGER,
parameters_json JSONB,
reading_type VARCHAR(20) DEFAULT 'full',
changes_detected BOOLEAN DEFAULT true,
changed_parameters JSONB,
previous_reading_id INTEGER REFERENCES smart_readings(id),
checksum VARCHAR(64)
);
CREATE INDEX idx_smart_readings_hdd_id ON smart_readings(hdd_id);
CREATE INDEX idx_smart_readings_timestamp ON smart_readings(timestamp DESC);
CREATE INDEX idx_smart_readings_serial ON smart_readings(serial_number);
CREATE INDEX idx_smart_readings_device_path ON smart_readings(device_path);
CREATE INDEX idx_smart_readings_type ON smart_readings(reading_type);
CREATE INDEX idx_smart_readings_checksum ON smart_readings(checksum);
CREATE INDEX idx_smart_readings_previous ON smart_readings(previous_reading_id);
-- GIN index for JSONB parameters
CREATE INDEX idx_smart_readings_parameters ON smart_readings USING GIN (parameters_json);
CREATE INDEX idx_smart_readings_changed_params ON smart_readings USING GIN (changed_parameters);
-- Predictions Table
CREATE TABLE predictions (
id SERIAL PRIMARY KEY,
hdd_id INTEGER REFERENCES hdd_inventory(id),
serial_number VARCHAR(100) NOT NULL,
device_path VARCHAR(50),
timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
risk_level VARCHAR(20),
failure_probability DECIMAL(5,4),
predicted_failure_date DATE,
confidence_score DECIMAL(5,4),
analysis_summary TEXT,
recommendations JSONB,
openai_response JSONB,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE INDEX idx_predictions_hdd_id ON predictions(hdd_id);
CREATE INDEX idx_predictions_timestamp ON predictions(timestamp DESC);
CREATE INDEX idx_predictions_risk_level ON predictions(risk_level);
CREATE INDEX idx_predictions_serial ON predictions(serial_number);
-- Alert History Table
CREATE TABLE alert_history (
id SERIAL PRIMARY KEY,
hdd_id INTEGER REFERENCES hdd_inventory(id),
serial_number VARCHAR(100) NOT NULL,
alert_type VARCHAR(50),
severity VARCHAR(20),
message TEXT,
sent_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
sent_to TEXT,
delivery_status VARCHAR(20) DEFAULT 'pending',
related_reading_id BIGINT REFERENCES smart_readings(id),
related_prediction_id INTEGER REFERENCES predictions(id)
);
CREATE INDEX idx_alert_history_hdd_id ON alert_history(hdd_id);
CREATE INDEX idx_alert_history_sent_at ON alert_history(sent_at DESC);
CREATE INDEX idx_alert_history_severity ON alert_history(severity);
CREATE INDEX idx_alert_history_serial ON alert_history(serial_number);
-- System Configuration Table
CREATE TABLE IF NOT EXISTS system_config (
id SERIAL PRIMARY KEY,
config_key VARCHAR(100) UNIQUE NOT NULL,
value TEXT,
description TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Insert default configuration
INSERT INTO system_config (config_key, value, description) VALUES
('collection_interval_seconds', '1800', 'SMART data collection interval in seconds')
ON CONFLICT (config_key) DO NOTHING;
INSERT INTO system_config (config_key, value, description) VALUES
('differential_storage_enabled', 'true', 'Enable differential storage optimization'),
('forced_storage_interval_hours', '24', 'Hours between forced full readings'),
('critical_parameter_force_store', 'true', 'Force storage for critical parameter changes'),
('temperature_change_threshold', '5', 'Temperature change threshold for storage (Celsius)')
ON CONFLICT (config_key) DO NOTHING;
-- SMART Thresholds Table
CREATE TABLE IF NOT EXISTS smart_thresholds (
id SERIAL PRIMARY KEY,
parameter_name VARCHAR(100) NOT NULL,
warning_threshold NUMERIC,
critical_threshold NUMERIC,
weight NUMERIC DEFAULT 1.0,
enabled BOOLEAN DEFAULT true,
description TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Insert default SMART thresholds
INSERT INTO smart_thresholds (parameter_name, warning_threshold, critical_threshold, weight, description) VALUES
('Reallocated_Sector_Ct', 1, 5, 10.0, 'Reallocated sector count - critical for drive health'),
('Spin_Retry_Count', 1, 10, 8.0, 'Spindle motor retry attempts'),
('Reallocated_Event_Count', 1, 10, 9.0, 'Number of reallocation events'),
('Current_Pending_Sector', 1, 5, 9.5, 'Sectors waiting to be reallocated'),
('Offline_Uncorrectable', 1, 1, 10.0, 'Uncorrectable sectors found during offline scan'),
('UDMA_CRC_Error_Count', 10, 50, 5.0, 'Communication errors between drive and controller'),
('Raw_Read_Error_Rate', 100000, 1000000, 3.0, 'Raw read error rate (varies by manufacturer)'),
('Seek_Error_Rate', 100000, 1000000, 4.0, 'Seek error rate'),
('Power_On_Hours', 35000, 50000, 2.0, 'Total power-on time in hours'),
('Load_Cycle_Count', 100000, 300000, 2.0, 'Number of head load/unload cycles'),
('Temperature_Celsius', 50, 60, 3.0, 'Drive operating temperature'),
('Start_Stop_Count', 10000, 50000, 1.0, 'Drive start/stop cycles'),
('Power_Cycle_Count', 10000, 20000, 1.0, 'Number of power cycles')
ON CONFLICT (parameter_name) DO NOTHING;
-- Create view for reconstructed SMART data (handles differential storage)
CREATE VIEW smart_readings_reconstructed AS
WITH RECURSIVE reading_chain AS (
-- Base case: get baseline readings
SELECT
id, hdd_id, serial_number, timestamp,
parameters_json, temperature, reading_type,
previous_reading_id, 1 as chain_level
FROM smart_readings
WHERE reading_type IN ('baseline', 'full')
UNION ALL
-- Recursive case: follow the chain of differential readings
SELECT
sr.id, sr.hdd_id, sr.serial_number, sr.timestamp,
-- Merge parameters from previous reading with current changes
COALESCE(rc.parameters_json, '{}'::jsonb) || sr.parameters_json as parameters_json,
COALESCE(sr.temperature, rc.temperature) as temperature,
sr.reading_type,
sr.previous_reading_id,
rc.chain_level + 1
FROM smart_readings sr
JOIN reading_chain rc ON sr.previous_reading_id = rc.id
WHERE sr.reading_type = 'differential'
)
SELECT
id, hdd_id, serial_number, timestamp,
parameters_json, temperature, reading_type,
chain_level
FROM reading_chain;
-- Latest SMART readings for all drives (using reconstructed differential data)
CREATE VIEW latest_smart_readings AS
SELECT DISTINCT ON (sr.hdd_id)
sr.id,
sr.hdd_id,
sr.serial_number,
sr.timestamp,
sr.parameters_json,
sr.temperature,
hi.model_name,
hi.manufacturer,
hi.size_gb,
hi.current_device_path,
hi.current_node_id
FROM smart_readings_reconstructed sr
JOIN hdd_inventory hi ON sr.hdd_id = hi.id
ORDER BY sr.hdd_id, sr.timestamp DESC;
-- Drive health summary view
CREATE VIEW drive_health_summary AS
SELECT
hi.id as hdd_id,
hi.serial_number,
hi.model_name,
hi.manufacturer,
hi.current_device_path,
hi.current_node_id,
hi.status,
lsr.timestamp as last_reading,
lsr.temperature,
p.risk_level,
p.failure_probability,
p.predicted_failure_date,
EXTRACT(EPOCH FROM (NOW() - lsr.timestamp))/3600 as hours_since_last_reading
FROM hdd_inventory hi
LEFT JOIN latest_smart_readings lsr ON hi.id = lsr.hdd_id
LEFT JOIN LATERAL (
SELECT risk_level, failure_probability, predicted_failure_date
FROM predictions
WHERE hdd_id = hi.id
ORDER BY timestamp DESC
LIMIT 1
) p ON true
WHERE hi.status = 'active';
-- Function to check if SMART reading should be stored (simplified version)
CREATE OR REPLACE FUNCTION should_store_smart_reading(
p_hdd_id INTEGER,
p_parameters_json JSONB,
p_checksum VARCHAR(64),
p_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW()
) RETURNS TABLE(
should_store BOOLEAN,
reading_type VARCHAR(20),
changes_detected BOOLEAN,
changed_parameters JSONB,
previous_reading_id INTEGER
) AS $$
DECLARE
v_last_reading RECORD;
v_config_enabled BOOLEAN := true;
v_force_interval_hours INTEGER := 24;
v_temp_threshold INTEGER := 5;
BEGIN
-- Get configuration
SELECT (value::boolean) INTO v_config_enabled
FROM system_config WHERE config_key = 'differential_storage_enabled';
SELECT (value::integer) INTO v_force_interval_hours
FROM system_config WHERE config_key = 'forced_storage_interval_hours';
SELECT (value::integer) INTO v_temp_threshold
FROM system_config WHERE config_key = 'temperature_change_threshold';
-- If differential storage is disabled, always store as full
IF v_config_enabled IS FALSE OR v_config_enabled IS NULL THEN
RETURN QUERY SELECT true, 'full'::varchar(20), true, NULL::jsonb, NULL::integer;
RETURN;
END IF;
-- Get the last reading for this HDD
SELECT id, checksum, timestamp, parameters_json, temperature
INTO v_last_reading
FROM smart_readings
WHERE hdd_id = p_hdd_id
ORDER BY timestamp DESC
LIMIT 1;
-- If no previous reading, store as baseline
IF v_last_reading IS NULL THEN
RETURN QUERY SELECT true, 'baseline'::varchar(20), true, NULL::jsonb, NULL::integer;
RETURN;
END IF;
-- If checksum matches, no changes detected
IF v_last_reading.checksum = p_checksum THEN
RETURN QUERY SELECT false, 'skipped'::varchar(20), false, NULL::jsonb, v_last_reading.id;
RETURN;
END IF;
-- If forced interval exceeded, store as full
IF p_timestamp > v_last_reading.timestamp + (v_force_interval_hours || ' hours')::interval THEN
RETURN QUERY SELECT true, 'full'::varchar(20), true, NULL::jsonb, v_last_reading.id;
RETURN;
END IF;
-- Otherwise, store as differential
RETURN QUERY SELECT true, 'differential'::varchar(20), true, '[]'::jsonb, v_last_reading.id;
RETURN;
END;
$$ LANGUAGE plpgsql;
-- Function to update HDD presence tracking
CREATE OR REPLACE FUNCTION update_hdd_presence(
p_serial_number VARCHAR(64),
p_node VARCHAR(64)
) RETURNS VOID AS $$
BEGIN
-- Mark all previous presence records for this serial as historic
UPDATE hdd_presence
SET is_current = FALSE
WHERE serial_number = p_serial_number AND is_current = TRUE AND node <> p_node;
-- Check if there's already a current presence for this serial/node
IF EXISTS (SELECT 1 FROM hdd_presence WHERE serial_number = p_serial_number AND node = p_node AND is_current = TRUE) THEN
-- Update data_end for existing current presence
UPDATE hdd_presence
SET data_end = NOW()
WHERE serial_number = p_serial_number AND node = p_node AND is_current = TRUE;
ELSE
-- Create new presence record
INSERT INTO hdd_presence (serial_number, node, data_start, data_end, is_current)
VALUES (p_serial_number, p_node, NOW(), NOW(), TRUE);
END IF;
END;
$$ LANGUAGE plpgsql;
-- Function to update timestamps
CREATE OR REPLACE FUNCTION update_timestamp() RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Create triggers for timestamp updates
CREATE TRIGGER update_hdd_inventory_timestamp
BEFORE UPDATE ON hdd_inventory
FOR EACH ROW EXECUTE FUNCTION update_timestamp();
CREATE TRIGGER update_smart_thresholds_timestamp
BEFORE UPDATE ON smart_thresholds
FOR EACH ROW EXECUTE FUNCTION update_timestamp();
CREATE TRIGGER update_system_config_timestamp
BEFORE UPDATE ON system_config
FOR EACH ROW EXECUTE FUNCTION update_timestamp();
-- Grant permissions to autosmart user
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO autosmart;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO autosmart;
GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO autosmart;
-- Grant specific permissions for hdd_presence table
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLE hdd_presence TO autosmart;
-- Final message
DO $$
BEGIN
RAISE NOTICE 'autoSMART database schema deployed successfully!';
RAISE NOTICE 'Tables created: hdd_inventory, hdd_presence, smart_readings, predictions, smart_thresholds, alert_history, system_config';
RAISE NOTICE 'Views created: smart_readings_reconstructed, latest_smart_readings, drive_health_summary';
RAISE NOTICE 'Functions created: update_hdd_presence(), should_store_smart_reading()';
RAISE NOTICE 'Permissions granted to autosmart user';
END $$;