-- autoSMART Database Schema - Fixed for PostgreSQL 15 -- This version removes problematic syntax and creates a working schema -- Drop existing tables if they exist DROP TABLE IF EXISTS smart_readings CASCADE; DROP TABLE IF EXISTS predictions CASCADE; DROP TABLE IF EXISTS alert_history CASCADE; DROP TABLE IF EXISTS hdd_presence CASCADE; DROP TABLE IF EXISTS hdd_inventory CASCADE; -- Create required extensions CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; CREATE EXTENSION IF NOT EXISTS "btree_gin"; -- HDD Inventory Table (Hardware-based tracking) CREATE TABLE hdd_inventory ( id SERIAL PRIMARY KEY, serial_number VARCHAR(100) NOT NULL, model_name VARCHAR(200) NOT NULL, firmware VARCHAR(50), size_gb INTEGER, manufacturer VARCHAR(100), current_device_path VARCHAR(50), current_node_id VARCHAR(50), current_slot VARCHAR(20), madagascar_id VARCHAR(100), first_seen TIMESTAMP WITH TIME ZONE DEFAULT NOW(), last_seen TIMESTAMP WITH TIME ZONE DEFAULT NOW(), status VARCHAR(20) DEFAULT 'active', status_changed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), notes TEXT, created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), -- Hardware identification constraint CONSTRAINT unique_hardware_id UNIQUE (serial_number, model_name) ); -- Create index for device path (but allow NULLs and duplicates) CREATE INDEX idx_hdd_inventory_device_path ON hdd_inventory(current_device_path) WHERE current_device_path IS NOT NULL; CREATE INDEX idx_hdd_inventory_node ON hdd_inventory(current_node_id); CREATE INDEX idx_hdd_inventory_status ON hdd_inventory(status); CREATE INDEX idx_hdd_inventory_last_seen ON hdd_inventory(last_seen); -- HDD Presence Table (tracks HDD mobility across nodes) CREATE TABLE hdd_presence ( id SERIAL PRIMARY KEY, serial_number VARCHAR(64) NOT NULL, node VARCHAR(64) NOT NULL, data_start TIMESTAMP NOT NULL, data_end TIMESTAMP NOT NULL, is_current BOOLEAN NOT NULL DEFAULT TRUE ); CREATE INDEX idx_hdd_presence_serial_current ON hdd_presence(serial_number, is_current); CREATE INDEX idx_hdd_presence_node ON hdd_presence(node); CREATE INDEX idx_hdd_presence_data_end ON hdd_presence(data_end DESC); -- SMART Readings Table (with differential storage) CREATE TABLE smart_readings ( id BIGSERIAL PRIMARY KEY, hdd_id INTEGER REFERENCES hdd_inventory(id), serial_number VARCHAR(100) NOT NULL, device_path VARCHAR(50), node_id VARCHAR(50), timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(), collection_ok BOOLEAN DEFAULT true, temperature INTEGER, parameters_json JSONB, reading_type VARCHAR(20) DEFAULT 'full', changes_detected BOOLEAN DEFAULT true, changed_parameters JSONB, previous_reading_id INTEGER REFERENCES smart_readings(id), checksum VARCHAR(64) ); CREATE INDEX idx_smart_readings_hdd_id ON smart_readings(hdd_id); CREATE INDEX idx_smart_readings_timestamp ON smart_readings(timestamp DESC); CREATE INDEX idx_smart_readings_serial ON smart_readings(serial_number); CREATE INDEX idx_smart_readings_device_path ON smart_readings(device_path); CREATE INDEX idx_smart_readings_type ON smart_readings(reading_type); CREATE INDEX idx_smart_readings_checksum ON smart_readings(checksum); CREATE INDEX idx_smart_readings_previous ON smart_readings(previous_reading_id); -- GIN index for JSONB parameters CREATE INDEX idx_smart_readings_parameters ON smart_readings USING GIN (parameters_json); CREATE INDEX idx_smart_readings_changed_params ON smart_readings USING GIN (changed_parameters); -- Predictions Table CREATE TABLE predictions ( id SERIAL PRIMARY KEY, hdd_id INTEGER REFERENCES hdd_inventory(id), serial_number VARCHAR(100) NOT NULL, device_path VARCHAR(50), timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(), risk_level VARCHAR(20), failure_probability DECIMAL(5,4), predicted_failure_date DATE, confidence_score DECIMAL(5,4), analysis_summary TEXT, recommendations JSONB, openai_response JSONB, created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() ); CREATE INDEX idx_predictions_hdd_id ON predictions(hdd_id); CREATE INDEX idx_predictions_timestamp ON predictions(timestamp DESC); CREATE INDEX idx_predictions_risk_level ON predictions(risk_level); CREATE INDEX idx_predictions_serial ON predictions(serial_number); -- Alert History Table CREATE TABLE alert_history ( id SERIAL PRIMARY KEY, hdd_id INTEGER REFERENCES hdd_inventory(id), serial_number VARCHAR(100) NOT NULL, alert_type VARCHAR(50), severity VARCHAR(20), message TEXT, sent_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), sent_to TEXT, delivery_status VARCHAR(20) DEFAULT 'pending', related_reading_id BIGINT REFERENCES smart_readings(id), related_prediction_id INTEGER REFERENCES predictions(id) ); CREATE INDEX idx_alert_history_hdd_id ON alert_history(hdd_id); CREATE INDEX idx_alert_history_sent_at ON alert_history(sent_at DESC); CREATE INDEX idx_alert_history_severity ON alert_history(severity); CREATE INDEX idx_alert_history_serial ON alert_history(serial_number); -- System Configuration Table CREATE TABLE IF NOT EXISTS system_config ( id SERIAL PRIMARY KEY, config_key VARCHAR(100) UNIQUE NOT NULL, value TEXT, description TEXT, created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() ); -- Insert default configuration INSERT INTO system_config (config_key, value, description) VALUES ('collection_interval_seconds', '1800', 'SMART data collection interval in seconds') ON CONFLICT (config_key) DO NOTHING; INSERT INTO system_config (config_key, value, description) VALUES ('differential_storage_enabled', 'true', 'Enable differential storage optimization'), ('forced_storage_interval_hours', '24', 'Hours between forced full readings'), ('critical_parameter_force_store', 'true', 'Force storage for critical parameter changes'), ('temperature_change_threshold', '5', 'Temperature change threshold for storage (Celsius)') ON CONFLICT (config_key) DO NOTHING; -- SMART Thresholds Table CREATE TABLE IF NOT EXISTS smart_thresholds ( id SERIAL PRIMARY KEY, parameter_name VARCHAR(100) NOT NULL, warning_threshold NUMERIC, critical_threshold NUMERIC, weight NUMERIC DEFAULT 1.0, enabled BOOLEAN DEFAULT true, description TEXT, created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() ); -- Insert default SMART thresholds INSERT INTO smart_thresholds (parameter_name, warning_threshold, critical_threshold, weight, description) VALUES ('Reallocated_Sector_Ct', 1, 5, 10.0, 'Reallocated sector count - critical for drive health'), ('Spin_Retry_Count', 1, 10, 8.0, 'Spindle motor retry attempts'), ('Reallocated_Event_Count', 1, 10, 9.0, 'Number of reallocation events'), ('Current_Pending_Sector', 1, 5, 9.5, 'Sectors waiting to be reallocated'), ('Offline_Uncorrectable', 1, 1, 10.0, 'Uncorrectable sectors found during offline scan'), ('UDMA_CRC_Error_Count', 10, 50, 5.0, 'Communication errors between drive and controller'), ('Raw_Read_Error_Rate', 100000, 1000000, 3.0, 'Raw read error rate (varies by manufacturer)'), ('Seek_Error_Rate', 100000, 1000000, 4.0, 'Seek error rate'), ('Power_On_Hours', 35000, 50000, 2.0, 'Total power-on time in hours'), ('Load_Cycle_Count', 100000, 300000, 2.0, 'Number of head load/unload cycles'), ('Temperature_Celsius', 50, 60, 3.0, 'Drive operating temperature'), ('Start_Stop_Count', 10000, 50000, 1.0, 'Drive start/stop cycles'), ('Power_Cycle_Count', 10000, 20000, 1.0, 'Number of power cycles') ON CONFLICT (parameter_name) DO NOTHING; -- Create view for reconstructed SMART data (handles differential storage) CREATE VIEW smart_readings_reconstructed AS WITH RECURSIVE reading_chain AS ( -- Base case: get baseline readings SELECT id, hdd_id, serial_number, timestamp, parameters_json, temperature, reading_type, previous_reading_id, 1 as chain_level FROM smart_readings WHERE reading_type IN ('baseline', 'full') UNION ALL -- Recursive case: follow the chain of differential readings SELECT sr.id, sr.hdd_id, sr.serial_number, sr.timestamp, -- Merge parameters from previous reading with current changes COALESCE(rc.parameters_json, '{}'::jsonb) || sr.parameters_json as parameters_json, COALESCE(sr.temperature, rc.temperature) as temperature, sr.reading_type, sr.previous_reading_id, rc.chain_level + 1 FROM smart_readings sr JOIN reading_chain rc ON sr.previous_reading_id = rc.id WHERE sr.reading_type = 'differential' ) SELECT id, hdd_id, serial_number, timestamp, parameters_json, temperature, reading_type, chain_level FROM reading_chain; -- Latest SMART readings for all drives (using reconstructed differential data) CREATE VIEW latest_smart_readings AS SELECT DISTINCT ON (sr.hdd_id) sr.id, sr.hdd_id, sr.serial_number, sr.timestamp, sr.parameters_json, sr.temperature, hi.model_name, hi.manufacturer, hi.size_gb, hi.current_device_path, hi.current_node_id FROM smart_readings_reconstructed sr JOIN hdd_inventory hi ON sr.hdd_id = hi.id ORDER BY sr.hdd_id, sr.timestamp DESC; -- Drive health summary view CREATE VIEW drive_health_summary AS SELECT hi.id as hdd_id, hi.serial_number, hi.model_name, hi.manufacturer, hi.current_device_path, hi.current_node_id, hi.status, lsr.timestamp as last_reading, lsr.temperature, p.risk_level, p.failure_probability, p.predicted_failure_date, EXTRACT(EPOCH FROM (NOW() - lsr.timestamp))/3600 as hours_since_last_reading FROM hdd_inventory hi LEFT JOIN latest_smart_readings lsr ON hi.id = lsr.hdd_id LEFT JOIN LATERAL ( SELECT risk_level, failure_probability, predicted_failure_date FROM predictions WHERE hdd_id = hi.id ORDER BY timestamp DESC LIMIT 1 ) p ON true WHERE hi.status = 'active'; -- Function to check if SMART reading should be stored (simplified version) CREATE OR REPLACE FUNCTION should_store_smart_reading( p_hdd_id INTEGER, p_parameters_json JSONB, p_checksum VARCHAR(64), p_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW() ) RETURNS TABLE( should_store BOOLEAN, reading_type VARCHAR(20), changes_detected BOOLEAN, changed_parameters JSONB, previous_reading_id INTEGER ) AS $$ DECLARE v_last_reading RECORD; v_config_enabled BOOLEAN := true; v_force_interval_hours INTEGER := 24; v_temp_threshold INTEGER := 5; BEGIN -- Get configuration SELECT (value::boolean) INTO v_config_enabled FROM system_config WHERE config_key = 'differential_storage_enabled'; SELECT (value::integer) INTO v_force_interval_hours FROM system_config WHERE config_key = 'forced_storage_interval_hours'; SELECT (value::integer) INTO v_temp_threshold FROM system_config WHERE config_key = 'temperature_change_threshold'; -- If differential storage is disabled, always store as full IF v_config_enabled IS FALSE OR v_config_enabled IS NULL THEN RETURN QUERY SELECT true, 'full'::varchar(20), true, NULL::jsonb, NULL::integer; RETURN; END IF; -- Get the last reading for this HDD SELECT id, checksum, timestamp, parameters_json, temperature INTO v_last_reading FROM smart_readings WHERE hdd_id = p_hdd_id ORDER BY timestamp DESC LIMIT 1; -- If no previous reading, store as baseline IF v_last_reading IS NULL THEN RETURN QUERY SELECT true, 'baseline'::varchar(20), true, NULL::jsonb, NULL::integer; RETURN; END IF; -- If checksum matches, no changes detected IF v_last_reading.checksum = p_checksum THEN RETURN QUERY SELECT false, 'skipped'::varchar(20), false, NULL::jsonb, v_last_reading.id; RETURN; END IF; -- If forced interval exceeded, store as full IF p_timestamp > v_last_reading.timestamp + (v_force_interval_hours || ' hours')::interval THEN RETURN QUERY SELECT true, 'full'::varchar(20), true, NULL::jsonb, v_last_reading.id; RETURN; END IF; -- Otherwise, store as differential RETURN QUERY SELECT true, 'differential'::varchar(20), true, '[]'::jsonb, v_last_reading.id; RETURN; END; $$ LANGUAGE plpgsql; -- Function to update HDD presence tracking CREATE OR REPLACE FUNCTION update_hdd_presence( p_serial_number VARCHAR(64), p_node VARCHAR(64) ) RETURNS VOID AS $$ BEGIN -- Mark all previous presence records for this serial as historic UPDATE hdd_presence SET is_current = FALSE WHERE serial_number = p_serial_number AND is_current = TRUE AND node <> p_node; -- Check if there's already a current presence for this serial/node IF EXISTS (SELECT 1 FROM hdd_presence WHERE serial_number = p_serial_number AND node = p_node AND is_current = TRUE) THEN -- Update data_end for existing current presence UPDATE hdd_presence SET data_end = NOW() WHERE serial_number = p_serial_number AND node = p_node AND is_current = TRUE; ELSE -- Create new presence record INSERT INTO hdd_presence (serial_number, node, data_start, data_end, is_current) VALUES (p_serial_number, p_node, NOW(), NOW(), TRUE); END IF; END; $$ LANGUAGE plpgsql; -- Function to update timestamps CREATE OR REPLACE FUNCTION update_timestamp() RETURNS TRIGGER AS $$ BEGIN NEW.updated_at = NOW(); RETURN NEW; END; $$ LANGUAGE plpgsql; -- Create triggers for timestamp updates CREATE TRIGGER update_hdd_inventory_timestamp BEFORE UPDATE ON hdd_inventory FOR EACH ROW EXECUTE FUNCTION update_timestamp(); CREATE TRIGGER update_smart_thresholds_timestamp BEFORE UPDATE ON smart_thresholds FOR EACH ROW EXECUTE FUNCTION update_timestamp(); CREATE TRIGGER update_system_config_timestamp BEFORE UPDATE ON system_config FOR EACH ROW EXECUTE FUNCTION update_timestamp(); -- Grant permissions to autosmart user GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO autosmart; GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO autosmart; GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO autosmart; -- Grant specific permissions for hdd_presence table GRANT SELECT, INSERT, UPDATE, DELETE ON TABLE hdd_presence TO autosmart; -- Final message DO $$ BEGIN RAISE NOTICE 'autoSMART database schema deployed successfully!'; RAISE NOTICE 'Tables created: hdd_inventory, hdd_presence, smart_readings, predictions, smart_thresholds, alert_history, system_config'; RAISE NOTICE 'Views created: smart_readings_reconstructed, latest_smart_readings, drive_health_summary'; RAISE NOTICE 'Functions created: update_hdd_presence(), should_store_smart_reading()'; RAISE NOTICE 'Permissions granted to autosmart user'; END $$;