Madagascar / projects / autoSMART / sql / migrate-v1-to-v2.sql
1 contributor
339 lines | 14.278kb
-- autoSMART Migration v1 → v2
-- Migrates existing data from smart_readings (blob JSONB) to new schema
-- Date: 2026-05-20
-- Execution time: ~5-15 minutes for 151K readings
-- NOTE: Only baseline + full readings are migrated (differential are empty due to bug)

BEGIN;

-- ============================================================================
-- PHASE 1: Populate smart_param_catalog from smart_thresholds + auto-discovery
-- ============================================================================

DO $$
DECLARE
    v_param TEXT;
    v_threshold_rec RECORD;
BEGIN
    RAISE NOTICE '[PHASE 1] Populating smart_param_catalog...';

    FOR v_threshold_rec IN
        SELECT parameter_name, warning_threshold, critical_threshold,
               weight, description
        FROM smart_thresholds
    LOOP
        INSERT INTO smart_param_catalog
            (param_name, description, warning_threshold, critical_threshold,
             health_weight, is_critical, unit)
        VALUES (
            v_threshold_rec.parameter_name,
            v_threshold_rec.description,
            v_threshold_rec.warning_threshold,
            v_threshold_rec.critical_threshold,
            v_threshold_rec.weight,
            (v_threshold_rec.weight >= 8.0),
            'count'
        )
        ON CONFLICT (param_name) DO UPDATE
            SET warning_threshold  = EXCLUDED.warning_threshold,
                critical_threshold = EXCLUDED.critical_threshold,
                health_weight      = EXCLUDED.health_weight,
                is_critical        = EXCLUDED.is_critical,
                description        = COALESCE(EXCLUDED.description, smart_param_catalog.description),
                updated_at         = NOW();
    END LOOP;

    RAISE NOTICE '[PHASE 1] smart_thresholds → smart_param_catalog: % parameters imported',
        (SELECT COUNT(*) FROM smart_param_catalog);

    FOR v_param IN
        SELECT DISTINCT jsonb_object_keys(parameters_json)
        FROM smart_readings
        WHERE parameters_json IS NOT NULL AND parameters_json != '{}'::jsonb
    LOOP
        INSERT INTO smart_param_catalog (param_name)
        VALUES (v_param)
        ON CONFLICT (param_name) DO NOTHING;
    END LOOP;

    RAISE NOTICE '[PHASE 1] Auto-discovery from JSONB complete: % total parameters in catalog',
        (SELECT COUNT(*) FROM smart_param_catalog);
END $$;

-- ============================================================================
-- PHASE 2: Migrate smart_readings → smart_collection_events (only full/baseline)
-- ============================================================================

DO $$
DECLARE
    v_count BIGINT;
BEGIN
    RAISE NOTICE '[PHASE 2] Migrating full/baseline readings to smart_collection_events...';

    INSERT INTO smart_collection_events
        (hdd_id, serial_number, node_id, collected_at,
         collection_ok, temperature, checksum, param_count)
    SELECT
        hdd_id,
        serial_number,
        COALESCE(node_id, 'unknown'),
        timestamp,
        COALESCE(collection_ok, true),
        temperature,
        checksum,
        CASE WHEN parameters_json IS NOT NULL AND parameters_json != '{}'::jsonb
             THEN (SELECT COUNT(*) FROM jsonb_object_keys(parameters_json))::SMALLINT
             ELSE 0::SMALLINT
        END
    FROM smart_readings
    WHERE reading_type IN ('baseline', 'full')
    ORDER BY timestamp;

    v_count := (SELECT COUNT(*) FROM smart_collection_events);
    RAISE NOTICE '[PHASE 2] Migrated % collection events', v_count;
END $$;

-- ============================================================================
-- PHASE 3: Create monthly partitions for historical data range
-- ============================================================================

DO $$
DECLARE
    v_min_date DATE;
    v_max_date DATE;
    v_d DATE;
BEGIN
    RAISE NOTICE '[PHASE 3] Creating monthly partitions for historical range...';

    SELECT DATE_TRUNC('month', MIN(timestamp))::DATE,
           DATE_TRUNC('month', MAX(timestamp))::DATE + INTERVAL '1 month'
    INTO v_min_date, v_max_date
    FROM smart_readings;

    v_d := v_min_date;
    WHILE v_d <= v_max_date LOOP
        PERFORM create_monthly_partition(
            EXTRACT(YEAR FROM v_d)::INTEGER,
            EXTRACT(MONTH FROM v_d)::INTEGER
        );
        v_d := v_d + INTERVAL '1 month';
    END LOOP;

    RAISE NOTICE '[PHASE 3] Partition creation complete';
END $$;

-- ============================================================================
-- PHASE 4: Populate smart_param_values from JSONB
-- ============================================================================

DO $$
DECLARE
    v_migrated BIGINT;
    v_errors   BIGINT;
BEGIN
    RAISE NOTICE '[PHASE 4] Migrating parameter values from smart_readings JSONB...';

    INSERT INTO smart_param_values
        (event_id, hdd_id, param_id, collected_at,
         raw_value, normalized_value, worst_value, threshold_value, when_failed)
    SELECT
        sce.id,
        sr.hdd_id,
        spc.id,
        sr.timestamp,
        -- Extract raw_value: handle both scalar {"param": 123} and object {"param": {"raw_value": 123}}
        CASE
            WHEN jsonb_typeof(sr.parameters_json->kv.key) = 'number'
                THEN (sr.parameters_json->kv.key)::BIGINT
            WHEN jsonb_typeof(sr.parameters_json->kv.key) = 'object'
                THEN (sr.parameters_json->kv.key->>'raw_value')::BIGINT
            ELSE NULL::BIGINT
        END,
        CASE
            WHEN jsonb_typeof(sr.parameters_json->kv.key) = 'object'
                THEN (sr.parameters_json->kv.key->>'value')::SMALLINT
            ELSE NULL::SMALLINT
        END,
        CASE
            WHEN jsonb_typeof(sr.parameters_json->kv.key) = 'object'
                THEN (sr.parameters_json->kv.key->>'worst')::SMALLINT
            ELSE NULL::SMALLINT
        END,
        CASE
            WHEN jsonb_typeof(sr.parameters_json->kv.key) = 'object'
                THEN (sr.parameters_json->kv.key->>'thresh')::SMALLINT
            ELSE NULL::SMALLINT
        END,
        CASE
            WHEN jsonb_typeof(sr.parameters_json->kv.key) = 'object'
                THEN sr.parameters_json->kv.key->>'when_failed'
            ELSE NULL::VARCHAR
        END
    FROM smart_readings sr
    JOIN smart_collection_events sce
        ON sce.hdd_id = sr.hdd_id
        AND sce.collected_at = sr.timestamp
        AND sce.serial_number = sr.serial_number
    CROSS JOIN LATERAL jsonb_each_text(COALESCE(sr.parameters_json, '{}'::jsonb)) AS kv(key, value)
    LEFT JOIN smart_param_catalog spc ON spc.param_name = kv.key
    WHERE sr.reading_type IN ('baseline', 'full')
      AND sr.parameters_json IS NOT NULL
      AND sr.parameters_json != '{}'::jsonb
      AND spc.id IS NOT NULL;

    v_migrated := (SELECT COUNT(*) FROM smart_param_values);
    RAISE NOTICE '[PHASE 4] Migrated % parameter values', v_migrated;
END $$;

-- ============================================================================
-- PHASE 5: Populate param_id_ata from JSONB metadata
-- ============================================================================

DO $$
DECLARE
    v_updated INTEGER;
BEGIN
    RAISE NOTICE '[PHASE 5] Extracting ATA attribute IDs from JSONB...';

    UPDATE smart_param_catalog spc
    SET param_id_ata = subq.ata_id
    FROM (
        SELECT DISTINCT ON (kv.key)
            kv.key AS param_name,
            (sr.parameters_json->kv.key->>'id')::INTEGER AS ata_id
        FROM smart_readings sr
        CROSS JOIN LATERAL jsonb_each(sr.parameters_json) AS kv
        WHERE jsonb_typeof(sr.parameters_json->kv.key) = 'object'
          AND sr.parameters_json->kv.key->>'id' IS NOT NULL
        ORDER BY kv.key, sr.timestamp DESC
    ) subq
    WHERE spc.param_name = subq.param_name
      AND subq.ata_id IS NOT NULL
      AND spc.param_id_ata IS NULL;

    GET DIAGNOSTICS v_updated = ROW_COUNT;
    RAISE NOTICE '[PHASE 5] Updated % parameters with ATA IDs', v_updated;
END $$;

-- ============================================================================
-- PHASE 6: Integrity verification
-- ============================================================================

DO $$
DECLARE
    v_orig_full_readings    BIGINT;
    v_migrated_events       BIGINT;
    v_migrated_values       BIGINT;
    v_avg_params_per_event  NUMERIC;
    v_catalog_params        BIGINT;
BEGIN
    RAISE NOTICE '[PHASE 6] Verifying migration integrity...';

    SELECT COUNT(*) INTO v_orig_full_readings
    FROM smart_readings WHERE reading_type IN ('baseline', 'full');

    SELECT COUNT(*) INTO v_migrated_events
    FROM smart_collection_events;

    SELECT COUNT(*) INTO v_migrated_values
    FROM smart_param_values;

    SELECT COUNT(*) INTO v_catalog_params
    FROM smart_param_catalog;

    v_avg_params_per_event := ROUND(v_migrated_values::NUMERIC / NULLIF(v_migrated_events, 0), 2);

    RAISE NOTICE '═══════════════════════════════════════════════';
    RAISE NOTICE 'MIGRATION SUMMARY:';
    RAISE NOTICE '───────────────────────────────────────────────';
    RAISE NOTICE 'Original full/baseline readings:    %', v_orig_full_readings;
    RAISE NOTICE 'Migrated collection events:        %', v_migrated_events;
    RAISE NOTICE 'Migrated parameter values:         %', v_migrated_values;
    RAISE NOTICE 'Average params per event:          %', v_avg_params_per_event;
    RAISE NOTICE 'Parameters in catalog:             %', v_catalog_params;
    RAISE NOTICE '═══════════════════════════════════════════════';

    IF v_migrated_events = 0 THEN
        RAISE EXCEPTION 'ERROR: No events migrated! Check smart_readings data.';
    END IF;

    IF v_migrated_events < v_orig_full_readings THEN
        RAISE WARNING 'Event count (%) less than original (%). Possible: timestamp duplicates detected and consolidated.',
            v_migrated_events, v_orig_full_readings;
    END IF;

    IF v_migrated_values = 0 THEN
        RAISE WARNING 'No parameter values migrated! Check JSONB format.';
    END IF;

    RAISE NOTICE 'Migration integrity check: PASSED ✓';
END $$;

-- ============================================================================
-- PHASE 7: Sample data validation
-- ============================================================================

DO $$
DECLARE
    v_rec RECORD;
    v_count INTEGER;
BEGIN
    RAISE NOTICE '[PHASE 7] Sample data validation...';

    SELECT COUNT(*) INTO v_count
    FROM smart_param_values
    WHERE raw_value IS NOT NULL;

    RAISE NOTICE 'Records with raw_value: % / %', v_count,
        (SELECT COUNT(*) FROM smart_param_values);

    FOR v_rec IN
        SELECT serial_number, COUNT(*) as event_count, SUM(param_count) as total_params
        FROM smart_collection_events
        GROUP BY serial_number
        ORDER BY event_count DESC
        LIMIT 5
    LOOP
        RAISE NOTICE 'Disk: %, Events: %, Total params across events: %',
            v_rec.serial_number, v_rec.event_count, v_rec.total_params;
    END LOOP;

    RAISE NOTICE '[PHASE 7] Sample validation: PASSED ✓';
END $$;

-- ============================================================================
-- FINAL SUMMARY
-- ============================================================================

DO $$
BEGIN
    RAISE NOTICE '';
    RAISE NOTICE '╔════════════════════════════════════════════════════════════════╗';
    RAISE NOTICE '║ ✅ Migration v1 → v2 Complete!                                 ║';
    RAISE NOTICE '╠════════════════════════════════════════════════════════════════╣';
    RAISE NOTICE '║                                                                ║';
    RAISE NOTICE '║ New data is now in:                                            ║';
    RAISE NOTICE '║   • smart_collection_events (metadata per-collection)           ║';
    RAISE NOTICE '║   • smart_param_values (parameter values, partitioned)          ║';
    RAISE NOTICE '║   • smart_param_catalog (parameter registry)                    ║';
    RAISE NOTICE '║                                                                ║';
    RAISE NOTICE '║ Views available:                                               ║';
    RAISE NOTICE '║   • v_latest_param_values                                       ║';
    RAISE NOTICE '║   • v_drive_health_summary                                      ║';
    RAISE NOTICE '║   • v_param_trend                                               ║';
    RAISE NOTICE '║   • v_cluster_overview                                          ║';
    RAISE NOTICE '║   • v_smart_readings_compat (backward-compatible)               ║';
    RAISE NOTICE '║                                                                ║';
    RAISE NOTICE '║ Next steps:                                                    ║';
    RAISE NOTICE '║   1. Verify data: SELECT * FROM v_drive_health_summary;        ║';
    RAISE NOTICE '║   2. Run collation fix: ALTER DATABASE autosmart               ║';
    RAISE NOTICE '║      REFRESH COLLATION VERSION;                                ║';
    RAISE NOTICE '║   3. Archive old table: ALTER TABLE smart_readings             ║';
    RAISE NOTICE '║      RENAME TO smart_readings_archive_v1;                       ║';
    RAISE NOTICE '║   4. Update collectors (SmartCollector.pm, daemon)              ║';
    RAISE NOTICE '║                                                                ║';
    RAISE NOTICE '╚════════════════════════════════════════════════════════════════╝';
    RAISE NOTICE '';
END $$;

COMMIT;