Showing 6 changed files with 482 additions and 19 deletions
+6 -3
HealthProbe/Doc/00-agent-guides/AGENTS.md
@@ -193,13 +193,16 @@ final class TypeDistributionBin {
193 193
 // Deletions are recorded by sampleUUIDHash because HKDeletedObject exposes UUIDs,
194 194
 // not complete sample payloads.
195 195
 
196
-// Interface updated 2026-05-23 — see AGENTS.md
196
+// Interface updated 2026-05-24 — see AGENTS.md
197 197
 // HealthArchiveStore exposes SQL-first observation diff APIs:
198 198
 // diffSummary(_:) returns appeared/disappeared/representationChanged counts and
199 199
 // diffRecords(_:) returns a paged record list for one change kind.
200 200
 // aggregateComparison(_:) compares materialized daily aggregates between two
201
-// observations. UI/cache agents should consume these APIs instead of loading full
202
-// observation record sets.
201
+// observations. sourceProvenanceBreakdown(_:) returns visible source/device
202
+// composition, and consolidationEvidence(_:) returns conservative evidence labels
203
+// with counts, aggregate sums, coverage, density, source compatibility, and
204
+// uncertainty text. UI/cache agents should consume these APIs instead of loading
205
+// full observation record sets.
203 206
 
204 207
 // Storage objective updated 2026-05-23 — see AGENTS.md
205 208
 // Recurring complete snapshots are out of scope for the target architecture.
+10 -11
HealthProbe/Doc/04-project/IMPLEMENTATION_STATUS.md
@@ -1,6 +1,6 @@
1 1
 # HealthProbe - Implementation Status
2 2
 
3
-**Last Updated:** 2026-05-23
3
+**Last Updated:** 2026-05-24
4 4
 
5 5
 ## Current Reality
6 6
 
@@ -25,7 +25,7 @@ There are no real deployments, only test installations. Existing prototype datab
25 25
 |------|----------------|--------------------|
26 26
 | Product docs | Updated | Keep `HealthProbe/Doc/README.md` as canonical index |
27 27
 | HealthKit capture | Prototype exists | Adapt capture to write differential SQLite observations first |
28
-| SQLite archive | Archive v2 schema, differential write path, daily aggregate rebuilds, integrity report, v2 record reads, initial SQL diff/count/aggregate APIs, and XCTest coverage are in place; legacy write mirror still exists | Add provenance and consolidation-evidence SQL analysis, large synthetic-data tests, then retire `archive_samples` |
28
+| SQLite archive | Archive v2 schema, differential write path, daily aggregate rebuilds, integrity report, v2 record reads, SQL diff/count/aggregate/provenance/consolidation-evidence APIs, and XCTest coverage are in place; legacy write mirror still exists | Add large synthetic-data timing/memory tests, then retire `archive_samples` |
29 29
 | Core Data cache | Not implemented | Add rebuildable cache for expensive counts, summaries, report metadata, UI state |
30 30
 | SwiftData cache | Exists | Treat as disposable prototype data; reset/ignore during v2 transition |
31 31
 | UI | Prototype exists | Reframe screens around observations, diffs, export, archive status |
@@ -38,13 +38,12 @@ There are no real deployments, only test installations. Existing prototype datab
38 38
 
39 39
 Detailed checkable milestones live in [`Refactoring-Plan.md`](Refactoring-Plan.md).
40 40
 
41
-1. Add provenance and consolidation-evidence queries on top of the SQLite archive.
42
-2. Expand the synthetic large-data test harness for diff/export memory behavior.
43
-3. Add Core Data UI/report cache and rebuild pipeline.
44
-4. Replace SwiftData UI dependencies with Core Data/cache DTOs.
45
-5. Update UI language from anomaly/status to observation/diff/export.
46
-6. Add streaming exports with manifests.
47
-7. Validate on low-memory/legacy-class devices.
41
+1. Expand the synthetic large-data test harness for diff/export memory behavior.
42
+2. Add Core Data UI/report cache and rebuild pipeline.
43
+3. Replace SwiftData UI dependencies with Core Data/cache DTOs.
44
+4. Update UI language from anomaly/status to observation/diff/export.
45
+5. Add streaming exports with manifests.
46
+6. Validate on low-memory/legacy-class devices.
48 47
 
49 48
 ## Known Prototype Mismatches
50 49
 
@@ -54,13 +53,13 @@ Detailed checkable milestones live in [`Refactoring-Plan.md`](Refactoring-Plan.m
54 53
 - Current archive schema is not sufficient as the long-term source of truth.
55 54
 - Existing implementation may decode or cache too much data for low-end devices.
56 55
 - Old prototype database compatibility is no longer required.
57
-- Initial SQLite archive tests cover open/init/reset/idempotency, small observation diffs, and materialized aggregate comparison, but not yet large-volume diff/export behavior.
56
+- Initial SQLite archive tests cover open/init/reset/idempotency, small observation diffs, materialized aggregate comparison, source/provenance breakdowns, and consolidation-evidence labels; large-volume diff/export behavior is not yet covered.
58 57
 
59 58
 ## Verification Checklist
60 59
 
61 60
 - [ ] SQLite archive v2 can reconstruct records visible at observation T.
62 61
 - [ ] No recurring complete snapshot copies are written for high-volume types.
63
-- [ ] SQL diff between two observations runs without loading full datasets into Swift arrays.
62
+- [x] SQL diff between two observations runs without loading full datasets into Swift arrays.
64 63
 - [ ] Expensive counts used by reports/UI are cached and rebuildable.
65 64
 - [ ] Deleting Core Data cache and rebuilding from SQLite restores UI/report summaries.
66 65
 - [ ] Export can stream large selected record sets.
+3 -3
HealthProbe/Doc/04-project/Refactoring-Plan.md
@@ -150,15 +150,15 @@ Checklist:
150 150
 - [x] Implement representationChanged query between observations.
151 151
 - [x] Implement diff counts using temp tables or equivalent SQL-first strategy.
152 152
 - [x] Implement aggregate comparison query.
153
-- [ ] Implement consolidation-likely evidence query.
154
-- [ ] Implement source/provenance breakdown query.
153
+- [x] Implement consolidation-likely evidence query.
154
+- [x] Implement source/provenance breakdown query.
155 155
 - [ ] Add query timing/memory tests on synthetic large datasets.
156 156
 
157 157
 Acceptance:
158 158
 - [x] Observation T can be reconstructed from ranges/events.
159 159
 - [ ] Large diff returns counts and first page without loading all rows.
160 160
 - [x] Query results are deterministic and ordered.
161
-- [ ] Consolidation evidence includes count, aggregate, coverage, density, and uncertainty data.
161
+- [x] Consolidation evidence includes count, aggregate, coverage, density, and uncertainty data.
162 162
 
163 163
 ## Milestone 6 - Core Data UI/Report Cache
164 164
 
+47 -0
HealthProbe/Services/Protocols/HealthArchiveStore.swift
@@ -10,6 +10,8 @@ protocol HealthArchiveStore {
10 10
     func diffSummary(_ request: HealthArchiveDiffRequest) async throws -> HealthArchiveDiffSummary
11 11
     func diffRecords(_ request: HealthArchiveDiffRecordRequest) async throws -> [ArchivedHealthRecord]
12 12
     func aggregateComparison(_ request: HealthArchiveAggregateComparisonRequest) async throws -> [HealthArchiveAggregateComparisonRow]
13
+    func sourceProvenanceBreakdown(_ request: HealthArchiveSourceProvenanceRequest) async throws -> [HealthArchiveSourceProvenanceRow]
14
+    func consolidationEvidence(_ request: HealthArchiveConsolidationEvidenceRequest) async throws -> [HealthArchiveConsolidationEvidence]
13 15
     func exportReport(_ request: HealthArchiveReportRequest) async throws -> URL
14 16
     func checkIntegrity() async throws -> HealthArchiveIntegrityReport
15 17
 }
@@ -176,6 +178,51 @@ struct HealthArchiveAggregateComparisonRow: Equatable, Sendable {
176 178
     }
177 179
 }
178 180
 
181
/// Parameters for `HealthArchiveStore.sourceProvenanceBreakdown(_:)`.
struct HealthArchiveSourceProvenanceRequest: Equatable, Sendable {
    /// Observation whose visible records should be summarized.
    /// When `nil`, the SQLite store applies no observation filter at all.
    let visibleAtObservationID: Int64?

    /// Restricts the breakdown to a single sample type; `nil` includes every type.
    let sampleTypeIdentifier: String?

    /// Maximum number of rows to return; `nil` means unlimited.
    /// The SQLite store clamps negative values to zero.
    let limit: Int?
}
186
+
187
/// One group of visible records that share the same sample type, source, source
/// revision, and device model.
struct HealthArchiveSourceProvenanceRow: Equatable, Sendable {
    /// Sample type the group belongs to.
    let sampleTypeIdentifier: String

    // Source identity. Each value is nil when the record has no matching
    // source / source-revision row in the archive.
    let sourceNameHash: String?
    let sourceBundleIdentifier: String?
    let sourceProductType: String?
    let sourceVersion: String?
    let sourceOperatingSystemVersion: String?

    /// Model of the recording device; nil when no device row is linked.
    let deviceModel: String?

    /// Number of visible records in the group.
    let visibleRecordCount: Int

    /// Sum of the numeric values in the group; nil when none of them is numeric.
    let valueSum: Double?

    /// Smallest start date across the group.
    let earliestStartDate: Date?

    /// Largest end date across the group.
    let latestEndDate: Date?
}
200
+
201
/// Parameters for `HealthArchiveStore.consolidationEvidence(_:)`.
struct HealthArchiveConsolidationEvidenceRequest: Equatable, Sendable {
    /// Observation that provides the "before" record set.
    let fromObservationID: Int64

    /// Observation that provides the "after" record set.
    let toObservationID: Int64

    /// Restricts the evidence to a single sample type; `nil` reports every type
    /// visible at either observation.
    let sampleTypeIdentifier: String?
}
206
+
207
/// Per-sample-type evidence comparing the records visible at two observations.
struct HealthArchiveConsolidationEvidence: Equatable, Sendable {
    /// Sample type this evidence row describes.
    let sampleTypeIdentifier: String

    /// Observation pair the evidence was computed for (copied from the request).
    let fromObservationID: Int64
    let toObservationID: Int64

    /// Samples visible at `from` but not at `to`.
    let disappearedCount: Int

    /// Samples visible at `to` but not at `from`.
    let appearedCount: Int

    /// Samples visible at both observations under different versions.
    let representationChangedCount: Int

    /// Total visible records of this type at each observation.
    let fromVisibleRecordCount: Int
    let toVisibleRecordCount: Int

    /// Sum of numeric values at each observation; nil when nothing numeric is visible.
    let fromValueSum: Double?
    let toValueSum: Double?

    /// Overlap between the two [earliest start, latest end] spans, clamped at zero.
    /// Nil when either span is unavailable.
    let coverageOverlapSeconds: Double?

    /// Visible records divided by the span length (span / 3600) at each observation.
    /// Nil when there are no records or the span is not positive.
    let densityBefore: Double?
    let densityAfter: Double?

    /// True when each observation shows at most one distinct source bundle
    /// identifier and at most one distinct device model for this type.
    let sourceCompatible: Bool

    /// Classification produced by the store: "representation_changed",
    /// "consolidation_likely", "appeared", "aggregate_changed", or "uncertain".
    let label: String

    /// Explanation that accompanies the "uncertain" label; nil for every other label.
    let uncertaintyReason: String?
}
225
+
179 226
 struct HealthArchiveReportRequest: Equatable, Sendable {
180 227
     let reportID: UUID
181 228
     let title: String
+327 -0
HealthProbe/Services/SQLiteHealthArchiveStore.swift
@@ -559,6 +559,333 @@ actor SQLiteHealthArchiveStore: HealthArchiveStore {
559 559
         }
560 560
     }
561 561
 
562
+    func sourceProvenanceBreakdown(_ request: HealthArchiveSourceProvenanceRequest) async throws -> [HealthArchiveSourceProvenanceRow] {
563
+        let db = try openDatabase()
564
+        defer { sqlite3_close(db) }
565
+        try prepareSchemaIfNeeded(db)
566
+
567
+        var clauses: [String] = []
568
+        if request.sampleTypeIdentifier != nil {
569
+            clauses.append("t.type_identifier = ?")
570
+        }
571
+        let whereClause = clauses.isEmpty ? "" : "WHERE \(clauses.joined(separator: " AND "))"
572
+        let limitClause = request.limit.map { "LIMIT \(max($0, 0))" } ?? ""
573
+        let sql = """
574
+        WITH visible_ranges AS (
575
+            SELECT sample_id, version_id
576
+            FROM sample_visibility_ranges
577
+            WHERE (? IS NULL OR (
578
+                first_observation_id <= ?
579
+                AND (last_observation_id IS NULL OR last_observation_id > ?)
580
+            ))
581
+        )
582
+        SELECT
583
+            t.type_identifier,
584
+            src.source_name_hash,
585
+            src.bundle_identifier,
586
+            sr.product_type,
587
+            sr.version,
588
+            sr.operating_system_version,
589
+            d.model,
590
+            COUNT(*) AS visible_record_count,
591
+            SUM(v.numeric_value) AS value_sum,
592
+            MIN(v.start_date) AS earliest_start_date,
593
+            MAX(v.end_date) AS latest_end_date
594
+        FROM visible_ranges vr
595
+        JOIN sample_versions v ON v.id = vr.version_id
596
+        JOIN samples s ON s.id = vr.sample_id
597
+        JOIN sample_types t ON t.id = s.sample_type_id
598
+        LEFT JOIN source_revisions sr ON sr.id = v.source_revision_id
599
+        LEFT JOIN sources src ON src.id = sr.source_id
600
+        LEFT JOIN hk_devices d ON d.id = v.hk_device_id
601
+        \(whereClause)
602
+        GROUP BY
603
+            t.type_identifier,
604
+            src.source_name_hash,
605
+            src.bundle_identifier,
606
+            sr.product_type,
607
+            sr.version,
608
+            sr.operating_system_version,
609
+            d.model
610
+        ORDER BY visible_record_count DESC, t.type_identifier ASC, src.bundle_identifier ASC, d.model ASC
611
+        \(limitClause)
612
+        """
613
+
614
+        return try withStatement(sql, db: db) { statement in
615
+            var index: Int32 = 1
616
+            bindInt64(request.visibleAtObservationID, to: index, in: statement)
617
+            index += 1
618
+            bindInt64(request.visibleAtObservationID, to: index, in: statement)
619
+            index += 1
620
+            bindInt64(request.visibleAtObservationID, to: index, in: statement)
621
+            index += 1
622
+            if let sampleTypeIdentifier = request.sampleTypeIdentifier {
623
+                bindText(sampleTypeIdentifier, to: index, in: statement)
624
+                index += 1
625
+            }
626
+
627
+            var rows: [HealthArchiveSourceProvenanceRow] = []
628
+            while sqlite3_step(statement) == SQLITE_ROW {
629
+                rows.append(HealthArchiveSourceProvenanceRow(
630
+                    sampleTypeIdentifier: columnText(statement, 0) ?? "",
631
+                    sourceNameHash: columnText(statement, 1),
632
+                    sourceBundleIdentifier: columnText(statement, 2),
633
+                    sourceProductType: columnText(statement, 3),
634
+                    sourceVersion: columnText(statement, 4),
635
+                    sourceOperatingSystemVersion: columnText(statement, 5),
636
+                    deviceModel: columnText(statement, 6),
637
+                    visibleRecordCount: columnInt(statement, 7) ?? 0,
638
+                    valueSum: columnDouble(statement, 8),
639
+                    earliestStartDate: columnUnixDate(statement, 9),
640
+                    latestEndDate: columnUnixDate(statement, 10)
641
+                ))
642
+            }
643
+            return rows
644
+        }
645
+    }
646
+
647
+    func consolidationEvidence(_ request: HealthArchiveConsolidationEvidenceRequest) async throws -> [HealthArchiveConsolidationEvidence] {
648
+        let db = try openDatabase()
649
+        defer { sqlite3_close(db) }
650
+        try prepareSchemaIfNeeded(db)
651
+
652
+        let typeClause = request.sampleTypeIdentifier == nil ? "" : "AND t.type_identifier = ?"
653
+        let topFilterClause = request.sampleTypeIdentifier == nil ? "" : "WHERE tk.type_identifier = ?"
654
+        let sql = """
655
+        WITH from_visible AS (
656
+            SELECT sample_id, version_id
657
+            FROM sample_visibility_ranges
658
+            WHERE first_observation_id <= ?
659
+              AND (last_observation_id IS NULL OR last_observation_id > ?)
660
+        ),
661
+        to_visible AS (
662
+            SELECT sample_id, version_id
663
+            FROM sample_visibility_ranges
664
+            WHERE first_observation_id <= ?
665
+              AND (last_observation_id IS NULL OR last_observation_id > ?)
666
+        ),
667
+        type_keys AS (
668
+            SELECT DISTINCT sample_type_id, type_identifier
669
+            FROM (
670
+                SELECT s.sample_type_id, t.type_identifier
671
+                FROM from_visible fv
672
+                JOIN samples s ON s.id = fv.sample_id
673
+                JOIN sample_types t ON t.id = s.sample_type_id
674
+                UNION
675
+                SELECT s.sample_type_id, t.type_identifier
676
+                FROM to_visible tv
677
+                JOIN samples s ON s.id = tv.sample_id
678
+                JOIN sample_types t ON t.id = s.sample_type_id
679
+            )
680
+        ),
681
+        from_type_metrics AS (
682
+            SELECT
683
+                s.sample_type_id,
684
+                COUNT(*) AS from_visible_record_count,
685
+                SUM(v.numeric_value) AS from_value_sum,
686
+                MIN(v.start_date) AS from_start_date,
687
+                MAX(v.end_date) AS from_end_date,
688
+                COUNT(DISTINCT CASE WHEN src.bundle_identifier IS NULL THEN '__NULL__' ELSE src.bundle_identifier END) AS from_bundle_count,
689
+                COUNT(DISTINCT CASE WHEN d.model IS NULL THEN '__NULL__' ELSE d.model END) AS from_model_count
690
+            FROM from_visible fv
691
+            JOIN samples s ON s.id = fv.sample_id
692
+            JOIN sample_versions v ON v.id = fv.version_id
693
+            LEFT JOIN source_revisions sr ON sr.id = v.source_revision_id
694
+            LEFT JOIN sources src ON src.id = sr.source_id
695
+            LEFT JOIN hk_devices d ON d.id = v.hk_device_id
696
+            GROUP BY s.sample_type_id
697
+        ),
698
+        to_type_metrics AS (
699
+            SELECT
700
+                s.sample_type_id,
701
+                COUNT(*) AS to_visible_record_count,
702
+                SUM(v.numeric_value) AS to_value_sum,
703
+                MIN(v.start_date) AS to_start_date,
704
+                MAX(v.end_date) AS to_end_date,
705
+                COUNT(DISTINCT CASE WHEN src.bundle_identifier IS NULL THEN '__NULL__' ELSE src.bundle_identifier END) AS to_bundle_count,
706
+                COUNT(DISTINCT CASE WHEN d.model IS NULL THEN '__NULL__' ELSE d.model END) AS to_model_count
707
+            FROM to_visible tv
708
+            JOIN samples s ON s.id = tv.sample_id
709
+            JOIN sample_versions v ON v.id = tv.version_id
710
+            LEFT JOIN source_revisions sr ON sr.id = v.source_revision_id
711
+            LEFT JOIN sources src ON src.id = sr.source_id
712
+            LEFT JOIN hk_devices d ON d.id = v.hk_device_id
713
+            GROUP BY s.sample_type_id
714
+        ),
715
+        appeared_by_type AS (
716
+            SELECT s.sample_type_id, COUNT(*) AS appeared_count
717
+            FROM to_visible tv
718
+            LEFT JOIN from_visible fv ON fv.sample_id = tv.sample_id
719
+            JOIN samples s ON s.id = tv.sample_id
720
+            JOIN sample_types t ON t.id = s.sample_type_id
721
+            WHERE fv.sample_id IS NULL \(typeClause)
722
+            GROUP BY s.sample_type_id
723
+        ),
724
+        disappeared_by_type AS (
725
+            SELECT s.sample_type_id, COUNT(*) AS disappeared_count
726
+            FROM from_visible fv
727
+            LEFT JOIN to_visible tv ON tv.sample_id = fv.sample_id
728
+            JOIN samples s ON s.id = fv.sample_id
729
+            JOIN sample_types t ON t.id = s.sample_type_id
730
+            WHERE tv.sample_id IS NULL \(typeClause)
731
+            GROUP BY s.sample_type_id
732
+        ),
733
+        changed_by_type AS (
734
+            SELECT s.sample_type_id, COUNT(*) AS representation_changed_count
735
+            FROM to_visible tv
736
+            JOIN from_visible fv ON fv.sample_id = tv.sample_id
737
+            JOIN samples s ON s.id = tv.sample_id
738
+            JOIN sample_types t ON t.id = s.sample_type_id
739
+            WHERE tv.version_id != fv.version_id \(typeClause)
740
+            GROUP BY s.sample_type_id
741
+        )
742
+        SELECT
743
+            tk.type_identifier,
744
+            COALESCE(d.disappeared_count, 0) AS disappeared_count,
745
+            COALESCE(a.appeared_count, 0) AS appeared_count,
746
+            COALESCE(c.representation_changed_count, 0) AS representation_changed_count,
747
+            COALESCE(f.from_visible_record_count, 0) AS from_visible_record_count,
748
+            COALESCE(t.to_visible_record_count, 0) AS to_visible_record_count,
749
+            f.from_value_sum,
750
+            t.to_value_sum,
751
+            f.from_start_date,
752
+            f.from_end_date,
753
+            t.to_start_date,
754
+            t.to_end_date,
755
+            CASE
756
+                WHEN f.from_start_date IS NULL OR f.from_end_date IS NULL OR t.to_start_date IS NULL OR t.to_end_date IS NULL THEN NULL
757
+                ELSE MAX(0, MIN(f.from_end_date, t.to_end_date) - MAX(f.from_start_date, t.to_start_date))
758
+            END AS coverage_overlap_seconds,
759
+            CASE
760
+                WHEN COALESCE(f.from_visible_record_count, 0) = 0 OR f.from_start_date IS NULL OR f.from_end_date IS NULL OR f.from_end_date <= f.from_start_date THEN NULL
761
+                ELSE CAST(f.from_visible_record_count AS REAL) / ((f.from_end_date - f.from_start_date) / 3600.0)
762
+            END AS density_before,
763
+            CASE
764
+                WHEN COALESCE(t.to_visible_record_count, 0) = 0 OR t.to_start_date IS NULL OR t.to_end_date IS NULL OR t.to_end_date <= t.to_start_date THEN NULL
765
+                ELSE CAST(t.to_visible_record_count AS REAL) / ((t.to_end_date - t.to_start_date) / 3600.0)
766
+            END AS density_after,
767
+            CASE
768
+                WHEN COALESCE(f.from_bundle_count, 0) <= 1
769
+                  AND COALESCE(f.from_model_count, 0) <= 1
770
+                  AND COALESCE(t.to_bundle_count, 0) <= 1
771
+                  AND COALESCE(t.to_model_count, 0) <= 1
772
+                THEN 1 ELSE 0
773
+            END AS source_compatible
774
+        FROM type_keys tk
775
+        LEFT JOIN from_type_metrics f ON f.sample_type_id = tk.sample_type_id
776
+        LEFT JOIN to_type_metrics t ON t.sample_type_id = tk.sample_type_id
777
+        LEFT JOIN appeared_by_type a ON a.sample_type_id = tk.sample_type_id
778
+        LEFT JOIN disappeared_by_type d ON d.sample_type_id = tk.sample_type_id
779
+        LEFT JOIN changed_by_type c ON c.sample_type_id = tk.sample_type_id
780
+        \(topFilterClause)
781
+        ORDER BY tk.type_identifier
782
+        """
783
+
784
+        return try withStatement(sql, db: db) { statement in
785
+            var index: Int32 = 1
786
+            bindInt64(request.fromObservationID, to: index, in: statement)
787
+            index += 1
788
+            bindInt64(request.fromObservationID, to: index, in: statement)
789
+            index += 1
790
+            bindInt64(request.toObservationID, to: index, in: statement)
791
+            index += 1
792
+            bindInt64(request.toObservationID, to: index, in: statement)
793
+            index += 1
794
+            if let sampleTypeIdentifier = request.sampleTypeIdentifier {
795
+                bindText(sampleTypeIdentifier, to: index, in: statement)
796
+                index += 1
797
+            }
798
+            if let sampleTypeIdentifier = request.sampleTypeIdentifier {
799
+                bindText(sampleTypeIdentifier, to: index, in: statement)
800
+                index += 1
801
+            }
802
+            if let sampleTypeIdentifier = request.sampleTypeIdentifier {
803
+                bindText(sampleTypeIdentifier, to: index, in: statement)
804
+                index += 1
805
+            }
806
+            if request.sampleTypeIdentifier != nil {
807
+                bindText(request.sampleTypeIdentifier, to: index, in: statement)
808
+                index += 1
809
+            }
810
+
811
+            var rows: [HealthArchiveConsolidationEvidence] = []
812
+            while sqlite3_step(statement) == SQLITE_ROW {
813
+                let sampleTypeIdentifier = columnText(statement, 0) ?? ""
814
+                let disappearedCount = columnInt(statement, 1) ?? 0
815
+                let appearedCount = columnInt(statement, 2) ?? 0
816
+                let representationChangedCount = columnInt(statement, 3) ?? 0
817
+                let fromVisibleRecordCount = columnInt(statement, 4) ?? 0
818
+                let toVisibleRecordCount = columnInt(statement, 5) ?? 0
819
+                let fromValueSum = columnDouble(statement, 6)
820
+                let toValueSum = columnDouble(statement, 7)
821
+                let coverageOverlapSeconds = columnDouble(statement, 12)
822
+                let densityBefore = columnDouble(statement, 13)
823
+                let densityAfter = columnDouble(statement, 14)
824
+                let sourceCompatible = columnInt(statement, 15) == 1
825
+
826
+                let stableValue: Bool
827
+                if let fromValueSum, let toValueSum {
828
+                    stableValue = abs(toValueSum - fromValueSum) <= max(1.0, abs(fromValueSum) * 0.1)
829
+                } else {
830
+                    stableValue = false
831
+                }
832
+                let hasCoverageOverlap = (coverageOverlapSeconds ?? 0) > 0
833
+                let densityDoesNotIncrease: Bool
834
+                if let densityBefore, let densityAfter {
835
+                    densityDoesNotIncrease = densityAfter <= densityBefore
836
+                } else {
837
+                    densityDoesNotIncrease = false
838
+                }
839
+                let consolidationEvidencePresent = disappearedCount > 0
840
+                    && sourceCompatible
841
+                    && hasCoverageOverlap
842
+                    && densityDoesNotIncrease
843
+                    && stableValue
844
+                let label: String
845
+                let uncertaintyReason: String?
846
+                if representationChangedCount > 0 && disappearedCount == 0 && appearedCount == 0 {
847
+                    label = "representation_changed"
848
+                    uncertaintyReason = nil
849
+                } else if consolidationEvidencePresent {
850
+                    label = "consolidation_likely"
851
+                    uncertaintyReason = nil
852
+                } else if appearedCount > 0 && disappearedCount == 0 && representationChangedCount == 0 {
853
+                    label = "appeared"
854
+                    uncertaintyReason = nil
855
+                } else if appearedCount > 0 && disappearedCount > 0 && sourceCompatible && stableValue {
856
+                    label = "aggregate_changed"
857
+                    uncertaintyReason = nil
858
+                } else if disappearedCount == 0 && appearedCount == 0 && representationChangedCount == 0 {
859
+                    label = "uncertain"
860
+                    uncertaintyReason = "no structural change detected"
861
+                } else {
862
+                    label = "uncertain"
863
+                    uncertaintyReason = "insufficient evidence for a stable consolidation label"
864
+                }
865
+
866
+                rows.append(HealthArchiveConsolidationEvidence(
867
+                    sampleTypeIdentifier: sampleTypeIdentifier,
868
+                    fromObservationID: request.fromObservationID,
869
+                    toObservationID: request.toObservationID,
870
+                    disappearedCount: disappearedCount,
871
+                    appearedCount: appearedCount,
872
+                    representationChangedCount: representationChangedCount,
873
+                    fromVisibleRecordCount: fromVisibleRecordCount,
874
+                    toVisibleRecordCount: toVisibleRecordCount,
875
+                    fromValueSum: fromValueSum,
876
+                    toValueSum: toValueSum,
877
+                    coverageOverlapSeconds: coverageOverlapSeconds,
878
+                    densityBefore: densityBefore,
879
+                    densityAfter: densityAfter,
880
+                    sourceCompatible: sourceCompatible,
881
+                    label: label,
882
+                    uncertaintyReason: uncertaintyReason
883
+                ))
884
+            }
885
+            return rows
886
+        }
887
+    }
888
+
562 889
     func exportReport(_ request: HealthArchiveReportRequest) async throws -> URL {
563 890
         let recordRequest = HealthArchiveRecordRequest(
564 891
             sampleTypeIdentifier: request.typeIdentifierFilter,
+89 -2
HealthProbeTests/SQLiteHealthArchiveStoreTests.swift
@@ -159,6 +159,93 @@ final class SQLiteHealthArchiveStoreTests: XCTestCase {
159 159
         XCTAssertEqual(rows.first?.valueSumDelta, 7)
160 160
     }
161 161
 
162
+    func testSourceProvenanceBreakdownReturnsVisibleSourceRows() async throws {
163
+        let url = databaseURL()
164
+        let store = SQLiteHealthArchiveStore(databaseURL: url)
165
+        let sample = makeStepCountSample(value: 42, start: 1_000)
166
+        let typeIdentifier = sample.sampleType.identifier
167
+
168
+        _ = try await store.upsertSamples([sample], observedAt: Date(timeIntervalSince1970: 3_000))
169
+
170
+        let rows = try await store.sourceProvenanceBreakdown(HealthArchiveSourceProvenanceRequest(
171
+            visibleAtObservationID: nil,
172
+            sampleTypeIdentifier: typeIdentifier,
173
+            limit: 10
174
+        ))
175
+
176
+        XCTAssertEqual(rows.count, 1)
177
+        XCTAssertEqual(rows.first?.sampleTypeIdentifier, typeIdentifier)
178
+        XCTAssertEqual(rows.first?.visibleRecordCount, 1)
179
+        XCTAssertEqual(rows.first?.valueSum, 42)
180
+        XCTAssertEqual(rows.first?.sourceBundleIdentifier, sample.sourceRevision.source.bundleIdentifier)
181
+    }
182
+
183
+    func testConsolidationEvidenceClassifiesStableCountDropAsConsolidationLikely() async throws {
184
+        let url = databaseURL()
185
+        let store = SQLiteHealthArchiveStore(databaseURL: url)
186
+        let typeIdentifier = HKQuantityTypeIdentifier.stepCount.rawValue
187
+
188
+        let firstOldSample = makeStepCountSample(value: 10, start: 1_000, end: 1_500)
189
+        let secondOldSample = makeStepCountSample(value: 20, start: 1_600, end: 2_000)
190
+        let consolidatedSample = makeStepCountSample(value: 30, start: 1_000, end: 2_500)
191
+
192
+        _ = try await store.upsertSamples([firstOldSample, secondOldSample], observedAt: Date(timeIntervalSince1970: 3_000))
193
+        _ = try await store.upsertSamples([consolidatedSample], observedAt: Date(timeIntervalSince1970: 3_060))
194
+        try await store.recordDisappearance(sampleUUIDHash: HashService.sampleUUIDHash(firstOldSample.uuid.uuidString), sampleTypeIdentifier: typeIdentifier, observedMissingAt: Date(timeIntervalSince1970: 3_120))
195
+        try await store.recordDisappearance(sampleUUIDHash: HashService.sampleUUIDHash(secondOldSample.uuid.uuidString), sampleTypeIdentifier: typeIdentifier, observedMissingAt: Date(timeIntervalSince1970: 3_180))
196
+
197
+        let observationIDs = try observationIDs(at: url)
198
+        XCTAssertEqual(observationIDs.count, 4)
199
+
200
+        let rows = try await store.consolidationEvidence(HealthArchiveConsolidationEvidenceRequest(
201
+            fromObservationID: observationIDs[0],
202
+            toObservationID: observationIDs[observationIDs.count - 1],
203
+            sampleTypeIdentifier: typeIdentifier
204
+        ))
205
+
206
+        XCTAssertEqual(rows.count, 1)
207
+        XCTAssertEqual(rows.first?.sampleTypeIdentifier, typeIdentifier)
208
+        XCTAssertEqual(rows.first?.disappearedCount, 2)
209
+        XCTAssertEqual(rows.first?.appearedCount, 1)
210
+        XCTAssertEqual(rows.first?.fromVisibleRecordCount, 2)
211
+        XCTAssertEqual(rows.first?.toVisibleRecordCount, 1)
212
+        XCTAssertEqual(rows.first?.fromValueSum, 30)
213
+        XCTAssertEqual(rows.first?.toValueSum, 30)
214
+        XCTAssertEqual(rows.first?.label, "consolidation_likely")
215
+        XCTAssertTrue(rows.first?.sourceCompatible == true)
216
+    }
217
+
218
+    func testConsolidationEvidenceClassifiesDenseStableShiftAsAggregateChanged() async throws {
219
+        let url = databaseURL()
220
+        let store = SQLiteHealthArchiveStore(databaseURL: url)
221
+        let typeIdentifier = HKQuantityTypeIdentifier.stepCount.rawValue
222
+
223
+        let firstOldSample = makeStepCountSample(value: 10, start: 1_000, end: 1_500)
224
+        let secondOldSample = makeStepCountSample(value: 20, start: 5_000, end: 6_000)
225
+        let denseSample = makeStepCountSample(value: 30, start: 1_000, end: 1_200)
226
+
227
+        _ = try await store.upsertSamples([firstOldSample, secondOldSample], observedAt: Date(timeIntervalSince1970: 3_000))
228
+        _ = try await store.upsertSamples([denseSample], observedAt: Date(timeIntervalSince1970: 3_060))
229
+        try await store.recordDisappearance(sampleUUIDHash: HashService.sampleUUIDHash(firstOldSample.uuid.uuidString), sampleTypeIdentifier: typeIdentifier, observedMissingAt: Date(timeIntervalSince1970: 3_120))
230
+        try await store.recordDisappearance(sampleUUIDHash: HashService.sampleUUIDHash(secondOldSample.uuid.uuidString), sampleTypeIdentifier: typeIdentifier, observedMissingAt: Date(timeIntervalSince1970: 3_180))
231
+
232
+        let observationIDs = try observationIDs(at: url)
233
+        XCTAssertEqual(observationIDs.count, 4)
234
+
235
+        let rows = try await store.consolidationEvidence(HealthArchiveConsolidationEvidenceRequest(
236
+            fromObservationID: observationIDs[0],
237
+            toObservationID: observationIDs[observationIDs.count - 1],
238
+            sampleTypeIdentifier: typeIdentifier
239
+        ))
240
+
241
+        XCTAssertEqual(rows.count, 1)
242
+        XCTAssertEqual(rows.first?.label, "aggregate_changed")
243
+        XCTAssertEqual(rows.first?.fromVisibleRecordCount, 2)
244
+        XCTAssertEqual(rows.first?.toVisibleRecordCount, 1)
245
+        XCTAssertEqual(rows.first?.fromValueSum, 30)
246
+        XCTAssertEqual(rows.first?.toValueSum, 30)
247
+    }
248
+
162 249
     /// Location of the archive database inside this test's temporary directory.
     private func databaseURL() -> URL {
         let archiveURL = temporaryDirectory.appending(path: "Archive.sqlite")
         return archiveURL
     }
@@ -179,11 +266,11 @@ final class SQLiteHealthArchiveStoreTests: XCTestCase {
179 266
         makeStepCountSample(value: 42, start: 1_000)
180 267
     }
181 268
 
182
-    private func makeStepCountSample(value: Double, start: TimeInterval) -> HKQuantitySample {
269
+    private func makeStepCountSample(value: Double, start: TimeInterval, end: TimeInterval? = nil) -> HKQuantitySample {
183 270
         let quantityType = HKQuantityType.quantityType(forIdentifier: .stepCount)!
184 271
         let quantity = HKQuantity(unit: .count(), doubleValue: value)
185 272
         let startDate = Date(timeIntervalSince1970: start)
186
-        let endDate = Date(timeIntervalSince1970: start + 300)
273
+        let endDate = Date(timeIntervalSince1970: end ?? (start + 300))
187 274
         return HKQuantitySample(type: quantityType, quantity: quantity, start: startDate, end: endDate)
188 275
     }
189 276