@@ -193,13 +193,16 @@ final class TypeDistributionBin {
|
||
| 193 | 193 |
// Deletions are recorded by sampleUUIDHash because HKDeletedObject exposes UUIDs, |
| 194 | 194 |
// not complete sample payloads. |
| 195 | 195 |
|
| 196 |
-// Interface updated 2026-05-23 — see AGENTS.md |
|
| 196 |
+// Interface updated 2026-05-24 — see AGENTS.md |
|
| 197 | 197 |
// HealthArchiveStore exposes SQL-first observation diff APIs: |
| 198 | 198 |
// diffSummary(_:) returns appeared/disappeared/representationChanged counts and |
| 199 | 199 |
// diffRecords(_:) returns a paged record list for one change kind. |
| 200 | 200 |
// aggregateComparison(_:) compares materialized daily aggregates between two |
| 201 |
-// observations. UI/cache agents should consume these APIs instead of loading full |
|
| 202 |
-// observation record sets. |
|
| 201 |
+// observations. sourceProvenanceBreakdown(_:) returns visible source/device |
|
| 202 |
+// composition, and consolidationEvidence(_:) returns conservative evidence labels |
|
| 203 |
+// with counts, aggregate sums, coverage, density, source compatibility, and |
|
| 204 |
+// uncertainty text. UI/cache agents should consume these APIs instead of loading |
|
| 205 |
+// full observation record sets. |
|
| 203 | 206 |
|
| 204 | 207 |
// Storage objective updated 2026-05-23 — see AGENTS.md |
| 205 | 208 |
// Recurring complete snapshots are out of scope for the target architecture. |
@@ -1,6 +1,6 @@ |
||
| 1 | 1 |
# HealthProbe - Implementation Status |
| 2 | 2 |
|
| 3 |
-**Last Updated:** 2026-05-23 |
|
| 3 |
+**Last Updated:** 2026-05-24 |
|
| 4 | 4 |
|
| 5 | 5 |
## Current Reality |
| 6 | 6 |
|
@@ -25,7 +25,7 @@ There are no real deployments, only test installations. Existing prototype datab |
||
| 25 | 25 |
|------|----------------|--------------------| |
| 26 | 26 |
| Product docs | Updated | Keep `HealthProbe/Doc/README.md` as canonical index | |
| 27 | 27 |
| HealthKit capture | Prototype exists | Adapt capture to write differential SQLite observations first | |
| 28 |
-| SQLite archive | Archive v2 schema, differential write path, daily aggregate rebuilds, integrity report, v2 record reads, initial SQL diff/count/aggregate APIs, and XCTest coverage are in place; legacy write mirror still exists | Add provenance and consolidation-evidence SQL analysis, large synthetic-data tests, then retire `archive_samples` | |
|
| 28 |
+| SQLite archive | Archive v2 schema, differential write path, daily aggregate rebuilds, integrity report, v2 record reads, SQL diff/count/aggregate/provenance/consolidation-evidence APIs, and XCTest coverage are in place; legacy write mirror still exists | Add large synthetic-data timing/memory tests, then retire `archive_samples` | |
|
| 29 | 29 |
| Core Data cache | Not implemented | Add rebuildable cache for expensive counts, summaries, report metadata, UI state | |
| 30 | 30 |
| SwiftData cache | Exists | Treat as disposable prototype data; reset/ignore during v2 transition | |
| 31 | 31 |
| UI | Prototype exists | Reframe screens around observations, diffs, export, archive status | |
@@ -38,13 +38,12 @@ There are no real deployments, only test installations. Existing prototype datab |
||
| 38 | 38 |
|
| 39 | 39 |
Detailed checkable milestones live in [`Refactoring-Plan.md`](Refactoring-Plan.md). |
| 40 | 40 |
|
| 41 |
-1. Add provenance and consolidation-evidence queries on top of the SQLite archive. |
|
| 42 |
-2. Expand the synthetic large-data test harness for diff/export memory behavior. |
|
| 43 |
-3. Add Core Data UI/report cache and rebuild pipeline. |
|
| 44 |
-4. Replace SwiftData UI dependencies with Core Data/cache DTOs. |
|
| 45 |
-5. Update UI language from anomaly/status to observation/diff/export. |
|
| 46 |
-6. Add streaming exports with manifests. |
|
| 47 |
-7. Validate on low-memory/legacy-class devices. |
|
| 41 |
+1. Expand the synthetic large-data test harness for diff/export memory behavior. |
|
| 42 |
+2. Add Core Data UI/report cache and rebuild pipeline. |
|
| 43 |
+3. Replace SwiftData UI dependencies with Core Data/cache DTOs. |
|
| 44 |
+4. Update UI language from anomaly/status to observation/diff/export. |
|
| 45 |
+5. Add streaming exports with manifests. |
|
| 46 |
+6. Validate on low-memory/legacy-class devices. |
|
| 48 | 47 |
|
| 49 | 48 |
## Known Prototype Mismatches |
| 50 | 49 |
|
@@ -54,13 +53,13 @@ Detailed checkable milestones live in [`Refactoring-Plan.md`](Refactoring-Plan.m |
||
| 54 | 53 |
- Current archive schema is not sufficient as the long-term source of truth. |
| 55 | 54 |
- Existing implementation may decode or cache too much data for low-end devices. |
| 56 | 55 |
- Old prototype database compatibility is no longer required. |
| 57 |
-- Initial SQLite archive tests cover open/init/reset/idempotency, small observation diffs, and materialized aggregate comparison, but not yet large-volume diff/export behavior. |
|
| 56 |
+- Initial SQLite archive tests cover open/init/reset/idempotency, small observation diffs, materialized aggregate comparison, source/provenance breakdowns, and consolidation-evidence labels, but not yet large-volume diff/export behavior. |
|
| 58 | 57 |
|
| 59 | 58 |
## Verification Checklist |
| 60 | 59 |
|
| 61 | 60 |
- [ ] SQLite archive v2 can reconstruct records visible at observation T. |
| 62 | 61 |
- [ ] No recurring complete snapshot copies are written for high-volume types. |
| 63 |
-- [ ] SQL diff between two observations runs without loading full datasets into Swift arrays. |
|
| 62 |
+- [x] SQL diff between two observations runs without loading full datasets into Swift arrays. |
|
| 64 | 63 |
- [ ] Expensive counts used by reports/UI are cached and rebuildable. |
| 65 | 64 |
- [ ] Deleting Core Data cache and rebuilding from SQLite restores UI/report summaries. |
| 66 | 65 |
- [ ] Export can stream large selected record sets. |
@@ -150,15 +150,15 @@ Checklist: |
||
| 150 | 150 |
- [x] Implement representationChanged query between observations. |
| 151 | 151 |
- [x] Implement diff counts using temp tables or equivalent SQL-first strategy. |
| 152 | 152 |
- [x] Implement aggregate comparison query. |
| 153 |
-- [ ] Implement consolidation-likely evidence query. |
|
| 154 |
-- [ ] Implement source/provenance breakdown query. |
|
| 153 |
+- [x] Implement consolidation-likely evidence query. |
|
| 154 |
+- [x] Implement source/provenance breakdown query. |
|
| 155 | 155 |
- [ ] Add query timing/memory tests on synthetic large datasets. |
| 156 | 156 |
|
| 157 | 157 |
Acceptance: |
| 158 | 158 |
- [x] Observation T can be reconstructed from ranges/events. |
| 159 | 159 |
- [ ] Large diff returns counts and first page without loading all rows. |
| 160 | 160 |
- [x] Query results are deterministic and ordered. |
| 161 |
-- [ ] Consolidation evidence includes count, aggregate, coverage, density, and uncertainty data. |
|
| 161 |
+- [x] Consolidation evidence includes count, aggregate, coverage, density, and uncertainty data. |
|
| 162 | 162 |
|
| 163 | 163 |
## Milestone 6 - Core Data UI/Report Cache |
| 164 | 164 |
|
@@ -10,6 +10,8 @@ protocol HealthArchiveStore {
|
||
| 10 | 10 |
func diffSummary(_ request: HealthArchiveDiffRequest) async throws -> HealthArchiveDiffSummary |
| 11 | 11 |
func diffRecords(_ request: HealthArchiveDiffRecordRequest) async throws -> [ArchivedHealthRecord] |
| 12 | 12 |
func aggregateComparison(_ request: HealthArchiveAggregateComparisonRequest) async throws -> [HealthArchiveAggregateComparisonRow] |
| 13 |
+ func sourceProvenanceBreakdown(_ request: HealthArchiveSourceProvenanceRequest) async throws -> [HealthArchiveSourceProvenanceRow] |
|
| 14 |
+ func consolidationEvidence(_ request: HealthArchiveConsolidationEvidenceRequest) async throws -> [HealthArchiveConsolidationEvidence] |
|
| 13 | 15 |
func exportReport(_ request: HealthArchiveReportRequest) async throws -> URL |
| 14 | 16 |
func checkIntegrity() async throws -> HealthArchiveIntegrityReport |
| 15 | 17 |
} |
@@ -176,6 +178,51 @@ struct HealthArchiveAggregateComparisonRow: Equatable, Sendable {
|
||
| 176 | 178 |
} |
| 177 | 179 |
} |
| 178 | 180 |
|
| 181 |
/// Parameters for a source/provenance breakdown of archived samples.
struct HealthArchiveSourceProvenanceRequest: Equatable, Sendable {
    /// Observation whose visible records should be analysed.
    /// When `nil`, the SQLite store applies no visibility filter.
    let visibleAtObservationID: Int64?

    /// Restricts the breakdown to one sample type identifier when non-nil.
    let sampleTypeIdentifier: String?

    /// Maximum number of rows to return; `nil` means no limit.
    /// The SQLite store clamps negative values to zero.
    let limit: Int?
}
|
| 186 |
+ |
|
| 187 |
/// One group of a source/provenance breakdown: all records that share the
/// same sample type, source, source revision and device model.
struct HealthArchiveSourceProvenanceRow: Equatable, Sendable {
    /// Sample type identifier the group belongs to.
    let sampleTypeIdentifier: String

    // Source identity; each value is nil when the archive has no matching
    // source/source-revision row for the grouped sample versions.
    let sourceNameHash: String?
    let sourceBundleIdentifier: String?
    let sourceProductType: String?
    let sourceVersion: String?
    let sourceOperatingSystemVersion: String?

    /// Device model, or nil when no device row is linked.
    let deviceModel: String?

    /// Number of records in the group.
    let visibleRecordCount: Int

    /// Sum of the numeric values in the group, or nil when none is available.
    let valueSum: Double?

    /// Earliest start date across the group.
    let earliestStartDate: Date?

    /// Latest end date across the group.
    let latestEndDate: Date?
}
|
| 200 |
+ |
|
| 201 |
/// Parameters for a consolidation-evidence comparison between two observations.
struct HealthArchiveConsolidationEvidenceRequest: Equatable, Sendable {
    /// Observation used as the "before" side of the comparison.
    let fromObservationID: Int64

    /// Observation used as the "after" side of the comparison.
    let toObservationID: Int64

    /// Restricts the comparison to one sample type identifier when non-nil.
    let sampleTypeIdentifier: String?
}
|
| 206 |
+ |
|
| 207 |
/// Per-sample-type evidence describing how the visible record set changed
/// between two observations, together with a conservative label.
struct HealthArchiveConsolidationEvidence: Equatable, Sendable {
    /// Sample type identifier this evidence row describes.
    let sampleTypeIdentifier: String

    // The compared observation pair, echoed from the request.
    let fromObservationID: Int64
    let toObservationID: Int64

    // Structural diff counts between the two observations.
    let disappearedCount: Int
    let appearedCount: Int
    let representationChangedCount: Int

    // Visible record totals on each side.
    let fromVisibleRecordCount: Int
    let toVisibleRecordCount: Int

    // Sums of numeric values on each side; nil when unavailable.
    let fromValueSum: Double?
    let toValueSum: Double?

    /// Overlap, in seconds, between the time spans covered on each side.
    let coverageOverlapSeconds: Double?

    // Record density (records per hour of covered span) on each side.
    let densityBefore: Double?
    let densityAfter: Double?

    /// Whether the source/device composition is considered compatible.
    let sourceCompatible: Bool

    /// Evidence label, e.g. "consolidation_likely" or "uncertain".
    let label: String

    /// Explanation that accompanies an "uncertain" label; nil otherwise.
    let uncertaintyReason: String?
}
|
| 225 |
+ |
|
| 179 | 226 |
struct HealthArchiveReportRequest: Equatable, Sendable {
|
| 180 | 227 |
let reportID: UUID |
| 181 | 228 |
let title: String |
@@ -559,6 +559,333 @@ actor SQLiteHealthArchiveStore: HealthArchiveStore {
|
||
| 559 | 559 |
} |
| 560 | 560 |
} |
| 561 | 561 |
|
| 562 |
/// Returns the source/device composition of archived sample versions.
///
/// Rows are grouped by sample type, source name hash, bundle identifier,
/// source revision (product type, version, OS version) and device model.
/// Each row carries the record count, the summed numeric value and the
/// earliest start / latest end date of its group.
///
/// - Parameter request: Optional observation ID, optional sample type filter
///   and optional row limit (negative limits are clamped to zero).
/// - Returns: Rows ordered by record count (descending) and then by every
///   grouping column, so the order and any `LIMIT` cut-off are deterministic.
/// - Throws: Whatever `openDatabase`, `prepareSchemaIfNeeded` or
///   `withStatement` throw.
func sourceProvenanceBreakdown(_ request: HealthArchiveSourceProvenanceRequest) async throws -> [HealthArchiveSourceProvenanceRow] {
    let db = try openDatabase()
    defer { sqlite3_close(db) }
    try prepareSchemaIfNeeded(db)

    let whereClause = request.sampleTypeIdentifier == nil ? "" : "WHERE t.type_identifier = ?"
    // `limit` is an Int clamped to >= 0, so interpolating it is injection-safe.
    let limitClause = request.limit.map { "LIMIT \(max($0, 0))" } ?? ""

    // NOTE(review): when visibleAtObservationID is nil the CTE keeps every
    // visibility range, including closed ones — confirm that "no filter" is
    // the intended meaning rather than "currently visible".
    //
    // ORDER BY lists all grouping columns after the count so that groups with
    // equal counts (and equal type/bundle/model) still sort reproducibly.
    let sql = """
        WITH visible_ranges AS (
            SELECT sample_id, version_id
            FROM sample_visibility_ranges
            WHERE (? IS NULL OR (
                first_observation_id <= ?
                AND (last_observation_id IS NULL OR last_observation_id > ?)
            ))
        )
        SELECT
            t.type_identifier,
            src.source_name_hash,
            src.bundle_identifier,
            sr.product_type,
            sr.version,
            sr.operating_system_version,
            d.model,
            COUNT(*) AS visible_record_count,
            SUM(v.numeric_value) AS value_sum,
            MIN(v.start_date) AS earliest_start_date,
            MAX(v.end_date) AS latest_end_date
        FROM visible_ranges vr
        JOIN sample_versions v ON v.id = vr.version_id
        JOIN samples s ON s.id = vr.sample_id
        JOIN sample_types t ON t.id = s.sample_type_id
        LEFT JOIN source_revisions sr ON sr.id = v.source_revision_id
        LEFT JOIN sources src ON src.id = sr.source_id
        LEFT JOIN hk_devices d ON d.id = v.hk_device_id
        \(whereClause)
        GROUP BY
            t.type_identifier,
            src.source_name_hash,
            src.bundle_identifier,
            sr.product_type,
            sr.version,
            sr.operating_system_version,
            d.model
        ORDER BY
            visible_record_count DESC,
            t.type_identifier ASC,
            src.bundle_identifier ASC,
            d.model ASC,
            src.source_name_hash ASC,
            sr.product_type ASC,
            sr.version ASC,
            sr.operating_system_version ASC
        \(limitClause)
        """

    return try withStatement(sql, db: db) { statement in
        var index: Int32 = 1
        // The visibility CTE references the observation ID three times.
        for _ in 0..<3 {
            bindInt64(request.visibleAtObservationID, to: index, in: statement)
            index += 1
        }
        if let sampleTypeIdentifier = request.sampleTypeIdentifier {
            bindText(sampleTypeIdentifier, to: index, in: statement)
        }

        // NOTE(review): the loop stops on any non-ROW step result, so a step
        // error would look like a shorter result set — confirm whether
        // `withStatement` surfaces such errors.
        var rows: [HealthArchiveSourceProvenanceRow] = []
        while sqlite3_step(statement) == SQLITE_ROW {
            rows.append(HealthArchiveSourceProvenanceRow(
                sampleTypeIdentifier: columnText(statement, 0) ?? "",
                sourceNameHash: columnText(statement, 1),
                sourceBundleIdentifier: columnText(statement, 2),
                sourceProductType: columnText(statement, 3),
                sourceVersion: columnText(statement, 4),
                sourceOperatingSystemVersion: columnText(statement, 5),
                deviceModel: columnText(statement, 6),
                visibleRecordCount: columnInt(statement, 7) ?? 0,
                valueSum: columnDouble(statement, 8),
                earliestStartDate: columnUnixDate(statement, 9),
                latestEndDate: columnUnixDate(statement, 10)
            ))
        }
        return rows
    }
}
|
| 646 |
+ |
|
| 647 |
/// Computes per-sample-type evidence about how the visible record set
/// changed between two observations and assigns a conservative label.
///
/// For every sample type visible in either observation the query returns
/// appeared / disappeared / representation-changed counts, visible record
/// counts and value sums on both sides, the overlap of the covered time
/// spans, the record density (records per hour) on both sides and a
/// source-compatibility flag.
///
/// Source compatibility requires each side to contain at most one bundle
/// identifier and one device model and, when both sides have records, that
/// the two sides share the same bundle identifier and device model. Without
/// the cross-side comparison, a switch from one source to another would
/// count as compatible.
///
/// Labels produced: `representation_changed`, `consolidation_likely`,
/// `appeared`, `aggregate_changed` and `uncertain` (the latter with an
/// `uncertaintyReason`).
///
/// - Parameter request: The observation pair and an optional type filter.
/// - Returns: One evidence row per sample type, ordered by type identifier.
/// - Throws: Whatever `openDatabase`, `prepareSchemaIfNeeded` or
///   `withStatement` throw.
func consolidationEvidence(_ request: HealthArchiveConsolidationEvidenceRequest) async throws -> [HealthArchiveConsolidationEvidence] {
    let db = try openDatabase()
    defer { sqlite3_close(db) }
    try prepareSchemaIfNeeded(db)

    let typeClause = request.sampleTypeIdentifier == nil ? "" : "AND t.type_identifier = ?"
    let topFilterClause = request.sampleTypeIdentifier == nil ? "" : "WHERE tk.type_identifier = ?"
    // `typeClause` is interpolated three times and `topFilterClause` once;
    // the bind loop below relies on this count.
    let typeFilterPlaceholderCount = 4

    let sql = """
        WITH from_visible AS (
            SELECT sample_id, version_id
            FROM sample_visibility_ranges
            WHERE first_observation_id <= ?
                AND (last_observation_id IS NULL OR last_observation_id > ?)
        ),
        to_visible AS (
            SELECT sample_id, version_id
            FROM sample_visibility_ranges
            WHERE first_observation_id <= ?
                AND (last_observation_id IS NULL OR last_observation_id > ?)
        ),
        type_keys AS (
            SELECT DISTINCT sample_type_id, type_identifier
            FROM (
                SELECT s.sample_type_id, t.type_identifier
                FROM from_visible fv
                JOIN samples s ON s.id = fv.sample_id
                JOIN sample_types t ON t.id = s.sample_type_id
                UNION
                SELECT s.sample_type_id, t.type_identifier
                FROM to_visible tv
                JOIN samples s ON s.id = tv.sample_id
                JOIN sample_types t ON t.id = s.sample_type_id
            )
        ),
        from_type_metrics AS (
            SELECT
                s.sample_type_id,
                COUNT(*) AS from_visible_record_count,
                SUM(v.numeric_value) AS from_value_sum,
                MIN(v.start_date) AS from_start_date,
                MAX(v.end_date) AS from_end_date,
                COUNT(DISTINCT CASE WHEN src.bundle_identifier IS NULL THEN '__NULL__' ELSE src.bundle_identifier END) AS from_bundle_count,
                COUNT(DISTINCT CASE WHEN d.model IS NULL THEN '__NULL__' ELSE d.model END) AS from_model_count,
                MIN(CASE WHEN src.bundle_identifier IS NULL THEN '__NULL__' ELSE src.bundle_identifier END) AS from_bundle_key,
                MIN(CASE WHEN d.model IS NULL THEN '__NULL__' ELSE d.model END) AS from_model_key
            FROM from_visible fv
            JOIN samples s ON s.id = fv.sample_id
            JOIN sample_versions v ON v.id = fv.version_id
            LEFT JOIN source_revisions sr ON sr.id = v.source_revision_id
            LEFT JOIN sources src ON src.id = sr.source_id
            LEFT JOIN hk_devices d ON d.id = v.hk_device_id
            GROUP BY s.sample_type_id
        ),
        to_type_metrics AS (
            SELECT
                s.sample_type_id,
                COUNT(*) AS to_visible_record_count,
                SUM(v.numeric_value) AS to_value_sum,
                MIN(v.start_date) AS to_start_date,
                MAX(v.end_date) AS to_end_date,
                COUNT(DISTINCT CASE WHEN src.bundle_identifier IS NULL THEN '__NULL__' ELSE src.bundle_identifier END) AS to_bundle_count,
                COUNT(DISTINCT CASE WHEN d.model IS NULL THEN '__NULL__' ELSE d.model END) AS to_model_count,
                MIN(CASE WHEN src.bundle_identifier IS NULL THEN '__NULL__' ELSE src.bundle_identifier END) AS to_bundle_key,
                MIN(CASE WHEN d.model IS NULL THEN '__NULL__' ELSE d.model END) AS to_model_key
            FROM to_visible tv
            JOIN samples s ON s.id = tv.sample_id
            JOIN sample_versions v ON v.id = tv.version_id
            LEFT JOIN source_revisions sr ON sr.id = v.source_revision_id
            LEFT JOIN sources src ON src.id = sr.source_id
            LEFT JOIN hk_devices d ON d.id = v.hk_device_id
            GROUP BY s.sample_type_id
        ),
        appeared_by_type AS (
            SELECT s.sample_type_id, COUNT(*) AS appeared_count
            FROM to_visible tv
            LEFT JOIN from_visible fv ON fv.sample_id = tv.sample_id
            JOIN samples s ON s.id = tv.sample_id
            JOIN sample_types t ON t.id = s.sample_type_id
            WHERE fv.sample_id IS NULL \(typeClause)
            GROUP BY s.sample_type_id
        ),
        disappeared_by_type AS (
            SELECT s.sample_type_id, COUNT(*) AS disappeared_count
            FROM from_visible fv
            LEFT JOIN to_visible tv ON tv.sample_id = fv.sample_id
            JOIN samples s ON s.id = fv.sample_id
            JOIN sample_types t ON t.id = s.sample_type_id
            WHERE tv.sample_id IS NULL \(typeClause)
            GROUP BY s.sample_type_id
        ),
        changed_by_type AS (
            SELECT s.sample_type_id, COUNT(*) AS representation_changed_count
            FROM to_visible tv
            JOIN from_visible fv ON fv.sample_id = tv.sample_id
            JOIN samples s ON s.id = tv.sample_id
            JOIN sample_types t ON t.id = s.sample_type_id
            WHERE tv.version_id != fv.version_id \(typeClause)
            GROUP BY s.sample_type_id
        )
        SELECT
            tk.type_identifier,
            COALESCE(d.disappeared_count, 0) AS disappeared_count,
            COALESCE(a.appeared_count, 0) AS appeared_count,
            COALESCE(c.representation_changed_count, 0) AS representation_changed_count,
            COALESCE(f.from_visible_record_count, 0) AS from_visible_record_count,
            COALESCE(t.to_visible_record_count, 0) AS to_visible_record_count,
            f.from_value_sum,
            t.to_value_sum,
            f.from_start_date,
            f.from_end_date,
            t.to_start_date,
            t.to_end_date,
            CASE
                WHEN f.from_start_date IS NULL OR f.from_end_date IS NULL OR t.to_start_date IS NULL OR t.to_end_date IS NULL THEN NULL
                ELSE MAX(0, MIN(f.from_end_date, t.to_end_date) - MAX(f.from_start_date, t.to_start_date))
            END AS coverage_overlap_seconds,
            CASE
                WHEN COALESCE(f.from_visible_record_count, 0) = 0 OR f.from_start_date IS NULL OR f.from_end_date IS NULL OR f.from_end_date <= f.from_start_date THEN NULL
                ELSE CAST(f.from_visible_record_count AS REAL) / ((f.from_end_date - f.from_start_date) / 3600.0)
            END AS density_before,
            CASE
                WHEN COALESCE(t.to_visible_record_count, 0) = 0 OR t.to_start_date IS NULL OR t.to_end_date IS NULL OR t.to_end_date <= t.to_start_date THEN NULL
                ELSE CAST(t.to_visible_record_count AS REAL) / ((t.to_end_date - t.to_start_date) / 3600.0)
            END AS density_after,
            CASE
                WHEN COALESCE(f.from_bundle_count, 0) <= 1
                    AND COALESCE(f.from_model_count, 0) <= 1
                    AND COALESCE(t.to_bundle_count, 0) <= 1
                    AND COALESCE(t.to_model_count, 0) <= 1
                    AND (
                        f.sample_type_id IS NULL
                        OR t.sample_type_id IS NULL
                        OR (f.from_bundle_key = t.to_bundle_key AND f.from_model_key = t.to_model_key)
                    )
                THEN 1 ELSE 0
            END AS source_compatible
        FROM type_keys tk
        LEFT JOIN from_type_metrics f ON f.sample_type_id = tk.sample_type_id
        LEFT JOIN to_type_metrics t ON t.sample_type_id = tk.sample_type_id
        LEFT JOIN appeared_by_type a ON a.sample_type_id = tk.sample_type_id
        LEFT JOIN disappeared_by_type d ON d.sample_type_id = tk.sample_type_id
        LEFT JOIN changed_by_type c ON c.sample_type_id = tk.sample_type_id
        \(topFilterClause)
        ORDER BY tk.type_identifier
        """

    return try withStatement(sql, db: db) { statement in
        var index: Int32 = 1
        // from_visible and to_visible each reference their observation ID twice.
        for observationID in [request.fromObservationID, request.fromObservationID,
                              request.toObservationID, request.toObservationID] {
            bindInt64(observationID, to: index, in: statement)
            index += 1
        }
        if let sampleTypeIdentifier = request.sampleTypeIdentifier {
            for _ in 0..<typeFilterPlaceholderCount {
                bindText(sampleTypeIdentifier, to: index, in: statement)
                index += 1
            }
        }

        var rows: [HealthArchiveConsolidationEvidence] = []
        while sqlite3_step(statement) == SQLITE_ROW {
            let sampleTypeIdentifier = columnText(statement, 0) ?? ""
            let disappearedCount = columnInt(statement, 1) ?? 0
            let appearedCount = columnInt(statement, 2) ?? 0
            let representationChangedCount = columnInt(statement, 3) ?? 0
            let fromVisibleRecordCount = columnInt(statement, 4) ?? 0
            let toVisibleRecordCount = columnInt(statement, 5) ?? 0
            let fromValueSum = columnDouble(statement, 6)
            let toValueSum = columnDouble(statement, 7)
            // Columns 8-11 are the raw start/end dates; only the metrics
            // derived from them are read here.
            let coverageOverlapSeconds = columnDouble(statement, 12)
            let densityBefore = columnDouble(statement, 13)
            let densityAfter = columnDouble(statement, 14)
            let sourceCompatible = columnInt(statement, 15) == 1

            // The value sum counts as stable when it moved by at most 10 % of
            // the previous sum, with an absolute floor of 1.0.
            let stableValue: Bool
            if let fromValueSum, let toValueSum {
                stableValue = abs(toValueSum - fromValueSum) <= max(1.0, abs(fromValueSum) * 0.1)
            } else {
                stableValue = false
            }
            let hasCoverageOverlap = (coverageOverlapSeconds ?? 0) > 0
            let densityDoesNotIncrease: Bool
            if let densityBefore, let densityAfter {
                densityDoesNotIncrease = densityAfter <= densityBefore
            } else {
                densityDoesNotIncrease = false
            }
            let consolidationEvidencePresent = disappearedCount > 0
                && sourceCompatible
                && hasCoverageOverlap
                && densityDoesNotIncrease
                && stableValue

            // Branch order matters: the first matching rule wins.
            let label: String
            let uncertaintyReason: String?
            if representationChangedCount > 0 && disappearedCount == 0 && appearedCount == 0 {
                label = "representation_changed"
                uncertaintyReason = nil
            } else if consolidationEvidencePresent {
                label = "consolidation_likely"
                uncertaintyReason = nil
            } else if appearedCount > 0 && disappearedCount == 0 && representationChangedCount == 0 {
                label = "appeared"
                uncertaintyReason = nil
            } else if appearedCount > 0 && disappearedCount > 0 && sourceCompatible && stableValue {
                label = "aggregate_changed"
                uncertaintyReason = nil
            } else if disappearedCount == 0 && appearedCount == 0 && representationChangedCount == 0 {
                label = "uncertain"
                uncertaintyReason = "no structural change detected"
            } else {
                label = "uncertain"
                uncertaintyReason = "insufficient evidence for a stable consolidation label"
            }

            rows.append(HealthArchiveConsolidationEvidence(
                sampleTypeIdentifier: sampleTypeIdentifier,
                fromObservationID: request.fromObservationID,
                toObservationID: request.toObservationID,
                disappearedCount: disappearedCount,
                appearedCount: appearedCount,
                representationChangedCount: representationChangedCount,
                fromVisibleRecordCount: fromVisibleRecordCount,
                toVisibleRecordCount: toVisibleRecordCount,
                fromValueSum: fromValueSum,
                toValueSum: toValueSum,
                coverageOverlapSeconds: coverageOverlapSeconds,
                densityBefore: densityBefore,
                densityAfter: densityAfter,
                sourceCompatible: sourceCompatible,
                label: label,
                uncertaintyReason: uncertaintyReason
            ))
        }
        return rows
    }
}
|
| 888 |
+ |
|
| 562 | 889 |
func exportReport(_ request: HealthArchiveReportRequest) async throws -> URL {
|
| 563 | 890 |
let recordRequest = HealthArchiveRecordRequest( |
| 564 | 891 |
sampleTypeIdentifier: request.typeIdentifierFilter, |
@@ -159,6 +159,93 @@ final class SQLiteHealthArchiveStoreTests: XCTestCase {
|
||
| 159 | 159 |
XCTAssertEqual(rows.first?.valueSumDelta, 7) |
| 160 | 160 |
} |
| 161 | 161 |
|
| 162 |
/// A single archived step-count sample must yield exactly one provenance
/// row carrying its type, count, value sum and source bundle identifier.
func testSourceProvenanceBreakdownReturnsVisibleSourceRows() async throws {
    // Arrange: archive one sample in a fresh database.
    let archiveURL = databaseURL()
    let store = SQLiteHealthArchiveStore(databaseURL: archiveURL)
    let sample = makeStepCountSample(value: 42, start: 1_000)
    let typeIdentifier = sample.sampleType.identifier
    _ = try await store.upsertSamples([sample], observedAt: Date(timeIntervalSince1970: 3_000))

    // Act: request the breakdown for that type without an observation filter.
    let request = HealthArchiveSourceProvenanceRequest(
        visibleAtObservationID: nil,
        sampleTypeIdentifier: typeIdentifier,
        limit: 10
    )
    let rows = try await store.sourceProvenanceBreakdown(request)

    // Assert.
    let firstRow = rows.first
    XCTAssertEqual(rows.count, 1)
    XCTAssertEqual(firstRow?.sampleTypeIdentifier, typeIdentifier)
    XCTAssertEqual(firstRow?.visibleRecordCount, 1)
    XCTAssertEqual(firstRow?.valueSum, 42)
    XCTAssertEqual(firstRow?.sourceBundleIdentifier, sample.sourceRevision.source.bundleIdentifier)
}
|
| 182 |
+ |
|
| 183 |
/// Two samples replaced by one sample with the same total value, an
/// overlapping time span and lower density must be labelled
/// `consolidation_likely`.
func testConsolidationEvidenceClassifiesStableCountDropAsConsolidationLikely() async throws {
    let url = databaseURL()
    let store = SQLiteHealthArchiveStore(databaseURL: url)
    let typeIdentifier = HKQuantityTypeIdentifier.stepCount.rawValue

    let firstOldSample = makeStepCountSample(value: 10, start: 1_000, end: 1_500)
    let secondOldSample = makeStepCountSample(value: 20, start: 1_600, end: 2_000)
    let consolidatedSample = makeStepCountSample(value: 30, start: 1_000, end: 2_500)

    _ = try await store.upsertSamples([firstOldSample, secondOldSample], observedAt: Date(timeIntervalSince1970: 3_000))
    _ = try await store.upsertSamples([consolidatedSample], observedAt: Date(timeIntervalSince1970: 3_060))
    try await store.recordDisappearance(sampleUUIDHash: HashService.sampleUUIDHash(firstOldSample.uuid.uuidString), sampleTypeIdentifier: typeIdentifier, observedMissingAt: Date(timeIntervalSince1970: 3_120))
    try await store.recordDisappearance(sampleUUIDHash: HashService.sampleUUIDHash(secondOldSample.uuid.uuidString), sampleTypeIdentifier: typeIdentifier, observedMissingAt: Date(timeIntervalSince1970: 3_180))

    // XCTAssertEqual does not stop the test, so unwrap the endpoints instead
    // of subscripting: an unexpectedly empty list then fails the test rather
    // than trapping the whole test process.
    let ids = try observationIDs(at: url)
    XCTAssertEqual(ids.count, 4)
    let firstObservationID = try XCTUnwrap(ids.first)
    let lastObservationID = try XCTUnwrap(ids.last)

    let rows = try await store.consolidationEvidence(HealthArchiveConsolidationEvidenceRequest(
        fromObservationID: firstObservationID,
        toObservationID: lastObservationID,
        sampleTypeIdentifier: typeIdentifier
    ))

    XCTAssertEqual(rows.count, 1)
    let row = try XCTUnwrap(rows.first)
    XCTAssertEqual(row.sampleTypeIdentifier, typeIdentifier)
    XCTAssertEqual(row.disappearedCount, 2)
    XCTAssertEqual(row.appearedCount, 1)
    XCTAssertEqual(row.fromVisibleRecordCount, 2)
    XCTAssertEqual(row.toVisibleRecordCount, 1)
    XCTAssertEqual(row.fromValueSum, 30)
    XCTAssertEqual(row.toValueSum, 30)
    XCTAssertEqual(row.label, "consolidation_likely")
    XCTAssertTrue(row.sourceCompatible)
}
|
| 217 |
+ |
|
| 218 |
/// Two widely spaced samples replaced by one short, dense sample with the
/// same total value must be labelled `aggregate_changed`: the value sum is
/// stable but density increases, so the consolidation rule does not apply.
func testConsolidationEvidenceClassifiesDenseStableShiftAsAggregateChanged() async throws {
    let url = databaseURL()
    let store = SQLiteHealthArchiveStore(databaseURL: url)
    let typeIdentifier = HKQuantityTypeIdentifier.stepCount.rawValue

    let firstOldSample = makeStepCountSample(value: 10, start: 1_000, end: 1_500)
    let secondOldSample = makeStepCountSample(value: 20, start: 5_000, end: 6_000)
    let denseSample = makeStepCountSample(value: 30, start: 1_000, end: 1_200)

    _ = try await store.upsertSamples([firstOldSample, secondOldSample], observedAt: Date(timeIntervalSince1970: 3_000))
    _ = try await store.upsertSamples([denseSample], observedAt: Date(timeIntervalSince1970: 3_060))
    try await store.recordDisappearance(sampleUUIDHash: HashService.sampleUUIDHash(firstOldSample.uuid.uuidString), sampleTypeIdentifier: typeIdentifier, observedMissingAt: Date(timeIntervalSince1970: 3_120))
    try await store.recordDisappearance(sampleUUIDHash: HashService.sampleUUIDHash(secondOldSample.uuid.uuidString), sampleTypeIdentifier: typeIdentifier, observedMissingAt: Date(timeIntervalSince1970: 3_180))

    // Unwrap the endpoints instead of subscripting so an unexpectedly empty
    // list fails the test rather than trapping the test process.
    let ids = try observationIDs(at: url)
    XCTAssertEqual(ids.count, 4)
    let firstObservationID = try XCTUnwrap(ids.first)
    let lastObservationID = try XCTUnwrap(ids.last)

    let rows = try await store.consolidationEvidence(HealthArchiveConsolidationEvidenceRequest(
        fromObservationID: firstObservationID,
        toObservationID: lastObservationID,
        sampleTypeIdentifier: typeIdentifier
    ))

    XCTAssertEqual(rows.count, 1)
    let row = try XCTUnwrap(rows.first)
    XCTAssertEqual(row.label, "aggregate_changed")
    XCTAssertEqual(row.fromVisibleRecordCount, 2)
    XCTAssertEqual(row.toVisibleRecordCount, 1)
    XCTAssertEqual(row.fromValueSum, 30)
    XCTAssertEqual(row.toValueSum, 30)
}
|
| 248 |
+ |
|
| 162 | 249 |
/// Location of the archive database file inside `temporaryDirectory`.
private func databaseURL() -> URL {
    let archiveFileURL = temporaryDirectory.appending(path: "Archive.sqlite")
    return archiveFileURL
}
@@ -179,11 +266,11 @@ final class SQLiteHealthArchiveStoreTests: XCTestCase {
|
||
| 179 | 266 |
makeStepCountSample(value: 42, start: 1_000) |
| 180 | 267 |
} |
| 181 | 268 |
|
| 182 |
/// Builds an unsaved step-count sample for tests.
///
/// - Parameters:
///   - value: Step count stored in the sample.
///   - start: Start date as seconds since 1970.
///   - end: End date as seconds since 1970; defaults to 300 seconds after
///     `start` when omitted.
/// - Returns: A quantity sample of type `.stepCount` with unit `count`.
private func makeStepCountSample(value: Double, start: TimeInterval, end: TimeInterval? = nil) -> HKQuantitySample {
    // The non-optional initializer avoids force-unwrapping the failable
    // `quantityType(forIdentifier:)` factory.
    let quantityType = HKQuantityType(.stepCount)
    let quantity = HKQuantity(unit: .count(), doubleValue: value)
    let startDate = Date(timeIntervalSince1970: start)
    let endDate = Date(timeIntervalSince1970: end ?? (start + 300))
    return HKQuantitySample(type: quantityType, quantity: quantity, start: startDate, end: endDate)
}
| 189 | 276 |
|