@@ -392,9 +392,20 @@ Integration tests: |
||
| 392 | 392 |
- export generation with manifest hashes |
| 393 | 393 |
- high-frequency capture memory/performance |
| 394 | 394 |
- deletion evidence via `HKDeletedObject` |
| 395 |
+- opt-in large synthetic full-import benchmark for SQLite archive write/finalize cost |
|
| 395 | 396 |
|
| 396 | 397 |
Synthetic fixtures only. No real health values or identifiable metadata. |
| 397 | 398 |
|
| 399 |
+Large-import benchmark policy: |
|
| 400 |
+- keep one opt-in XCTest benchmark for a synthetic full import into SQLite; |
|
| 401 |
+- measure at least `XCTClockMetric` and `XCTMemoryMetric`; |
|
| 402 |
+- enable it only with explicit environment variables or launch arguments so |
|
| 403 |
+ normal test runs stay fast; |
|
| 404 |
+- use it to compare archive write/finalize regressions between commits, not to |
|
| 405 |
+ prove end-to-end HealthKit device performance by itself; |
|
| 406 |
+- combine it with real-device diagnostic reports before declaring background |
|
| 407 |
+ import safe on large live datasets. |
|
| 408 |
+ |
|
| 398 | 409 |
## 11. Performance Considerations |
| 399 | 410 |
|
| 400 | 411 |
| Operation | Target | Notes | |
@@ -27,7 +27,7 @@ There are no real deployments, only test installations. Existing prototype datab |
||
| 27 | 27 |
|------|----------------|--------------------| |
| 28 | 28 |
| Product docs | Updated | Keep `HealthProbe/Doc/README.md` as canonical index | |
| 29 | 29 |
| HealthKit capture | Capture now opens one archive observation per user-visible snapshot, attaches HealthKit pages, deleted-object evidence, and type verification to that observation id before finishing it, no longer aborts initial full-history imports after a fixed 30-minute wall-clock cap while page-level HealthKit timeouts remain in place, defers grouped observation summary/daily aggregate rebuilds until per-type verification instead of rebuilding after every imported page, persists large HealthKit pages in smaller archive chunks while using type-specific import strategies, and has an expanded HealthKit type registry for full-dataset discovery while keeping the original 15-type profile as the tested default | Run full dataset discovery/coverage on real devices before declaring import/storage complete; then revisit full checkpoint/resume and background collection | |
| 30 |
-| SQLite archive | Archive v2 schema, snapshot-level observation grouping, differential write path, v2 verification/delete bookkeeping, daily aggregate rebuilds, integrity report, v2 record reads, SQL diff/count/aggregate/provenance/consolidation-evidence APIs, large synthetic diff pagination coverage, formal timing/memory metrics, and XCTest coverage are in place; the legacy `archive_samples` mirror has been removed, the hot write path now reuses prepared SQLite statements within grouped page writes instead of reparsing the same SQL for every sample, caches repeated sample-type/source/source-revision/device/metadata id lookups within grouped writes, skips redundant visibility close/existence checks when grouped imports create a brand-new sample or payload version, skips follow-up id lookup queries when SQLite confirms new sample/sample-version inserts, reuses verification aggregates instead of rescanning them twice, drives per-type finalize queries from sample-type-filtered sample ids, processes sample rows in a lower-allocation streaming loop, batches same-page deleted-object evidence in one transaction, adds composite indexes for visibility-range and sample-uuid hot lookups, and opens SQLite connections with import-friendly busy timeout / synchronous / temp-store pragmas | Continue moving capture/Dashboard actions to archive/cache DTOs | |
|
| 30 |
+| SQLite archive | Archive v2 schema, snapshot-level observation grouping, differential write path, v2 verification/delete bookkeeping, daily aggregate rebuilds, integrity report, v2 record reads, SQL diff/count/aggregate/provenance/consolidation-evidence APIs, large synthetic diff pagination coverage, formal timing/memory metrics, XCTest coverage, and an opt-in large synthetic full-import benchmark are in place; the legacy `archive_samples` mirror has been removed, the hot write path now reuses prepared SQLite statements within grouped page writes instead of reparsing the same SQL for every sample, caches repeated sample-type/source/source-revision/device/metadata id lookups within grouped writes, skips redundant visibility close/existence checks when grouped imports create a brand-new sample or payload version, skips follow-up id lookup queries when SQLite confirms new sample/sample-version inserts, reuses verification aggregates instead of rescanning them twice, drives per-type finalize queries from sample-type-filtered sample ids, processes sample rows in a lower-allocation streaming loop, batches same-page deleted-object evidence in one transaction, adds composite indexes for visibility-range and sample-uuid hot lookups, and opens SQLite connections with import-friendly busy timeout / synchronous / temp-store pragmas | Continue moving capture/Dashboard actions to archive/cache DTOs | |
|
| 31 | 31 |
| Core Data cache | Initial programmatic Core Data model, full-cache rebuild service, read DTOs for observation/type/diff/health rows, and Dashboard archive-cache status wiring are in place | Move remaining export/report paths to cache DTOs and add targeted partial invalidation | |
| 32 | 32 |
| SwiftData cache | Exists; test builds now reset legacy prototype UI/archive/cache stores once for archive v2 so old SwiftData-only snapshots are not treated as backed-up observations. Metric timeout calibration, local device profile settings, operation logging, ContentView preview, Settings data maintenance, legacy detail/PDF views, unused legacy repair/observer services, Dashboard view/view-model access, and legacy anomaly/count-drop review have moved outside SwiftData or been removed. Remaining SwiftData imports are inventoried in [`SwiftData-Retirement-Inventory.md`](SwiftData-Retirement-Inventory.md) | Treat as disposable prototype data; stop returning/storing `HealthSnapshot` bridge handles before removing `ModelContainer` | |
| 33 | 33 |
| UI | Prototype exists; Dashboard status reads archive/cache observation rows and shows cache health, and Dashboard view/view-model code no longer imports SwiftData or reads `ModelContext`; capture/review actions now route through DTOs and snapshot ids, with the remaining legacy bridge isolated in `HealthKitService`. Snapshots and Data Types tab roots no longer import SwiftData, load Core Data cached observation rows, and open archive/cache-backed detail rows; `SnapshotArchiveDetailView` and `DataTypeArchiveDetailView` read Core Data type/diff summaries and page record drill-down through SQLite; unused legacy SwiftData snapshot/type detail and PDF views have been deleted; record-change evolution and temporal distribution screens now receive DTO rows/cache input instead of querying SwiftData directly; export preview reads the archive export API before showing/exporting JSON; simplified detail mode replaces heavy charts with summary rows on small/accessibility layouts or when enabled in Settings; visible change labels now use neutral new/missing/change-review language; Settings can now schedule a full test-database reset for the next app launch | Stop writing prototype `HealthSnapshot` bridge rows during capture/review | |
@@ -63,6 +63,7 @@ Import performance iterations and measured reports live in [`Import-Optimization |
||
| 63 | 63 |
the full HealthKit-accessible dataset. |
| 64 | 64 |
- Old prototype database compatibility is no longer required. |
| 65 | 65 |
- Initial SQLite archive tests cover open/init/reset/idempotency, snapshot-level observation grouping, legacy mirror removal, small observation diffs, large synthetic diff pagination, formal timing/memory metrics, materialized aggregate comparison, source/provenance breakdowns, consolidation-evidence labels, export preview, paged JSON output, and manifest row persistence. |
| 66 |
+- Initial SQLite archive tests now also include a small always-on synthetic large-import smoke test plus an opt-in full-import benchmark to track archive write/finalize regressions without real HealthKit data. |
|
| 66 | 67 |
- Initial Core Data cache tests cover full rebuild from SQLite and delete-cache-then-rebuild without losing archive data. |
| 67 | 68 |
|
| 68 | 69 |
## Verification Checklist |
@@ -41,6 +41,34 @@ Important interpretation: |
||
| 41 | 41 |
- progress rates shown during import may overestimate throughput if overhead is not included; |
| 42 | 42 |
- compare first snapshots only against first snapshots after a database reset. |
| 43 | 43 |
|
| 44 |
+## Benchmark Harness |
|
| 45 |
+ |
|
| 46 |
+The repo now keeps one opt-in XCTest benchmark for synthetic large full imports: |
|
| 47 |
+- test: `SQLiteLargeImportPerformanceTests.testLargeSyntheticFullImportBenchmark` |
|
| 48 |
+- enable with: |
|
| 49 |
+ `HP_ENABLE_LARGE_IMPORT_BENCHMARK=1` or |
|
| 50 |
+ `-HP_ENABLE_LARGE_IMPORT_BENCHMARK YES` |
|
| 51 |
+- optional tuning: |
|
| 52 |
+ `HP_LARGE_IMPORT_SAMPLE_COUNT=<count>`, |
|
| 53 |
+ `HP_LARGE_IMPORT_ITERATIONS=<count>`, or matching launch arguments |
|
| 54 |
+ (`-HP_LARGE_IMPORT_SAMPLE_COUNT <count>`, |
|
| 55 |
+ `-HP_LARGE_IMPORT_ITERATIONS <count>`) |
|
| 56 |
+- metrics: `XCTClockMetric` and `XCTMemoryMetric` |
|
| 57 |
+ |
|
| 58 |
+To keep the benchmark's import path from drifting, the same test file also contains a small |
|
| 59 |
+always-on smoke case: |
|
| 60 |
+- test: `SQLiteLargeImportPerformanceTests.testLargeSyntheticFullImportSmoke` |
|
| 61 |
+- purpose: prove the archive can still complete a synthetic first-import flow in |
|
| 62 |
+ normal CI/test runs even when the heavier benchmark stays disabled |
|
| 63 |
+ |
|
| 64 |
+Interpretation rules: |
|
| 65 |
+- this benchmark measures SQLite archive write/finalize cost, not real HealthKit |
|
| 66 |
+ fetch latency or device authorization behavior; |
|
| 67 |
+- it is the repeatable regression gate for "can the archive still ingest a very |
|
| 68 |
+ large first import with bounded time/memory?"; |
|
| 69 |
+- a background-import decision still needs one clean real-device full-import |
|
| 70 |
+ report from the large-database device in addition to this synthetic benchmark. |
|
| 71 |
+ |
|
| 44 | 72 |
## Real-Device Results |
| 45 | 73 |
|
| 46 | 74 |
### 2026-06-02 Baseline Before Latest Batch/Chunk Work |
@@ -0,0 +1,202 @@ |
||
| 1 |
+import HealthKit |
|
| 2 |
+import SQLite3 |
|
| 3 |
+import XCTest |
|
| 4 |
+@testable import HealthProbe |
|
| 5 |
+ |
|
| 6 |
/// Synthetic large-import coverage for `SQLiteHealthArchiveStore`.
///
/// The class contains a small always-on smoke test and an opt-in benchmark.
/// Both push generated `HKQuantitySample` values through the same
/// begin / upsert / verify / finish sequence and then check row counts by
/// reading the resulting database file directly through the SQLite C API.
final class SQLiteLargeImportPerformanceTests: XCTestCase {
    /// One synthetic series: the HealthKit type, the unit its values are
    /// expressed in, and the lowest value generated for it.
    private typealias SyntheticDefinition = (sampleType: HKQuantityType, unit: HKUnit, baseline: Double)

    /// Failures raised by the harness itself rather than by the archive store.
    private enum HarnessError: Error, CustomStringConvertible {
        case archiveOperationDidNotFinish(seconds: TimeInterval, waiterResult: Int)
        case sqlite(step: String, message: String)

        var description: String {
            switch self {
            case let .archiveOperationDidNotFinish(seconds, waiterResult):
                return "Archive operation did not finish within \(seconds) seconds (XCTWaiter result \(waiterResult))"
            case let .sqlite(step, message):
                return "SQLite \(step) failed: \(message)"
            }
        }
    }

    /// Hands the outcome of an async operation back to the synchronous test
    /// thread. Access is serialized with a lock so the `@unchecked Sendable`
    /// conformance does not rely on the waiter for memory ordering.
    private final class AsyncResultBox<T>: @unchecked Sendable {
        private let lock = NSLock()
        private var storedResult: Result<T, Error>?

        var result: Result<T, Error>? {
            get {
                lock.lock()
                defer { lock.unlock() }
                return storedResult
            }
            set {
                lock.lock()
                defer { lock.unlock() }
                storedResult = newValue
            }
        }
    }

    /// Sample count used by the benchmark when no override is supplied.
    private static let defaultBenchmarkSampleCount = 250_000

    /// Time allowed for one import of up to `defaultBenchmarkSampleCount` samples.
    private static let baseOperationTimeout: TimeInterval = 120

    private var temporaryDirectory: URL!

    // MARK: - Lifecycle

    /// Creates a unique scratch directory for the databases of this test.
    override func setUpWithError() throws {
        temporaryDirectory = FileManager.default.temporaryDirectory
            .appending(path: "HealthProbeLargeImportTests-\(UUID().uuidString)", directoryHint: .isDirectory)
        try FileManager.default.createDirectory(at: temporaryDirectory, withIntermediateDirectories: true)
    }

    /// Removes the scratch directory; removal is best effort.
    override func tearDownWithError() throws {
        if let temporaryDirectory {
            try? FileManager.default.removeItem(at: temporaryDirectory)
        }
        temporaryDirectory = nil
    }

    // MARK: - Tests

    /// Always-on check that a 5,000-sample synthetic import completes and
    /// produces the expected row counts.
    func testLargeSyntheticFullImportSmoke() throws {
        let definitions = syntheticImportDefinitions()
        let samples = makeSyntheticImportSamples(totalCount: 5_000, definitions: definitions)
        let url = databaseURL(named: "LargeImportSmoke-\(UUID().uuidString).sqlite")

        try runSyntheticImport(
            samples: samples,
            definitions: definitions,
            databaseURL: url
        )
    }

    /// Opt-in benchmark measuring clock time and memory of a synthetic import.
    ///
    /// Skipped unless `HP_ENABLE_LARGE_IMPORT_BENCHMARK` is `1` in the
    /// environment or true in `UserDefaults.standard`. Sample count (minimum
    /// 1,000, default 250,000) and iteration count (minimum 1, default 1) can
    /// be overridden the same way.
    func testLargeSyntheticFullImportBenchmark() throws {
        let isEnabled = ProcessInfo.processInfo.environment["HP_ENABLE_LARGE_IMPORT_BENCHMARK"] == "1"
            || UserDefaults.standard.bool(forKey: "HP_ENABLE_LARGE_IMPORT_BENCHMARK")

        guard isEnabled else {
            throw XCTSkip("""
            Large import benchmark is opt-in. Run with environment variable \
            HP_ENABLE_LARGE_IMPORT_BENCHMARK=1 or launch argument \
            -HP_ENABLE_LARGE_IMPORT_BENCHMARK YES, plus optional \
            HP_LARGE_IMPORT_SAMPLE_COUNT / HP_LARGE_IMPORT_ITERATIONS overrides.
            """)
        }

        let sampleCount = max(
            1_000,
            integerOverride(named: "HP_LARGE_IMPORT_SAMPLE_COUNT") ?? Self.defaultBenchmarkSampleCount
        )
        let iterationCount = max(1, integerOverride(named: "HP_LARGE_IMPORT_ITERATIONS") ?? 1)

        // The wait budget grows with the requested workload: one base timeout
        // per started block of `defaultBenchmarkSampleCount` samples. Default
        // and smaller runs keep the 120-second budget.
        let workloadBlocks = (Double(sampleCount) / Double(Self.defaultBenchmarkSampleCount)).rounded(.up)
        let timeout = Self.baseOperationTimeout * max(1, workloadBlocks)

        // Samples are generated once, outside the measured block.
        let definitions = syntheticImportDefinitions()
        let samples = makeSyntheticImportSamples(totalCount: sampleCount, definitions: definitions)
        let options = XCTMeasureOptions()
        options.iterationCount = iterationCount

        measure(metrics: [XCTClockMetric(), XCTMemoryMetric()], options: options) {
            // A fresh database per invocation keeps every run a first import.
            let url = databaseURL(named: "LargeImport-\(UUID().uuidString).sqlite")
            do {
                try runSyntheticImport(
                    samples: samples,
                    definitions: definitions,
                    databaseURL: url,
                    timeout: timeout
                )
            } catch {
                XCTFail("Large synthetic import benchmark failed: \(error)")
            }
        }
    }

    // MARK: - Configuration

    /// Reads an integer setting from the environment first, then from
    /// `UserDefaults.standard`.
    ///
    /// - Returns: The parsed environment value if it is a valid integer,
    ///   otherwise the non-zero defaults value, otherwise `nil`.
    private func integerOverride(named key: String) -> Int? {
        Int(ProcessInfo.processInfo.environment[key] ?? "")
            ?? UserDefaults.standard.integer(forKey: key).nonZero
    }

    /// Returns the location of a database file inside the scratch directory.
    private func databaseURL(named name: String) -> URL {
        temporaryDirectory.appending(path: name)
    }

    // MARK: - Fixtures

    /// The five quantity series used to build synthetic samples.
    private func syntheticImportDefinitions() -> [SyntheticDefinition] {
        [
            (HKQuantityType(.heartRate), HKUnit.count().unitDivided(by: .minute()), 55),
            (HKQuantityType(.activeEnergyBurned), .kilocalorie(), 1.5),
            (HKQuantityType(.basalEnergyBurned), .kilocalorie(), 1.1),
            (HKQuantityType(.stepCount), .count(), 12),
            (HKQuantityType(.distanceWalkingRunning), .meter(), 8)
        ]
    }

    /// Builds `totalCount` deterministic samples, cycling through `definitions`.
    ///
    /// Sample `index` starts 60 seconds after sample `index - 1`, carries the
    /// value `baseline + index % 17`, and lasts 5 seconds for heart rate or 60
    /// seconds for every other type.
    ///
    /// - Returns: The generated samples, or an empty array when `totalCount`
    ///   is not positive or `definitions` is empty.
    private func makeSyntheticImportSamples(
        totalCount: Int,
        definitions: [SyntheticDefinition]
    ) -> [HKQuantitySample] {
        // Without this guard an empty definition list traps on `% 0` and a
        // negative count traps when forming the range.
        guard totalCount > 0, !definitions.isEmpty else { return [] }

        let heartRateIdentifier = HKQuantityTypeIdentifier.heartRate.rawValue
        return (0..<totalCount).map { index in
            let definition = definitions[index % definitions.count]
            let value = definition.baseline + Double(index % 17)
            let start = Date(timeIntervalSince1970: 10_000 + Double(index * 60))
            let duration: TimeInterval = definition.sampleType.identifier == heartRateIdentifier ? 5 : 60
            let quantity = HKQuantity(unit: definition.unit, doubleValue: value)
            return HKQuantitySample(
                type: definition.sampleType,
                quantity: quantity,
                start: start,
                end: start.addingTimeInterval(duration)
            )
        }
    }

    // MARK: - Import driver

    /// Runs one complete synthetic observation against a store at `url` and
    /// asserts the resulting row counts.
    ///
    /// - Parameters:
    ///   - samples: Samples written in a single `upsertSamples` call.
    ///   - definitions: Types that receive one `markVerification` call each.
    ///   - url: Database location; the file and its `-shm` / `-wal` siblings
    ///     are removed on exit, best effort.
    ///   - timeout: Seconds to wait for the archive work to finish.
    /// - Throws: Any error from the archive store, a harness timeout error, or
    ///   a SQLite error raised while counting rows.
    private func runSyntheticImport(
        samples: [HKQuantitySample],
        definitions: [SyntheticDefinition],
        databaseURL url: URL,
        timeout: TimeInterval = 120
    ) throws {
        let store = SQLiteHealthArchiveStore(databaseURL: url)
        let observedAt = Date(timeIntervalSince1970: 5_000_000)

        defer {
            try? FileManager.default.removeItem(at: url)
            try? FileManager.default.removeItem(at: URL(fileURLWithPath: url.path + "-shm"))
            try? FileManager.default.removeItem(at: URL(fileURLWithPath: url.path + "-wal"))
        }

        try waitForArchiveOperation(timeout: timeout) {
            let observationID = try await store.beginObservation(
                observedAt: observedAt,
                triggerReason: "benchmarkLargeSyntheticImport",
                selectedTypeSetHash: "synthetic-large-import"
            )
            _ = try await store.upsertSamples(
                samples,
                observedAt: observedAt,
                observationID: observationID
            )
            for definition in definitions {
                _ = try await store.markVerification(
                    sampleType: definition.sampleType,
                    verifiedAt: observedAt,
                    observationID: observationID
                )
            }
            try await store.finishObservation(
                observationID: observationID,
                status: "completed",
                endedAt: observedAt.addingTimeInterval(1)
            )
            // Explicit unit return pins the generic result type to Void.
            return ()
        }

        XCTAssertEqual(try countRows(in: "samples", at: url), samples.count)
        XCTAssertEqual(try countRows(in: "sample_versions", at: url), samples.count)
        XCTAssertEqual(try countRows(in: "observation_type_runs", at: url), definitions.count)
    }

    /// Runs `operation` in a `Task` and blocks the calling test thread until
    /// it finishes or `timeout` elapses.
    ///
    /// - Parameters:
    ///   - timeout: Seconds to wait; defaults to 120.
    ///   - operation: The async work to run.
    /// - Returns: The value produced by `operation`.
    /// - Throws: The error thrown by `operation`, or
    ///   `HarnessError.archiveOperationDidNotFinish` when the wait ends for
    ///   any reason other than completion.
    private func waitForArchiveOperation<T>(
        timeout: TimeInterval = 120,
        _ operation: @escaping () async throws -> T
    ) throws -> T {
        let finished = XCTestExpectation(description: "archive operation")
        let box = AsyncResultBox<T>()

        let task = Task {
            do {
                box.result = .success(try await operation())
            } catch {
                box.result = .failure(error)
            }
            finished.fulfill()
        }

        let outcome = XCTWaiter().wait(for: [finished], timeout: timeout)
        guard outcome == .completed else {
            // Ask the abandoned work to stop before the caller removes the
            // database. Cancellation is cooperative, so this is only a request.
            task.cancel()
            throw HarnessError.archiveOperationDidNotFinish(seconds: timeout, waiterResult: outcome.rawValue)
        }
        return try XCTUnwrap(box.result).get()
    }

    // MARK: - SQLite inspection

    /// Returns `COUNT(*)` for `tableName` in the database file at `url`.
    ///
    /// `tableName` is interpolated into the SQL text because identifiers
    /// cannot be bound as parameters; pass trusted literals only.
    ///
    /// - Throws: `HarnessError.sqlite` when the database cannot be opened or
    ///   the query cannot be prepared or stepped.
    private func countRows(in tableName: String, at url: URL) throws -> Int {
        var db: OpaquePointer?
        let openStatus = sqlite3_open_v2(url.path, &db, SQLITE_OPEN_READONLY | SQLITE_OPEN_FULLMUTEX, nil)
        // A handle may be returned even when opening fails, so always close it.
        defer { sqlite3_close(db) }
        guard openStatus == SQLITE_OK else {
            throw HarnessError.sqlite(step: "open", message: Self.lastErrorMessage(db))
        }

        var statement: OpaquePointer?
        let prepareStatus = sqlite3_prepare_v2(db, "SELECT COUNT(*) FROM \(tableName)", -1, &statement, nil)
        defer { sqlite3_finalize(statement) }
        guard prepareStatus == SQLITE_OK else {
            throw HarnessError.sqlite(step: "prepare", message: Self.lastErrorMessage(db))
        }

        guard sqlite3_step(statement) == SQLITE_ROW else {
            throw HarnessError.sqlite(step: "step", message: Self.lastErrorMessage(db))
        }
        // COUNT(*) is a 64-bit integer; the 32-bit accessor would truncate it.
        return Int(sqlite3_column_int64(statement, 0))
    }

    /// Copies the most recent SQLite error text for `db` into a Swift string.
    private static func lastErrorMessage(_ db: OpaquePointer?) -> String {
        sqlite3_errmsg(db).map { String(cString: $0) } ?? "unknown SQLite error"
    }
}
|
| 197 |
+ |
|
| 198 |
private extension Int {
    /// The receiver, or `nil` when the receiver is zero.
    ///
    /// Lets a zero reading fall through a `??` chain to the next fallback.
    var nonZero: Int? {
        guard self != 0 else { return nil }
        return self
    }
}
|