Showing 3 changed files with 65 additions and 16 deletions
+10 -7
HealthProbe/Doc/02-architecture/Database-Design.md
@@ -282,11 +282,11 @@ CREATE TABLE samples (
282 282
     UNIQUE(sample_type_id, strict_fingerprint)
283 283
 );
284 284
 
285
-CREATE INDEX idx_samples_uuid_hash
286
-ON samples(sample_uuid_hash);
285
+CREATE INDEX idx_samples_type_id
286
+ON samples(sample_type_id, id);
287 287
 
288
-CREATE INDEX idx_samples_type_semantic
289
-ON samples(sample_type_id, semantic_fingerprint);
288
+CREATE INDEX idx_samples_type_uuid_hash
289
+ON samples(sample_type_id, sample_uuid_hash);
290 290
 
291 291
 CREATE TABLE sample_versions (
292 292
     id INTEGER PRIMARY KEY,
@@ -309,9 +309,6 @@ CREATE TABLE sample_versions (
309 309
 
310 310
 CREATE INDEX idx_sample_versions_sample
311 311
 ON sample_versions(sample_id);
312
-
313
-CREATE INDEX idx_sample_versions_time
314
-ON sample_versions(start_date, end_date);
315 312
 ```
316 313
 
317 314
 ### 5.6 Observation Events And Visibility Ranges
@@ -347,6 +344,12 @@ CREATE TABLE sample_visibility_ranges (
347 344
 CREATE INDEX idx_visibility_open_ranges
348 345
 ON sample_visibility_ranges(last_observation_id);
349 346
 
347
+CREATE INDEX idx_visibility_sample_open_version
348
+ON sample_visibility_ranges(sample_id, last_observation_id, version_id);
349
+
350
+CREATE INDEX idx_visibility_sample_version_open
351
+ON sample_visibility_ranges(sample_id, version_id, last_observation_id);
352
+
350 353
 CREATE INDEX idx_visibility_point_lookup
351 354
 ON sample_visibility_ranges(first_observation_id, last_observation_id);
352 355
 ```
+53 -7
HealthProbe/Doc/04-project/Import-Optimization-Log.md
@@ -177,6 +177,51 @@ touching the archive write path. The next implementation target should reduce
177 177
 per-sample work for unchanged existing samples during verification/full-scan
178 178
 captures.
179 179
 
180
+### 2026-06-02 First Import After Index Removal And Reset Fortification
181
+
182
+Commit context: after `3dd5f48` (`Fortify scheduled test database reset`), with
183
+the unused index removal from `ff59257` included. Source: user-provided
184
+diagnostic report with `previousSnapshotID: none` and `isChainStart: true`.
185
+
186
+This is a comparable first-import benchmark. The `a281c51` verified-event change
187
+is included in the build, but it should not materially affect this run because a
188
+clean first import only creates brand-new samples; there are no unchanged existing
189
+samples whose `verified` events could be skipped.
190
+
191
+| Metric | Value |
192
+|--------|-------|
193
+| Wall clock | 12m 43s |
194
+| Summed metric total | 12m 42s |
195
+| Summed fetch | 40.4s |
196
+| Summed processing | 1m 37s |
197
+| Summed insert | 10m 11s |
198
+| Summed finalize | 10.8s |
199
+| Total records | 1,579,168 |
200
+| Heart Rate count | 922,450 |
201
+| Heart Rate total | 8m 06s |
202
+| Heart Rate fetch | 18.6s |
203
+| Heart Rate processing | 56.1s |
204
+| Heart Rate insert | 6m 41s |
205
+| Active Energy count | 348,701 |
206
+| Active Energy insert | 2m 09s |
207
+| Steps insert | 21.6s |
208
+| Walking + Running Distance insert | 19.2s |
209
+
210
+Comparison against the previous comparable first-import run (`44d9ebd`):
211
+
212
+| Metric | Previous | Current | Change |
213
+|--------|----------|---------|--------|
214
+| Wall clock | 17m 13s | 12m 43s | -4m 30s / -26% |
215
+| Summed insert | 14m 38s | 10m 11s | -4m 27s / -30% |
216
+| Heart Rate insert | 8m 59s | 6m 41s | -2m 18s / -26% |
217
+| Active Energy insert | 3m 54s | 2m 09s | -1m 45s / -45% |
218
+| Steps insert | 24.2s | 21.6s | -2.6s / -11% |
219
+| Walking + Running Distance insert | 20.7s | 19.2s | -1.5s / -7% |
220
+
221
+Conclusion: first-import reset is now clean and the unused-index removal produced
222
+a large measurable gain. SQLite insert remains the dominant cost, but its summed
223
+time has dropped from about 14m38s to 10m11s.
224
+
180 225
 ## Optimization Iterations
181 226
 
182 227
 | Date | Commit | Change | Result / Status |
@@ -193,19 +238,20 @@ captures.
193 238
 | 2026-06-02 | `a026566` | Batched initial import archive writes across several fetched pages. | Wall clock improved from about 20m25s to 18m21s on the measured first import. |
194 239
 | 2026-06-02 | `c138b7b` | Increased initial import write chunk sizes. | Marginal improvement: summed insert from 15m44s to 15m24s on the next comparable run. |
195 240
 | 2026-06-02 | `44d9ebd` | Used direct inserts for dependent rows when `samples` creates a new sample. | Confirmed modest first-import gain: wall clock 18m30s -> 17m13s, summed insert 15m24s -> 14m38s, Heart Rate insert 9m58s -> 8m59s. |
196
-| 2026-06-02 | `ff59257` | Removed unused `samples` indexes on global UUID hash and semantic fingerprint. | Awaiting comparable first-import report. Expected signal is lower `SummedInsertElapsed`; deleted-object lookup remains covered by `(sample_type_id, sample_uuid_hash)`. |
241
+| 2026-06-02 | `ff59257` | Removed unused `samples` indexes on global UUID hash and semantic fingerprint. | Confirmed large first-import gain after clean reset: wall clock 17m13s -> 12m43s, summed insert 14m38s -> 10m11s, Heart Rate insert 8m59s -> 6m41s. Deleted-object lookup remains covered by `(sample_type_id, sample_uuid_hash)`. |
197 242
 | 2026-06-02 | pending | Captured non-chain-start full-scan report after index removal. | Not comparable for first-import performance; reveals a separate full-scan/unchanged-sample write bottleneck. |
198
-| 2026-06-02 | pending | Stopped writing `verified` observation events for unchanged existing samples. | Awaiting comparable non-chain-start/full-scan report. Expected signal is lower `SummedInsertElapsed` and especially lower Heart Rate insert time when most rows are unchanged. |
199
-| 2026-06-02 | pending | Fortified scheduled test database reset with a disk marker and extra SQLite sidecar cleanup. | Awaiting real-device confirmation that reset survives force-close/relaunch and produces a clean first-snapshot timeline. |
243
+| 2026-06-02 | `a281c51` | Stopped writing `verified` observation events for unchanged existing samples. | Awaiting comparable non-chain-start/full-scan report. Expected signal is lower `SummedInsertElapsed` and especially lower Heart Rate insert time when most rows are unchanged. |
244
+| 2026-06-02 | `3dd5f48` | Fortified scheduled test database reset with a disk marker and extra SQLite sidecar cleanup. | Real-device report confirmed reset produced `previousSnapshotID: none`, `isChainStart: true`, and a clean first-snapshot timeline. |
245
+| 2026-06-02 | pending | Removed unused `sample_versions(start_date, end_date)` and redundant `sample_visibility_ranges(sample_id, last_observation_id)` indexes. | Awaiting comparable first-import report. Expected signal is lower insert time because first import writes one sample version and one visibility range per record. |
200 246
 
201 247
 ## Current Diagnosis
202 248
 
203 249
 The import is no longer primarily a HealthKit fetch problem. On the latest comparable first-import measured run:
204 250
 
205
-- total wall clock was 17m13s after the latest direct-insert optimization;
206
-- summed fetch was only 43.0s;
207
-- summed insert was 14m38s;
208
-- Heart Rate alone spent 8m59s inserting.
251
+- total wall clock was 12m43s after the unused-index removal and clean reset;
252
+- summed fetch was only 40.4s;
253
+- summed insert was 10m11s;
254
+- Heart Rate alone spent 6m41s inserting.
209 255
 
210 256
 The likely bottleneck is per-row SQLite work:
211 257
 - uniqueness checks on hot tables;
+2 -2
HealthProbe/Services/SQLiteHealthArchiveStore.swift
@@ -1684,7 +1684,7 @@ actor SQLiteHealthArchiveStore: HealthArchiveStore {
1684 1684
         )
1685 1685
         """, db: db)
1686 1686
         try execute("CREATE INDEX IF NOT EXISTS idx_sample_versions_sample ON sample_versions(sample_id)", db: db)
1687
-        try execute("CREATE INDEX IF NOT EXISTS idx_sample_versions_time ON sample_versions(start_date, end_date)", db: db)
1687
+        try execute("DROP INDEX IF EXISTS idx_sample_versions_time", db: db)
1688 1688
         try execute("""
1689 1689
         CREATE TABLE IF NOT EXISTS sample_observation_events (
1690 1690
             id INTEGER PRIMARY KEY,
@@ -1711,7 +1711,7 @@ actor SQLiteHealthArchiveStore: HealthArchiveStore {
1711 1711
         )
1712 1712
         """, db: db)
1713 1713
         try execute("CREATE INDEX IF NOT EXISTS idx_visibility_open_ranges ON sample_visibility_ranges(last_observation_id)", db: db)
1714
-        try execute("CREATE INDEX IF NOT EXISTS idx_visibility_sample_open ON sample_visibility_ranges(sample_id, last_observation_id)", db: db)
1714
+        try execute("DROP INDEX IF EXISTS idx_visibility_sample_open", db: db)
1715 1715
         try execute("CREATE INDEX IF NOT EXISTS idx_visibility_sample_open_version ON sample_visibility_ranges(sample_id, last_observation_id, version_id)", db: db)
1716 1716
         try execute("CREATE INDEX IF NOT EXISTS idx_visibility_sample_version_open ON sample_visibility_ranges(sample_id, version_id, last_observation_id)", db: db)
1717 1717
         try execute("CREATE INDEX IF NOT EXISTS idx_visibility_point_lookup ON sample_visibility_ranges(first_observation_id, last_observation_id)", db: db)