@@ -282,11 +282,11 @@ CREATE TABLE samples ( |
||
| 282 | 282 |
UNIQUE(sample_type_id, strict_fingerprint) |
| 283 | 283 |
); |
| 284 | 284 |
|
| 285 |
-CREATE INDEX idx_samples_uuid_hash |
|
| 286 |
-ON samples(sample_uuid_hash); |
|
| 285 |
+CREATE INDEX idx_samples_type_id |
|
| 286 |
+ON samples(sample_type_id, id); |
|
| 287 | 287 |
|
| 288 |
-CREATE INDEX idx_samples_type_semantic |
|
| 289 |
-ON samples(sample_type_id, semantic_fingerprint); |
|
| 288 |
+CREATE INDEX idx_samples_type_uuid_hash |
|
| 289 |
+ON samples(sample_type_id, sample_uuid_hash); |
|
| 290 | 290 |
|
| 291 | 291 |
CREATE TABLE sample_versions ( |
| 292 | 292 |
id INTEGER PRIMARY KEY, |
@@ -309,9 +309,6 @@ CREATE TABLE sample_versions ( |
||
| 309 | 309 |
|
| 310 | 310 |
CREATE INDEX idx_sample_versions_sample |
| 311 | 311 |
ON sample_versions(sample_id); |
| 312 |
- |
|
| 313 |
-CREATE INDEX idx_sample_versions_time |
|
| 314 |
-ON sample_versions(start_date, end_date); |
|
| 315 | 312 |
``` |
| 316 | 313 |
|
| 317 | 314 |
### 5.6 Observation Events And Visibility Ranges |
@@ -347,6 +344,12 @@ CREATE TABLE sample_visibility_ranges ( |
||
| 347 | 344 |
CREATE INDEX idx_visibility_open_ranges |
| 348 | 345 |
ON sample_visibility_ranges(last_observation_id); |
| 349 | 346 |
|
| 347 |
+CREATE INDEX idx_visibility_sample_open_version |
|
| 348 |
+ON sample_visibility_ranges(sample_id, last_observation_id, version_id); |
|
| 349 |
+ |
|
| 350 |
+CREATE INDEX idx_visibility_sample_version_open |
|
| 351 |
+ON sample_visibility_ranges(sample_id, version_id, last_observation_id); |
|
| 352 |
+ |
|
| 350 | 353 |
CREATE INDEX idx_visibility_point_lookup |
| 351 | 354 |
ON sample_visibility_ranges(first_observation_id, last_observation_id); |
| 352 | 355 |
``` |
@@ -177,6 +177,51 @@ touching the archive write path. The next implementation target should reduce |
||
| 177 | 177 |
per-sample work for unchanged existing samples during verification/full-scan |
| 178 | 178 |
captures. |
| 179 | 179 |
|
| 180 |
+### 2026-06-02 First Import After Index Removal And Reset Fortification |
|
| 181 |
+ |
|
| 182 |
+Commit context: after `3dd5f48` (`Fortify scheduled test database reset`), with |
|
| 183 |
+the unused index removal from `ff59257` included. Source: user-provided |
|
| 184 |
+diagnostic report with `previousSnapshotID: none` and `isChainStart: true`. |
|
| 185 |
+ |
|
| 186 |
+This is a comparable first-import benchmark. The `a281c51` verified-event change |
|
| 187 |
+is included in the build, but it should not materially affect this run because a |
|
| 188 |
+clean first import creates brand-new samples rather than unchanged existing |
|
| 189 |
+samples. |
|
| 190 |
+ |
|
| 191 |
+| Metric | Value | |
|
| 192 |
+|--------|-------| |
|
| 193 |
+| Wall clock | 12m 43s | |
|
| 194 |
+| Summed metric total | 12m 42s | |
|
| 195 |
+| Summed fetch | 40.4s | |
|
| 196 |
+| Summed processing | 1m 37s | |
|
| 197 |
+| Summed insert | 10m 11s | |
|
| 198 |
+| Summed finalize | 10.8s | |
|
| 199 |
+| Total records | 1,579,168 | |
|
| 200 |
+| Heart Rate count | 922,450 | |
|
| 201 |
+| Heart Rate total | 8m 06s | |
|
| 202 |
+| Heart Rate fetch | 18.6s | |
|
| 203 |
+| Heart Rate processing | 56.1s | |
|
| 204 |
+| Heart Rate insert | 6m 41s | |
|
| 205 |
+| Active Energy count | 348,701 | |
|
| 206 |
+| Active Energy insert | 2m 09s | |
|
| 207 |
+| Steps insert | 21.6s | |
|
| 208 |
+| Walking + Running Distance insert | 19.2s | |
|
| 209 |
+ |
|
| 210 |
+Comparison against the previous comparable first-import run (`44d9ebd`): |
|
| 211 |
+ |
|
| 212 |
+| Metric | Previous | Current | Change | |
|
| 213 |
+|--------|----------|---------|--------| |
|
| 214 |
+| Wall clock | 17m 13s | 12m 43s | -4m 30s / -26% | |
|
| 215 |
+| Summed insert | 14m 38s | 10m 11s | -4m 27s / -30% | |
|
| 216 |
+| Heart Rate insert | 8m 59s | 6m 41s | -2m 18s / -26% | |
|
| 217 |
+| Active Energy insert | 3m 54s | 2m 09s | -1m 45s / -45% | |
|
| 218 |
+| Steps insert | 24.2s | 21.6s | -2.6s / -11% | |
|
| 219 |
+| Walking + Running Distance insert | 20.7s | 19.2s | -1.5s / -7% | |
|
| 220 |
+ |
|
| 221 |
+Conclusion: first-import reset is now clean and the unused-index removal produced |
|
| 222 |
+a large measurable gain. SQLite insert remains the dominant cost, but its summed |
|
| 223 |
+time has dropped from about 14m38s to 10m11s. |
|
| 224 |
+ |
|
| 180 | 225 |
## Optimization Iterations |
| 181 | 226 |
|
| 182 | 227 |
| Date | Commit | Change | Result / Status | |
@@ -193,19 +238,20 @@ captures. |
||
| 193 | 238 |
| 2026-06-02 | `a026566` | Batched initial import archive writes across several fetched pages. | Wall clock improved from about 20m25s to 18m21s on the measured first import. | |
| 194 | 239 |
| 2026-06-02 | `c138b7b` | Increased initial import write chunk sizes. | Marginal improvement: summed insert from 15m44s to 15m24s on the next comparable run. | |
| 195 | 240 |
| 2026-06-02 | `44d9ebd` | Used direct inserts for dependent rows when `samples` creates a new sample. | Confirmed modest first-import gain: wall clock 18m30s -> 17m13s, summed insert 15m24s -> 14m38s, Heart Rate insert 9m58s -> 8m59s. | |
| 196 |
-| 2026-06-02 | `ff59257` | Removed unused `samples` indexes on global UUID hash and semantic fingerprint. | Awaiting comparable first-import report. Expected signal is lower `SummedInsertElapsed`; deleted-object lookup remains covered by `(sample_type_id, sample_uuid_hash)`. | |
|
| 241 |
+| 2026-06-02 | `ff59257` | Removed unused `samples` indexes on global UUID hash and semantic fingerprint. | Confirmed large first-import gain after clean reset: wall clock 17m13s -> 12m43s, summed insert 14m38s -> 10m11s, Heart Rate insert 8m59s -> 6m41s. Deleted-object lookup remains covered by `(sample_type_id, sample_uuid_hash)`. | |
|
| 197 | 242 |
| 2026-06-02 | pending | Captured non-chain-start full-scan report after index removal. | Not comparable for first-import performance; reveals a separate full-scan/unchanged-sample write bottleneck. | |
| 198 |
-| 2026-06-02 | pending | Stopped writing `verified` observation events for unchanged existing samples. | Awaiting comparable non-chain-start/full-scan report. Expected signal is lower `SummedInsertElapsed` and especially lower Heart Rate insert time when most rows are unchanged. | |
|
| 199 |
-| 2026-06-02 | pending | Fortified scheduled test database reset with a disk marker and extra SQLite sidecar cleanup. | Awaiting real-device confirmation that reset survives force-close/relaunch and produces a clean first-snapshot timeline. | |
|
| 243 |
+| 2026-06-02 | `a281c51` | Stopped writing `verified` observation events for unchanged existing samples. | Awaiting comparable non-chain-start/full-scan report. Expected signal is lower `SummedInsertElapsed` and especially lower Heart Rate insert time when most rows are unchanged. | |
|
| 244 |
+| 2026-06-02 | `3dd5f48` | Fortified scheduled test database reset with a disk marker and extra SQLite sidecar cleanup. | Real-device report confirmed reset produced `previousSnapshotID: none`, `isChainStart: true`, and a clean first-snapshot timeline. | |
|
| 245 |
+| 2026-06-02 | pending | Removed the unused `sample_versions(start_date, end_date)` index and the `sample_visibility_ranges(sample_id, last_observation_id)` index, which is redundant because it is a leading-column prefix of `idx_visibility_sample_open_version` on `(sample_id, last_observation_id, version_id)`. | Awaiting comparable first-import report. Expected signal is lower insert time because first import writes one sample version and one visibility range per record. | |
|
| 200 | 246 |
|
| 201 | 247 |
## Current Diagnosis |
| 202 | 248 |
|
| 203 | 249 |
The import is no longer primarily a HealthKit fetch problem. On the latest comparable measured first-import run: |
| 204 | 250 |
|
| 205 |
-- total wall clock was 17m13s after the latest direct-insert optimization; |
|
| 206 |
-- summed fetch was only 43.0s; |
|
| 207 |
-- summed insert was 14m38s; |
|
| 208 |
-- Heart Rate alone spent 8m59s inserting. |
|
| 251 |
+- total wall clock was 12m43s after the unused-index removal and clean reset; |
|
| 252 |
+- summed fetch was only 40.4s; |
|
| 253 |
+- summed insert was 10m11s; |
|
| 254 |
+- Heart Rate alone spent 6m41s inserting. |
|
| 209 | 255 |
|
| 210 | 256 |
The likely bottleneck is per-row SQLite work: |
| 211 | 257 |
- uniqueness checks on hot tables; |
@@ -1684,7 +1684,7 @@ actor SQLiteHealthArchiveStore: HealthArchiveStore {
|
||
| 1684 | 1684 |
) |
| 1685 | 1685 |
""", db: db) |
| 1686 | 1686 |
try execute("CREATE INDEX IF NOT EXISTS idx_sample_versions_sample ON sample_versions(sample_id)", db: db)
|
| 1687 |
- try execute("CREATE INDEX IF NOT EXISTS idx_sample_versions_time ON sample_versions(start_date, end_date)", db: db)
|
|
| 1687 |
+ try execute("DROP INDEX IF EXISTS idx_sample_versions_time", db: db)
|
|
| 1688 | 1688 |
try execute("""
|
| 1689 | 1689 |
CREATE TABLE IF NOT EXISTS sample_observation_events ( |
| 1690 | 1690 |
id INTEGER PRIMARY KEY, |
@@ -1711,7 +1711,7 @@ actor SQLiteHealthArchiveStore: HealthArchiveStore {
|
||
| 1711 | 1711 |
) |
| 1712 | 1712 |
""", db: db) |
| 1713 | 1713 |
try execute("CREATE INDEX IF NOT EXISTS idx_visibility_open_ranges ON sample_visibility_ranges(last_observation_id)", db: db)
|
| 1714 |
- try execute("CREATE INDEX IF NOT EXISTS idx_visibility_sample_open ON sample_visibility_ranges(sample_id, last_observation_id)", db: db)
|
|
| 1714 |
+ try execute("DROP INDEX IF EXISTS idx_visibility_sample_open", db: db)
|
|
| 1715 | 1715 |
try execute("CREATE INDEX IF NOT EXISTS idx_visibility_sample_open_version ON sample_visibility_ranges(sample_id, last_observation_id, version_id)", db: db)
|
| 1716 | 1716 |
try execute("CREATE INDEX IF NOT EXISTS idx_visibility_sample_version_open ON sample_visibility_ranges(sample_id, version_id, last_observation_id)", db: db)
|
| 1717 | 1717 |
try execute("CREATE INDEX IF NOT EXISTS idx_visibility_point_lookup ON sample_visibility_ranges(first_observation_id, last_observation_id)", db: db)
|