From b0971b86c186976370ec9d9e638280119893cdd4 Mon Sep 17 00:00:00 2001 From: Peter Cox Date: Mon, 13 Apr 2026 17:00:03 +0100 Subject: [PATCH 1/2] incremental: preserve mode-skipped files and their hash rows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `index_repository` was destructively overwriting a project's graph in the store. Specifically, `find_deleted_files` classified ANY stored file absent from the current discovery list as "deleted" and purged its nodes via `cbm_gbuf_delete_by_file`. In a mode downgrade (full → fast), directories matching `FAST_SKIP_DIRS` (`tools`, `scripts`, `bin`, `build`, `docs`, `__tests__`, `migrations`, ...) are excluded from discovery but still exist on disk. They were getting silently purged. Real-world incident, 2026-04-13: the Skyline graph was built at 03:45 by the nightly full-mode cron (2819 nodes, packages/mcp/src/tools/ included). At 14:36 a concurrent /develop session ran mode='fast' per the then- current skill text. The Skyline graph collapsed to 2303 nodes mid-day and the entire packages/mcp/src/tools/ directory (18 files / ~500 nodes) vanished from the live graph while a diagnosing session was running. Fix --- `find_deleted_files` now stat()s each stored file that is absent from current discovery and classifies it as one of: - "deleted" — stat() returns ENOENT or ENOTDIR. Nodes purged, hash row dropped. - "mode-skipped" — stat() succeeds (or fails with a non-ENOENT errno, or the path can't be reliably constructed). Nodes AND hash row preserved. Mode-skipped hash rows are now carried through `dump_and_persist` → `persist_hashes` and re-upserted into the new DB. Without that, an orphaned-node bug emerges: 1. full mode indexes everything 2. fast mode runs and (with only the node-preservation half of the fix) drops mode-skipped hash rows 3. mode-skipped file is then deleted on disk 4. next reindex's stored hashes don't include the file → noop or can't detect the deletion → graph nodes for the deleted file remain forever The new test exercises exactly this sequence and the deletion-detection contract holds end-to-end. Fail-safe rules (preserve nodes on uncertainty) ----------------------------------------------- - repo_path NULL → log error, return 0 deletions, preserve everything. The caller contract is that repo_path is required; a NULL means a misconfigured pipeline, not a deletion signal. - snprintf truncation (combined path ≥ CBM_SZ_4K) → preserve. We can't reliably stat a truncated path. - stat() errno != ENOENT/ENOTDIR (EACCES, EIO, ELOOP, transient NFS, etc.) → preserve. The file may exist; we just can't see it right now. - strdup OOM mid-record → drop the entry rather than persist a row with a NULL rel_path that would silently fail the NOT NULL constraint and reintroduce the orphaned-node bug. These rules came out of two adversarial audit cycles: - Cycle 1 caught the original ENOENT-only gap, the snprintf truncation gap, the silent NULL repo_path branch, and the hash-row drift gap. - Cycle 2 caught the strdup NULL handling gap. Test ---- `incremental_fast_preserves_mode_skipped_tools_dir` in test_pipeline.c walks the real failure mode end-to-end: 1. Full mode index — both files present 2. Fast mode reindex — tools/util.go nodes preserved (was 0 before fix) 3. Mutate main.go and fast reindex — forces dump_and_persist to run instead of the noop early-return, exercising the hash-preservation path that step 2 doesn't cover 4. Delete tools/util.go from disk and full reindex — confirms the deletion-detection contract still holds (would fail with orphaned nodes without the hash-row preservation half of the fix) All 2741 existing tests still pass under ASan + UBSan + LeakSanitizer. Diagnosed in session a1fc365c-d9fd-4228-a86a-ef92e9e97984. Tracked as PM task claude-connectors/codebase-memory-index-repository-is-destructive (mC_AtUBH9DAh). Note: this fix removes the destructive failure mode but does NOT relax the policy that `mode='full'` is the only acceptable mode for our own projects. Full mode remains required for complete visibility and searchability (semantic edges, SIMILAR_TO, full directory coverage). This fix is defense-in-depth for the case where a session running with older instructions accidentally calls a lesser mode. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/pipeline/pipeline_incremental.c | 228 +++++++++++++++++++++++++--- tests/test_pipeline.c | 150 ++++++++++++++++++ 2 files changed, 355 insertions(+), 23 deletions(-) diff --git a/src/pipeline/pipeline_incremental.c b/src/pipeline/pipeline_incremental.c index ceeaaef9..db2c072f 100644 --- a/src/pipeline/pipeline_incremental.c +++ b/src/pipeline/pipeline_incremental.c @@ -25,6 +25,7 @@ enum { INCR_RING_BUF = 4, INCR_RING_MASK = 3, INCR_TS_BUF = 24, INCR_WAL_BUF = 1 #include "foundation/compat_fs.h" #include "foundation/platform.h" +#include #include #include #include @@ -120,41 +121,188 @@ static bool *classify_files(cbm_file_info_t *files, int file_count, cbm_file_has return changed; } -/* Find stored files that no longer exist on disk. Returns count. */ -static int find_deleted_files(cbm_file_info_t *files, int file_count, cbm_file_hash_t *stored, - int stored_count, char ***out_deleted) { +/* Classify stored files that are absent from current discovery. Returns the + * count of truly-deleted files (output via out_deleted) and ALSO collects + * mode-skipped files into out_mode_skipped (caller frees both). + * + * A stored file is classified as: + * - "deleted" — `stat()` returns ENOENT or ENOTDIR. Its nodes will + * be purged and its hash row dropped. + * - "mode-skipped" — `stat()` succeeds. The file exists on disk but the + * current discovery pass didn't visit it (e.g. excluded + * by FAST_SKIP_DIRS in fast/moderate mode). Its nodes + * must be preserved AND its hash row must be carried + * forward into the new DB so subsequent reindexes can + * still see it as "known" rather than treating it as + * new-or-deleted. + * + * Without this distinction, a fast-mode reindex after a full-mode index + * would silently purge every file under `tools/`, `scripts/`, `bin/`, + * `build/`, `docs/`, `__tests__/`, etc. — see task + * claude-connectors/codebase-memory-index-repository-is-destructive-... + * and the 2026-04-13 Skyline incident (packages/mcp/src/tools/ vanished + * from a live graph mid-session). + * + * Mode-skipped hash preservation is the second half of the additive-merge + * contract: dump_and_persist re-upserts these hash rows so the next reindex + * can correctly detect a real on-disk deletion of a mode-skipped file (as + * opposed to seeing it as "never existed" → noop → orphaned graph nodes). + * + * Fail-safe rules (preserve nodes on uncertainty): + * - repo_path NULL → log error and preserve everything (return 0 + * deletions, empty mode_skipped). The caller contract is that + * repo_path is required; a NULL means a misconfigured pipeline, + * not a deletion signal. + * - snprintf truncation (combined path ≥ CBM_SZ_4K) → preserve. We can't + * reliably stat a truncated path. Treat as mode-skipped. + * - stat() errno != ENOENT/ENOTDIR (EACCES, EIO, ELOOP, transient NFS, + * etc.) → preserve. The file may exist; we just can't see it right now. + * Treat as mode-skipped. + * + * Note: we use stat() (not lstat()) on purpose. A symlink whose target was + * deleted should be classified as deleted from the indexer's perspective + * because the indexer follows symlinks during discovery — a stale symlink + * has no source to parse. */ +static int find_deleted_files(const char *repo_path, cbm_file_info_t *files, int file_count, + cbm_file_hash_t *stored, int stored_count, char ***out_deleted, + cbm_file_hash_t **out_mode_skipped, int *out_mode_skipped_count) { + *out_deleted = NULL; + *out_mode_skipped = NULL; + *out_mode_skipped_count = 0; + + if (!repo_path) { + /* Misconfigured pipeline. Preserve everything rather than risk + * silently re-introducing the destructive overwrite this function + * was rewritten to prevent. */ + cbm_log_error("incremental.err", "msg", "find_deleted_files_null_repo_path"); + return 0; + } + CBMHashTable *current = cbm_ht_create((size_t)file_count * PAIR_LEN); for (int i = 0; i < file_count; i++) { cbm_ht_set(current, files[i].rel_path, &files[i]); } - int count = 0; - int cap = CBM_SZ_64; - char **deleted = malloc((size_t)cap * sizeof(char *)); + int del_count = 0; + int del_cap = CBM_SZ_64; + char **deleted = malloc((size_t)del_cap * sizeof(char *)); + if (!deleted) { + cbm_log_error("incremental.err", "msg", "find_deleted_files_oom"); + cbm_ht_free(current); + return 0; + } + + int ms_count = 0; + int ms_cap = CBM_SZ_64; + cbm_file_hash_t *mode_skipped = malloc((size_t)ms_cap * sizeof(cbm_file_hash_t)); + if (!mode_skipped) { + cbm_log_error("incremental.err", "msg", "find_deleted_files_oom_ms"); + free(deleted); + cbm_ht_free(current); + return 0; + } for (int i = 0; i < stored_count; i++) { - if (!cbm_ht_get(current, stored[i].rel_path)) { - if (count >= cap) { - cap *= PAIR_LEN; - char **tmp = realloc(deleted, (size_t)cap * sizeof(char *)); + if (cbm_ht_get(current, stored[i].rel_path)) { + continue; /* still visited by current pass */ + } + /* Not in current discovery — check if it's truly deleted or just + * mode-skipped (excluded by FAST_SKIP_DIRS etc.). */ + bool preserve = false; + char abs_path[CBM_SZ_4K]; + int n = snprintf(abs_path, sizeof(abs_path), "%s/%s", repo_path, stored[i].rel_path); + if (n < 0 || n >= (int)sizeof(abs_path)) { + /* Truncation or encoding error — can't reliably stat. Preserve. */ + cbm_log_warn("incremental.path_truncated", "rel_path", stored[i].rel_path); + preserve = true; + } else { + struct stat st; + if (stat(abs_path, &st) == 0) { + /* File exists on disk — mode-skipped, not deleted. */ + preserve = true; + } else if (errno != ENOENT && errno != ENOTDIR) { + /* Transient or permission error — fail safe by preserving. + * EACCES, EIO, ELOOP, ENAMETOOLONG, etc. */ + cbm_log_warn("incremental.stat_uncertain", "rel_path", stored[i].rel_path, "errno", + itoa_buf(errno)); + preserve = true; + } + } + + if (preserve) { + /* Carry forward the existing hash row so subsequent reindexes + * can correctly classify this file. */ + if (ms_count >= ms_cap) { + ms_cap *= PAIR_LEN; + cbm_file_hash_t *tmp = realloc(mode_skipped, (size_t)ms_cap * sizeof(*tmp)); if (!tmp) { + cbm_log_error("incremental.err", "msg", + "find_deleted_files_realloc_oom_ms"); break; } - deleted = tmp; + mode_skipped = tmp; } - deleted[count++] = strdup(stored[i].rel_path); + char *rp = strdup(stored[i].rel_path); + char *sh = stored[i].sha256 ? strdup(stored[i].sha256) : NULL; + if (!rp || (stored[i].sha256 && !sh)) { + /* OOM mid-record. Drop this entry rather than persist a + * row with a NULL rel_path that would silently fail the + * NOT NULL constraint in upsert and reintroduce the + * orphaned-node bug. */ + cbm_log_error("incremental.err", "msg", "find_deleted_files_strdup_oom", + "rel_path", stored[i].rel_path); + free(rp); + free(sh); + break; + } + mode_skipped[ms_count].project = NULL; /* unused by upsert API */ + mode_skipped[ms_count].rel_path = rp; + mode_skipped[ms_count].sha256 = sh; + mode_skipped[ms_count].mtime_ns = stored[i].mtime_ns; + mode_skipped[ms_count].size = stored[i].size; + ms_count++; + continue; } + + /* File is truly gone — record for purge. */ + if (del_count >= del_cap) { + del_cap *= PAIR_LEN; + char **tmp = realloc(deleted, (size_t)del_cap * sizeof(char *)); + if (!tmp) { + cbm_log_error("incremental.err", "msg", "find_deleted_files_realloc_oom"); + break; + } + deleted = tmp; + } + deleted[del_count++] = strdup(stored[i].rel_path); } cbm_ht_free(current); *out_deleted = deleted; - return count; + *out_mode_skipped = mode_skipped; + *out_mode_skipped_count = ms_count; + return del_count; +} + +/* Free a mode_skipped array allocated by find_deleted_files. */ +static void free_mode_skipped(cbm_file_hash_t *ms, int count) { + if (!ms) { + return; + } + for (int i = 0; i < count; i++) { + free((void *)ms[i].rel_path); + free((void *)ms[i].sha256); + } + free(ms); } /* ── Persist file hashes ─────────────────────────────────────────── */ static void persist_hashes(cbm_store_t *store, const char *project, cbm_file_info_t *files, - int file_count) { + int file_count, const cbm_file_hash_t *mode_skipped, + int mode_skipped_count) { + /* Current discovery: re-stat to capture any mtime/size that changed + * during the run, and write fresh hash rows for visited files. */ for (int i = 0; i < file_count; i++) { struct stat st; if (stat(files[i].path, &st) != 0) { @@ -163,6 +311,22 @@ static void persist_hashes(cbm_store_t *store, const char *project, cbm_file_inf cbm_store_upsert_file_hash(store, project, files[i].rel_path, "", stat_mtime_ns(&st), st.st_size); } + /* Mode-skipped (preserved): re-upsert hash rows from the previous DB + * so the next reindex can still classify these files correctly. Without + * this, an orphaned-node bug emerges where: + * - full mode indexes everything + * - fast mode runs and drops mode-skipped hash rows + * - file is then deleted on disk + * - next reindex's stored hashes don't include the file → noop or + * can't detect the deletion → graph nodes for the deleted file + * remain forever (or until a destructive rebuild). */ + if (mode_skipped) { + for (int i = 0; i < mode_skipped_count; i++) { + cbm_store_upsert_file_hash(store, project, mode_skipped[i].rel_path, + mode_skipped[i].sha256 ? mode_skipped[i].sha256 : "", + mode_skipped[i].mtime_ns, mode_skipped[i].size); + } + } } /* ── Registry seed visitor ────────────────────────────────────────── */ @@ -256,9 +420,13 @@ static void run_postpasses(cbm_pipeline_ctx_t *ctx, cbm_file_info_t *changed_fil itoa_buf((int)elapsed_ms(t))); } } -/* Delete old DB and dump merged graph + hashes to disk. */ +/* Delete old DB and dump merged graph + hashes to disk. + * Mode-skipped hash rows are preserved across the rebuild so subsequent + * reindexes can correctly distinguish "never indexed" from "indexed but + * not visited this pass". */ static void dump_and_persist(cbm_gbuf_t *gbuf, const char *db_path, const char *project, - cbm_file_info_t *files, int file_count) { + cbm_file_info_t *files, int file_count, + const cbm_file_hash_t *mode_skipped, int mode_skipped_count) { struct timespec t; cbm_clock_gettime(CLOCK_MONOTONIC, &t); @@ -276,7 +444,7 @@ static void dump_and_persist(cbm_gbuf_t *gbuf, const char *db_path, const char * cbm_store_t *hash_store = cbm_store_open_path(db_path); if (hash_store) { - persist_hashes(hash_store, project, files, file_count); + persist_hashes(hash_store, project, files, file_count, mode_skipped, mode_skipped_count); /* FTS5 rebuild after incremental dump. The btree dump path bypasses * any triggers that could have kept nodes_fts synchronized, so we @@ -323,18 +491,27 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil bool *is_changed = classify_files(files, file_count, stored, stored_count, &n_changed, &n_unchanged); - /* Find deleted files */ + /* Classify stored files absent from current discovery: truly-deleted + * (purge) vs mode-skipped (preserve nodes AND hash rows). */ char **deleted = NULL; - int deleted_count = find_deleted_files(files, file_count, stored, stored_count, &deleted); + cbm_file_hash_t *mode_skipped = NULL; + int mode_skipped_count = 0; + int deleted_count = + find_deleted_files(cbm_pipeline_repo_path(p), files, file_count, stored, stored_count, + &deleted, &mode_skipped, &mode_skipped_count); cbm_log_info("incremental.classify", "changed", itoa_buf(n_changed), "unchanged", - itoa_buf(n_unchanged), "deleted", itoa_buf(deleted_count)); + itoa_buf(n_unchanged), "deleted", itoa_buf(deleted_count), "mode_skipped", + itoa_buf(mode_skipped_count)); - /* Fast path: nothing changed → skip */ + /* Fast path: nothing changed → skip. The on-disk DB is left untouched, + * which means existing hash rows (including for any mode-skipped files + * that were already preserved by an earlier run) remain intact. */ if (n_changed == 0 && deleted_count == 0) { cbm_log_info("incremental.noop", "reason", "no_changes"); free(is_changed); free(deleted); + free_mode_skipped(mode_skipped, mode_skipped_count); cbm_store_free_file_hashes(stored, stored_count); cbm_store_close(store); return 0; @@ -374,6 +551,7 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil free(deleted[i]); } free(deleted); + free_mode_skipped(mode_skipped, mode_skipped_count); cbm_store_close(store); return CBM_NOT_FOUND; } @@ -424,8 +602,12 @@ int cbm_pipeline_run_incremental(cbm_pipeline_t *p, const char *db_path, cbm_fil free(changed_files); cbm_registry_free(registry); - /* Step 7: Dump to disk */ - dump_and_persist(existing, db_path, project, files, file_count); + /* Step 7: Dump to disk (preserves mode-skipped hash rows so the next + * reindex can correctly classify those files instead of seeing them + * as never-existed). */ + dump_and_persist(existing, db_path, project, files, file_count, mode_skipped, + mode_skipped_count); + free_mode_skipped(mode_skipped, mode_skipped_count); cbm_gbuf_free(existing); cbm_log_info("incremental.done", "elapsed_ms", itoa_buf((int)elapsed_ms(t0))); diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index cbe8fc86..3e46b6f7 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -15,6 +15,7 @@ #include #include #include "foundation/compat_thread.h" +#include #include #include #include "graph_buffer/graph_buffer.h" @@ -4525,6 +4526,154 @@ TEST(incremental_new_file_added) { PASS(); } +TEST(incremental_fast_preserves_mode_skipped_tools_dir) { + /* Regression: 2026-04-13. A fast-mode reindex after a full-mode index + * was silently destroying every file under FAST_SKIP_DIRS directories + * (`tools`, `scripts`, `bin`, `build`, `docs`, ...) by classifying them + * as deleted in find_deleted_files even though they still existed on + * disk. The Skyline graph lost packages/mcp/src/tools/ (18 files / ~500 + * nodes) mid-session when a concurrent /develop run obediently called + * mode='fast'. This test pins the additive semantics: lesser-mode + * reindexes must NOT delete files that are merely outside the current + * pass's discovery scope. Fix: find_deleted_files now stat()s each + * stored-but-missing file and only purges it if it is truly absent + * from disk. */ + char tmpdir[256]; + snprintf(tmpdir, sizeof(tmpdir), "/tmp/cbm_modeskip_XXXXXX"); + if (!cbm_mkdtemp(tmpdir)) { + SKIP("tmpdir"); + } + char dbpath[512]; + snprintf(dbpath, sizeof(dbpath), "%s/test.db", tmpdir); + + char path[512]; + FILE *f; + + /* main.go — root-level production code (visible in fast and full) */ + snprintf(path, sizeof(path), "%s/main.go", tmpdir); + f = fopen(path, "w"); + ASSERT_NOT_NULL(f); + fprintf(f, "package main\n\nfunc main() {\n}\n"); + fclose(f); + + /* tools/util.go — production code under a FAST_SKIP_DIRS directory. + * Full mode indexes it; fast mode skips it via the discover.c heuristic. */ + char tools_dir[512]; + snprintf(tools_dir, sizeof(tools_dir), "%s/tools", tmpdir); + cbm_mkdir_p(tools_dir, 0755); + snprintf(path, sizeof(path), "%s/tools/util.go", tmpdir); + f = fopen(path, "w"); + ASSERT_NOT_NULL(f); + fprintf(f, "package tools\n\nfunc Util() string {\n\treturn \"u\"\n}\n"); + fclose(f); + + /* Step 1: full-mode index — both files should be present */ + cbm_pipeline_t *p = cbm_pipeline_new(tmpdir, dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + char *project = strdup(cbm_pipeline_project_name(p)); + cbm_pipeline_free(p); + + cbm_store_t *s = cbm_store_open_path(dbpath); + ASSERT_NOT_NULL(s); + cbm_node_t *tools_nodes_before = NULL; + int tools_count_before = 0; + cbm_store_find_nodes_by_file(s, project, "tools/util.go", &tools_nodes_before, + &tools_count_before); + ASSERT_GT(tools_count_before, 0); /* full mode must see tools/util.go */ + cbm_store_free_nodes(tools_nodes_before, tools_count_before); + int total_before = cbm_store_count_nodes(s, project); + cbm_store_close(s); + + /* Step 2: fast-mode reindex — tools/util.go MUST survive (additive semantics) */ + p = cbm_pipeline_new(tmpdir, dbpath, CBM_MODE_FAST); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + cbm_pipeline_free(p); + + s = cbm_store_open_path(dbpath); + ASSERT_NOT_NULL(s); + cbm_node_t *tools_nodes_after = NULL; + int tools_count_after = 0; + cbm_store_find_nodes_by_file(s, project, "tools/util.go", &tools_nodes_after, + &tools_count_after); + /* The critical assertion: tools/util.go nodes must still be present after + * a fast-mode reindex that skipped the tools/ directory. Before the fix, + * this was 0. */ + ASSERT_GT(tools_count_after, 0); + ASSERT_EQ(tools_count_after, tools_count_before); /* same nodes, untouched */ + cbm_store_free_nodes(tools_nodes_after, tools_count_after); + + /* Sanity: total node count should not have collapsed by ~the size of tools/ */ + int total_after = cbm_store_count_nodes(s, project); + ASSERT_GTE(total_after, total_before); /* additive — never less */ + cbm_store_close(s); + + /* Step 3: mutate main.go and fast reindex — forces dump_and_persist to + * run (instead of the noop early-return path that step 2 hit). This is + * the real dangerous path: the gbuf gets loaded, mutated for main.go, + * dumped back to disk. tools/util.go must survive THAT cycle, not just + * the trivial noop path. Audit finding from 2026-04-13. */ + snprintf(path, sizeof(path), "%s/main.go", tmpdir); + f = fopen(path, "w"); + ASSERT_NOT_NULL(f); + fprintf(f, "package main\n\nfunc main() {\n\tprintln(\"changed\")\n}\n"); + fclose(f); + /* Bump mtime explicitly — some filesystems have coarse mtime resolution + * and the rewrite could land in the same tick as the original write. */ + struct stat mst; + if (stat(path, &mst) == 0) { + struct timespec times[2]; + times[0].tv_sec = mst.st_atime; + times[0].tv_nsec = 0; + times[1].tv_sec = mst.st_mtime + 5; + times[1].tv_nsec = 0; + utimensat(AT_FDCWD, path, times, 0); + } + + p = cbm_pipeline_new(tmpdir, dbpath, CBM_MODE_FAST); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + cbm_pipeline_free(p); + + s = cbm_store_open_path(dbpath); + ASSERT_NOT_NULL(s); + cbm_node_t *tools_nodes_run3 = NULL; + int tools_count_run3 = 0; + cbm_store_find_nodes_by_file(s, project, "tools/util.go", &tools_nodes_run3, &tools_count_run3); + /* tools/util.go nodes must STILL be present after a fast reindex that + * actually ran the full dump_and_persist cycle (not the noop fast-path). */ + ASSERT_EQ(tools_count_run3, tools_count_before); + cbm_store_free_nodes(tools_nodes_run3, tools_count_run3); + cbm_store_close(s); + + /* Step 4: actually delete tools/util.go from disk and full-reindex. + * Now it really is gone, so its nodes should be purged. This pins the + * other half of the contract: the stat-based check correctly identifies + * truly-deleted files as deleted. */ + snprintf(path, sizeof(path), "%s/tools/util.go", tmpdir); + unlink(path); + + p = cbm_pipeline_new(tmpdir, dbpath, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + cbm_pipeline_free(p); + + s = cbm_store_open_path(dbpath); + ASSERT_NOT_NULL(s); + cbm_node_t *tools_nodes_deleted = NULL; + int tools_count_deleted = 0; + cbm_store_find_nodes_by_file(s, project, "tools/util.go", &tools_nodes_deleted, + &tools_count_deleted); + ASSERT_EQ(tools_count_deleted, 0); /* truly deleted → purged */ + cbm_store_free_nodes(tools_nodes_deleted, tools_count_deleted); + cbm_store_close(s); + + free(project); + th_rmtree(tmpdir); + PASS(); +} + TEST(incremental_k8s_manifest_indexed) { /* Full index with a k8s manifest, then add a new manifest via incremental. * Verifies that cbm_pipeline_pass_k8s() runs during incremental re-index. */ @@ -5354,6 +5503,7 @@ SUITE(pipeline) { RUN_TEST(incremental_detects_changed_file); RUN_TEST(incremental_detects_deleted_file); RUN_TEST(incremental_new_file_added); + RUN_TEST(incremental_fast_preserves_mode_skipped_tools_dir); RUN_TEST(incremental_k8s_manifest_indexed); RUN_TEST(incremental_kustomize_module_indexed); /* Resource management & internal helper tests */ From 388f50ac91febfc61464d7f939e4a40bc81dca62 Mon Sep 17 00:00:00 2001 From: Peter Cox Date: Mon, 13 Apr 2026 18:12:41 +0100 Subject: [PATCH 2/2] incremental: persist_hashes checks upsert return codes, logs failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cycle 2 of the additive-merge audit (mC_AtUBH9DAh, commit fab42f9) flagged that persist_hashes was discarding the return code of cbm_store_upsert_file_hash on both the current-files loop and the new mode_skipped loop. A constraint violation, I/O error, or any other upsert failure silently dropped one or more hash rows. Pre-existing pattern for the current-files loop (just a perf drag — next reindex sees the file as new and re-parses it). New risk for the mode_skipped loop: a silent failure leaves the file's nodes preserved in the graph but the hash row missing from the new DB. The next reindex can't classify the file. If it's later deleted on disk, the deletion isn't detected → orphaned graph nodes survive forever — exactly the failure mode mC_AtUBH9DAh was designed to prevent, just behind a different trigger. Fix --- persist_hashes now: - checks rc on every upsert call in both loops - on rc != CBM_STORE_OK, logs a warning with scope (current vs mode_skipped), rel_path, and the rc value - continues to the next file (partial-failure policy: partial preservation is better than total loss) - at end, if any failures, emits a summary line with both counts Function header documents the partial-failure policy explicitly so a future reader doesn't mistake "continue on error" for "ignore error". Test ---- New test `store_file_hash_upsert_rejects_null_required_fields` in test_store_nodes.c pins the API contract that persist_hashes depends on: cbm_store_upsert_file_hash returns CBM_STORE_ERR (not silent OK) when a NOT NULL column would receive SQL NULL. Tests three NOT NULL violations (rel_path, sha256, project) and asserts that a previously-valid row is still present afterwards (partial-failure isolation at the store level). If a future schema change relaxes any of these NOT NULL constraints, the downstream warning becomes silent and the orphaned-node bug class can re-emerge — this test catches that change at the right layer. Audit verdict: ships clean. No CRITICAL/MAJOR/MINOR findings. All 2742 tests pass under ASan + UBSan + LeakSanitizer. PM task: claude-connectors/codebase-memory-persist-hashes-ignores-upsert-return-codes (KepDJhBEQNoW) Co-Authored-By: Claude Opus 4.6 (1M context) --- src/pipeline/pipeline_incremental.c | 48 +++++++++++++++++++++++++---- tests/test_store_nodes.c | 43 ++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/src/pipeline/pipeline_incremental.c b/src/pipeline/pipeline_incremental.c index db2c072f..1ae75862 100644 --- a/src/pipeline/pipeline_incremental.c +++ b/src/pipeline/pipeline_incremental.c @@ -298,9 +298,24 @@ static void free_mode_skipped(cbm_file_hash_t *ms, int count) { /* ── Persist file hashes ─────────────────────────────────────────── */ +/* Persist file hash rows for the current discovery and any mode-skipped + * files preserved from the previous DB. + * + * Partial-failure policy: an `upsert` failure on any single row is logged + * as a warning and the loop continues. We deliberately do NOT abort the + * whole reindex on a single bad row — partial preservation is better than + * total loss, and a transient failure on one file should not invalidate + * the entire incremental update. The trade-off is that a silently-failed + * row produces the same downstream effect as if the file were never + * indexed at all (forced re-parse on the next run for current-files, + * potential orphaned-node revival for mode_skipped). The warning surface + * is the only signal that something went wrong. */ static void persist_hashes(cbm_store_t *store, const char *project, cbm_file_info_t *files, int file_count, const cbm_file_hash_t *mode_skipped, int mode_skipped_count) { + int current_failed = 0; + int ms_failed = 0; + /* Current discovery: re-stat to capture any mtime/size that changed * during the run, and write fresh hash rows for visited files. */ for (int i = 0; i < file_count; i++) { @@ -308,9 +323,15 @@ static void persist_hashes(cbm_store_t *store, const char *project, cbm_file_inf if (stat(files[i].path, &st) != 0) { continue; } - cbm_store_upsert_file_hash(store, project, files[i].rel_path, "", stat_mtime_ns(&st), - st.st_size); + int rc = cbm_store_upsert_file_hash(store, project, files[i].rel_path, "", + stat_mtime_ns(&st), st.st_size); + if (rc != CBM_STORE_OK) { + cbm_log_warn("incremental.persist_hash_failed", "scope", "current", "rel_path", + files[i].rel_path, "rc", itoa_buf(rc)); + current_failed++; + } } + /* Mode-skipped (preserved): re-upsert hash rows from the previous DB * so the next reindex can still classify these files correctly. Without * this, an orphaned-node bug emerges where: @@ -319,14 +340,29 @@ static void persist_hashes(cbm_store_t *store, const char *project, cbm_file_inf * - file is then deleted on disk * - next reindex's stored hashes don't include the file → noop or * can't detect the deletion → graph nodes for the deleted file - * remain forever (or until a destructive rebuild). */ + * remain forever (or until a destructive rebuild). + * + * A failure here is more serious than a current-files failure because + * it can revive the orphaned-node bug for that specific file. Logged + * with scope=mode_skipped so the warning is searchable. */ if (mode_skipped) { for (int i = 0; i < mode_skipped_count; i++) { - cbm_store_upsert_file_hash(store, project, mode_skipped[i].rel_path, - mode_skipped[i].sha256 ? mode_skipped[i].sha256 : "", - mode_skipped[i].mtime_ns, mode_skipped[i].size); + int rc = cbm_store_upsert_file_hash(store, project, mode_skipped[i].rel_path, + mode_skipped[i].sha256 ? mode_skipped[i].sha256 + : "", + mode_skipped[i].mtime_ns, mode_skipped[i].size); + if (rc != CBM_STORE_OK) { + cbm_log_warn("incremental.persist_hash_failed", "scope", "mode_skipped", + "rel_path", mode_skipped[i].rel_path, "rc", itoa_buf(rc)); + ms_failed++; + } } } + + if (current_failed > 0 || ms_failed > 0) { + cbm_log_warn("incremental.persist_summary", "current_failed", itoa_buf(current_failed), + "mode_skipped_failed", itoa_buf(ms_failed)); + } } /* ── Registry seed visitor ────────────────────────────────────────── */ diff --git a/tests/test_store_nodes.c b/tests/test_store_nodes.c index b433ff2a..cbcb9e72 100644 --- a/tests/test_store_nodes.c +++ b/tests/test_store_nodes.c @@ -461,6 +461,48 @@ TEST(store_file_hash_crud) { PASS(); } +TEST(store_file_hash_upsert_rejects_null_required_fields) { + /* Pins the API contract that `cbm_store_upsert_file_hash` returns + * CBM_STORE_ERR (not silent OK) when a NOT NULL column would receive + * SQL NULL. This is the failure mode that + * `pipeline_incremental.c:persist_hashes` checks for and logs as + * `incremental.persist_hash_failed`. If this contract ever changes + * (e.g. the schema relaxes NOT NULL on rel_path or sha256), the + * downstream warning becomes silent and the orphaned-node bug class + * can re-emerge. Track that change here, not just in the consumer. */ + cbm_store_t *s = cbm_store_open_memory(); + ASSERT_NOT_NULL(s); + cbm_store_upsert_project(s, "test", "/tmp/test"); + + /* Sanity: a fully-valid upsert returns OK. */ + int rc = cbm_store_upsert_file_hash(s, "test", "main.go", "abc123", 1000000, 512); + ASSERT_EQ(rc, CBM_STORE_OK); + + /* NULL sha256 violates NOT NULL on file_hashes.sha256 → must return ERR. */ + rc = cbm_store_upsert_file_hash(s, "test", "other.go", NULL, 2000000, 1024); + ASSERT_EQ(rc, CBM_STORE_ERR); + + /* NULL rel_path violates NOT NULL on file_hashes.rel_path → must return ERR. */ + rc = cbm_store_upsert_file_hash(s, "test", NULL, "deadbeef", 3000000, 2048); + ASSERT_EQ(rc, CBM_STORE_ERR); + + /* NULL project violates NOT NULL on file_hashes.project → must return ERR. */ + rc = cbm_store_upsert_file_hash(s, NULL, "third.go", "cafebabe", 4000000, 4096); + ASSERT_EQ(rc, CBM_STORE_ERR); + + /* The valid row from earlier must still be present — partial-failure + * policy: a single bad upsert does not corrupt or remove other rows. */ + cbm_file_hash_t *hashes = NULL; + int count = 0; + cbm_store_get_file_hashes(s, "test", &hashes, &count); + ASSERT_EQ(count, 1); + ASSERT_STR_EQ(hashes[0].rel_path, "main.go"); + cbm_store_free_file_hashes(hashes, count); + + cbm_store_close(s); + PASS(); +} + /* ── Properties JSON round-trip ─────────────────────────────────── */ TEST(store_node_properties_json) { @@ -1523,6 +1565,7 @@ SUITE(store_nodes) { RUN_TEST(store_node_batch_empty); RUN_TEST(store_cascade_delete); RUN_TEST(store_file_hash_crud); + RUN_TEST(store_file_hash_upsert_rejects_null_required_fields); RUN_TEST(store_node_properties_json); RUN_TEST(store_node_null_properties); RUN_TEST(store_find_by_file_overlap);