From 8350ec8a6de9658e2e666a89368448dec0432e17 Mon Sep 17 00:00:00 2001 From: jdubdevs Date: Fri, 15 May 2026 21:03:04 -0700 Subject: [PATCH] fix(indexer): wikilinks are directional; record unresolved targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #35, closes #36. Two ingest-time bugs in `build_edges_for_file`: 1. (#35) unresolved_links table never populated. Indexer extracted wikilinks but the `Some(target_id)` branch was the only branch — `None` (target didn't resolve) was a silent drop. The helper `Store::insert_unresolved_link` existed and worked but was never called from the index path. 2. (#36) edges stored bidirectionally. For each `[[target]]` resolved in source A → target B, the indexer wrote TWO directed edges: `(A, B, 'wikilink')` AND `(B, A, 'wikilink')`. The reverse edge was inserted regardless of B's actual content. Effect: outgoing- edge queries returned each file's actual wikilink targets PLUS every file that linked INTO it; asymmetric-link detection via `NOT EXISTS (reverse edge)` was structurally always-false. Fix: - Resolve `file_id → source_path` via `get_file_by_id` so we can pass `source_file` to `insert_unresolved_link`. - Clear pre-existing `unresolved_links` entries for the source file before re-recording — supports incremental re-index without stale entries accumulating. - Insert exactly ONE directed edge `(file_id → target_id)` per resolved wikilink. The reverse edge is inserted (if it should exist) when `build_edges_for_file` is called on the target file with its own content — which is the natural sequence during a full index pass. - Self-links (target_id == file_id) continue to be ignored. - Unresolved targets land in `unresolved_links` via `insert_unresolved_link`. Tests added in `src/indexer.rs::tests`: - `test_wikilink_edges_are_directional_not_bidirectional` — A has `[[B]]`, B has no wikilinks; verifies A→B exists, B→A doesn't, and B has 1 incoming. - `test_unresolved_wikilinks_are_recorded` — A has one resolving and one broken wikilink; verifies the broken target lands in `unresolved_links` with correct source. - `test_unresolved_links_cleared_on_re_index` — when source is re-indexed with the broken wikilink removed, the prior unresolved entry is cleared (no stale accumulation). The existing `test_edge_building_during_index` continues to pass because that test happens to use files where both directions DO have wikilinks in source content — so the natural a-then-b indexing sequence still produces both directed edges correctly. All 461 library tests pass (`cargo test --lib`). Verified against a 1,646-file Obsidian vault after `engraph index --rebuild` with the patched binary: previously-empty `unresolved_links` table now populated, previously-fabricated reverse edges no longer present. --- src/indexer.rs | 150 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 145 insertions(+), 5 deletions(-) diff --git a/src/indexer.rs b/src/indexer.rs index 3835655..a6ba91b 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -166,14 +166,44 @@ fn resolve_link_target(store: &Store, target: &str) -> Result> { } /// Build wikilink edges for a single file. +/// +/// For each `[[target]]` wikilink in `content`: +/// - If target resolves: insert ONE directed edge from source → target. +/// Wikilinks are directional — the reverse edge should only exist if +/// the target's own content contains a wikilink back to source. (That +/// reverse edge gets inserted when `build_edges_for_file` is called on +/// the target file with its own content.) +/// - If target doesn't resolve: record in `unresolved_links` for +/// downstream broken-wikilink tooling. +/// +/// Clears pre-existing `unresolved_links` entries for the source file +/// before re-recording, so this is safe to call repeatedly during +/// incremental indexing. pub fn build_edges_for_file(store: &Store, file_id: i64, content: &str) -> Result<()> { + let source_path = match store.get_file_by_id(file_id)? { + Some(f) => f.path, + None => return Ok(()), // file vanished mid-index; no-op + }; + + // Clear stale unresolved entries for this file before re-recording. + store.clear_unresolved_links_for_file(&source_path)?; + let targets = extract_wikilink_targets(content); for target in targets { - if let Some(target_id) = resolve_link_target(store, &target)? - && target_id != file_id - { - store.insert_edge(file_id, target_id, "wikilink")?; - store.insert_edge(target_id, file_id, "wikilink")?; + match resolve_link_target(store, &target)? { + Some(target_id) if target_id != file_id => { + store.insert_edge(file_id, target_id, "wikilink")?; + // NOTE: do NOT insert a reverse edge here. Wikilinks are + // directional — the reverse edge is inserted (if it exists) + // when build_edges_for_file is called on the target file + // with its own content. + } + Some(_) => { + // Self-link — ignore + } + None => { + store.insert_unresolved_link(&source_path, &target)?; + } } } Ok(()) @@ -912,6 +942,116 @@ mod tests { assert_eq!(b_out[0].0, f_a); } + #[test] + fn test_wikilink_edges_are_directional_not_bidirectional() { + // Regression test for the "edges stored bidirectionally" bug. + // A has [[B]]; B has NO wikilink to A. Expected: A→B edge exists, + // B→A edge does NOT exist. Pre-fix, the indexer fabricated the + // reverse edge regardless of B's actual content. + let tmp = TempDir::new().unwrap(); + let root = tmp.path(); + write_file(root, "a.md", "# A\nSee [[b]] for details."); + write_file(root, "b.md", "# B\nNo backlink here."); + + let store = Store::open_memory().unwrap(); + let f_a = store + .insert_file("a.md", "h1", 100, &[], "aaa111", None, None) + .unwrap(); + let f_b = store + .insert_file("b.md", "h2", 100, &[], "bbb222", None, None) + .unwrap(); + + let content_a = std::fs::read_to_string(root.join("a.md")).unwrap(); + let content_b = std::fs::read_to_string(root.join("b.md")).unwrap(); + + build_edges_for_file(&store, f_a, &content_a).unwrap(); + build_edges_for_file(&store, f_b, &content_b).unwrap(); + + // A → B exists (A's content has [[b]]) + let a_out = store.get_outgoing(f_a, Some("wikilink")).unwrap(); + assert_eq!(a_out.len(), 1, "A should have 1 outgoing wikilink"); + assert_eq!(a_out[0].0, f_b); + + // B → A does NOT exist (B's content has no wikilink to A) + let b_out = store.get_outgoing(f_b, Some("wikilink")).unwrap(); + assert_eq!( + b_out.len(), + 0, + "B should have 0 outgoing wikilinks (B has no [[a]] in content)" + ); + + // But B should have 1 INCOMING from A + let b_in = store.get_incoming(f_b, Some("wikilink")).unwrap(); + assert_eq!(b_in.len(), 1, "B should have 1 incoming wikilink (from A)"); + assert_eq!(b_in[0].0, f_a); + } + + #[test] + fn test_unresolved_wikilinks_are_recorded() { + // Regression test for the "unresolved_links table never populated" bug. + // A has [[b]] (resolves) and [[nonexistent-target]] (doesn't). + // Expected: the unresolved target is recorded in unresolved_links. + let tmp = TempDir::new().unwrap(); + let root = tmp.path(); + write_file( + root, + "a.md", + "# A\nSee [[b]] for details.\nAlso [[nonexistent-target]] for nothing.", + ); + write_file(root, "b.md", "# B"); + + let store = Store::open_memory().unwrap(); + let f_a = store + .insert_file("a.md", "h1", 100, &[], "aaa111", None, None) + .unwrap(); + let _f_b = store + .insert_file("b.md", "h2", 100, &[], "bbb222", None, None) + .unwrap(); + + let content_a = std::fs::read_to_string(root.join("a.md")).unwrap(); + build_edges_for_file(&store, f_a, &content_a).unwrap(); + + // Unresolved target should be recorded + let unresolved = store.get_unresolved_links().unwrap(); + assert_eq!( + unresolved.len(), + 1, + "Should have 1 unresolved wikilink (nonexistent-target)" + ); + assert_eq!(unresolved[0].0, "a.md"); + assert_eq!(unresolved[0].1, "nonexistent-target"); + } + + #[test] + fn test_unresolved_links_cleared_on_re_index() { + // When build_edges_for_file is called again on the same source + // (incremental update / re-index), stale unresolved entries for + // that source should be cleared before re-recording. Otherwise + // entries accumulate even after the user fixes broken links. + let tmp = TempDir::new().unwrap(); + let root = tmp.path(); + write_file(root, "a.md", "# A\nSee [[broken-target]] for nothing."); + + let store = Store::open_memory().unwrap(); + let f_a = store + .insert_file("a.md", "h1", 100, &[], "aaa111", None, None) + .unwrap(); + + let content_a_v1 = std::fs::read_to_string(root.join("a.md")).unwrap(); + build_edges_for_file(&store, f_a, &content_a_v1).unwrap(); + assert_eq!(store.get_unresolved_links().unwrap().len(), 1); + + // Now A is edited to remove the broken wikilink entirely. + let content_a_v2 = "# A\nNo wikilinks here now."; + build_edges_for_file(&store, f_a, content_a_v2).unwrap(); + let unresolved = store.get_unresolved_links().unwrap(); + assert_eq!( + unresolved.len(), + 0, + "Stale unresolved entry should be cleared after re-index" + ); + } + #[test] fn test_extract_aliases_from_frontmatter() { let content = "---\ntags:\n - person\naliases:\n - Johnny\n - JN\n---\n# John Nelson";