From 904d80431763bb91d8c16396e4e937b6bd365cf2 Mon Sep 17 00:00:00 2001
From: Klappy <klappy@users.noreply.github.com>
Date: Sun, 26 Apr 2026 02:57:18 +0000
Subject: [PATCH] fix(telemetry): tracer recognizes file:* spans for oddkit_get
 fast path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to #138. The streaming-race fix (#138) corrected the timing
bug, but oddkit_get for klappy:// URIs still records cache_tier="none"
because of a *separate* defect: the tracer's _indexSource setter only
recognized 'index' and 'index-build' labels.

runGet for klappy:// URIs takes a fast path that skips getIndex
entirely (URI → path is computable without the index). It calls
getFile directly. The fetcher emits 'file:${path}' spans
(memory/r2/build), but the setter's exact-match check
(label === 'index' || label === 'index-build') ignored them. Result:
_indexSource stayed null, blob9 = 'none'.

This is the dominant get path: 'klappy://' URIs make up ~95% of
oddkit_get calls. So even after #138, ~95% of get calls had
cache_tier='none' — defeating the dashboard's purpose for that tool.

Fix: extend the setter to also recognize 'file:*' labels alongside
'index' / 'index-build'. First-wins guard preserved, so:

  - runSearch: 'index' span fires first → index tier wins.
    Subsequent 'file:*' spans for hit fetches are ignored.
    blob9 = index tier (correct, that's the primary work).

  - runGet (klappy://): only 'file:*' span fires → file tier wins.
    blob9 = file tier (correct, that's the primary work).

  - runGet (kb://, odd://): 'index' span fires first (URI lookup),
    then 'file:*'. Index wins. blob9 = index tier (acceptable
    approximation for the ~5% of non-klappy gets).

  - 'file-r2:*' (R2 miss with source='miss') excluded by adding
    'source !== "miss"' guard. 'miss' is not a tier and must not
    appear in blob9.

Internal field name (_indexSource) and getter name (indexSource)
stay unchanged. The public envelope key 'index_source' in
tracer.toJSON() is part of the response contract and renaming would
be a breaking change. The doc-comment on the getter is updated to
reflect the broader semantic (primary cache tier, not strictly the
navigability index).

Regression tests added (4):
  1. file:* span with each tier (memory/r2/build) populates indexSource
  2. Index wins when fired before file spans (search pattern)
  3. file-r2:* miss spans correctly excluded
  4. Original index-only behavior preserved (no regression)

Test count: 18 → 22 passing.
---
 workers/src/tracing.ts                      | 32 +++++--
 workers/test/telemetry-integration.test.mjs | 92 +++++++++++++++++++++
 2 files changed, 118 insertions(+), 6 deletions(-)

diff --git a/workers/src/tracing.ts b/workers/src/tracing.ts
index 9daae6d..5a86183 100644
--- a/workers/src/tracing.ts
+++ b/workers/src/tracing.ts
@@ -36,9 +36,23 @@ export class RequestTracer {
       ...(detail ? { detail } : {}),
     });
 
-    // Track the index source for telemetry (first span matching an index tier)
-    if ((label === "index" || label === "index-build") && source && !this._indexSource) {
-      this._indexSource = source;
+    // Track the primary cache tier for telemetry (first span matching a data
+    // fetch). Three label families count:
+    //   - "index" / "index-build"  → navigability index fetch (search/orient/etc.)
+    //   - "file:*"                 → individual file fetch (oddkit_get fast path)
+    // First-wins: actions like runSearch call getIndex *before* getFile, so
+    // the index tier wins for those — file:* spans that fire later are
+    // ignored. Actions like runGet for klappy:// URIs call getFile only,
+    // so the file tier wins. file-r2:* (r2 miss with source="miss") is
+    // excluded because "miss" is not a tier.
+    if (!this._indexSource && source && source !== "miss") {
+      if (
+        label === "index" ||
+        label === "index-build" ||
+        label.startsWith("file:")
+      ) {
+        this._indexSource = source;
+      }
     }
   }
 
@@ -64,13 +78,19 @@ export class RequestTracer {
   }
 
   /**
-   * Which storage tier served the navigability index for this request.
-   * This is the single summary value that feeds telemetry blob9.
+   * Which storage tier served the primary data fetch for this request.
+   * This is the single summary value that feeds telemetry blob9 (cache_tier).
    * "memory" = module-level cache hit (0ms, best case)
    * "cache"  = Cache API edge hit (~1ms)
    * "r2"     = R2 durable storage read (~40ms)
    * "build"  = cold build from ZIP (seconds, worst case)
-   * null     = no index was loaded (e.g. version action)
+   * "github" = GitHub network fetch (when no R2/cache layers exist)
+   * "none"   = no data fetch happened (e.g. version, time actions)
+   *
+   * The value reflects the primary fetch — for actions like search/orient
+   * that load the navigability index first, this is the index tier. For
+   * oddkit_get with a klappy:// URI (the fast path, no index needed), this
+   * is the file fetch tier. Either way: where did the work come from?
    */
   get indexSource(): string {
     return this._indexSource ?? "none";
diff --git a/workers/test/telemetry-integration.test.mjs b/workers/test/telemetry-integration.test.mjs
index 284fcbc..18136c6 100644
--- a/workers/test/telemetry-integration.test.mjs
+++ b/workers/test/telemetry-integration.test.mjs
@@ -617,5 +617,97 @@ await test("cache_tier reads must happen after the streaming response body compl
   );
 });
 
+// ─── Test 8: file:* spans count as primary tier (oddkit_get fast path) ──────
+
+await test("tracer recognizes file:* spans as primary tier when no index span fires", async () => {
+  // oddkit_get for klappy:// URIs takes the fast path: no getIndex call,
+  // straight to getFile. The fetcher emits `file:${path}` spans (memory/r2/
+  // build). Before this fix, only "index" / "index-build" labels updated
+  // _indexSource, so klappy:// gets always recorded cache_tier="none" even
+  // after the streaming-race fix. This test pins the broader recognition.
+
+  const tracer = new RequestTracer();
+  tracer.addSpan("file:canon/foo.md", 12, "memory");
+  assert.equal(
+    tracer.indexSource,
+    "memory",
+    "file:* span with source 'memory' must populate indexSource (klappy:// fast path)",
+  );
+
+  // r2 source on file fetch
+  const tracer2 = new RequestTracer();
+  tracer2.addSpan("file:canon/bar.md", 40, "r2");
+  assert.equal(tracer2.indexSource, "r2", "file:* with r2 source captured");
+
+  // build source on file fetch (cold ZIP extract)
+  const tracer3 = new RequestTracer();
+  tracer3.addSpan("file:canon/baz.md", 1500, "build", "zip-extract");
+  assert.equal(tracer3.indexSource, "build", "file:* with build source captured");
+});
+
+await test("tracer keeps index-wins when index span fires before file spans (search pattern)", async () => {
+  // runSearch calls getIndex first (emits `index` span), then getFile for
+  // each hit (emits `file:*` spans). First-wins guard ensures the index
+  // tier — which represents the primary work — wins, not the per-file
+  // tiers from secondary fetches.
+
+  const tracer = new RequestTracer();
+  tracer.addSpan("index", 33, "cache");
+  tracer.addSpan("file:canon/result-1.md", 100, "r2");
+  tracer.addSpan("file:canon/result-2.md", 250, "build", "zip-extract");
+
+  assert.equal(
+    tracer.indexSource,
+    "cache",
+    "index tier wins when it fires first (search/orient/catalog pattern)",
+  );
+});
+
+await test("tracer file:* recognition still excludes file-r2:* miss spans", async () => {
+  // file-r2:${path} fires on R2 miss with source="miss". "miss" is not a
+  // tier and must not be recorded as one. The setter excludes any span
+  // whose source is the literal string "miss".
+
+  const tracer = new RequestTracer();
+  tracer.addSpan("file-r2:canon/foo.md", 100, "miss");
+  assert.equal(
+    tracer.indexSource,
+    "none",
+    "file-r2:* with source 'miss' must not be captured as a tier",
+  );
+
+  // After the miss, the actual fetch fires with a real source — that one
+  // should be captured.
+  tracer.addSpan("file:canon/foo.md", 200, "build", "zip-extract");
+  assert.equal(
+    tracer.indexSource,
+    "build",
+    "real file fetch after r2-miss is captured normally",
+  );
+});
+
+await test("tracer existing index-only behavior still works (no regression)", async () => {
+  // Sanity: the original case (just index/index-build with no file:* spans)
+  // must continue to work exactly as before.
+
+  const tracer1 = new RequestTracer();
+  tracer1.addSpan("index", 0, "memory");
+  assert.equal(tracer1.indexSource, "memory", "memory index tier captured");
+
+  const tracer2 = new RequestTracer();
+  tracer2.addSpan("index-build", 2000, "build");
+  assert.equal(tracer2.indexSource, "build", "index-build with build source captured");
+
+  // Without a recognized data fetch, indexSource is "none"
+  const tracer3 = new RequestTracer();
+  tracer3.addSpan("action:version", 5);
+  tracer3.addSpan("sha:klappy.dev", 0, "memory");
+  assert.equal(
+    tracer3.indexSource,
+    "none",
+    "action and sha spans alone do not count as primary tier",
+  );
+});
+
 console.log(`\n${pass} passed, ${fail} failed`);
 process.exit(fail > 0 ? 1 : 0);