From fbe51e461670189e3e271c2d9c3b0fa59aa8a9d4 Mon Sep 17 00:00:00 2001 From: Klappy via Claude Date: Tue, 21 Apr 2026 17:49:47 +0000 Subject: [PATCH] fix(telemetry): blob8 falls back to BUILD_VERSION; clarify duration_ms semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit blob8 (worker_version) was logging as the literal 'unknown' for 100% of tool calls in production. Root cause: env.ODDKIT_VERSION is only injected by 'npm run deploy' via --var, but Cloudflare's auto-deploy from GitHub invokes wrangler directly from wrangler.toml and never executes the deploy script. Other sites (index.ts, orchestrate.ts) already fall back to pkg.version; telemetry.ts was missed. Fix: import pkg from ../package.json (mirroring index.ts pattern), define BUILD_VERSION = pkg.version, use it as the fallback. Telemetry now reports a real semver under the canonical deploy path. Also clarifies the duration_ms docstring. The schema previously said 'request processing time (measured by caller)' which under-described the measurement. The value is full MCP request wall-clock measured at the worker edge — V8 cold-start, KB fetch, MCP SDK overhead, action handler compute, all included. This is NOT the per-action debug.duration_ms in tool envelopes (which measures only the action handler's internal compute). The discrepancy explains why telemetry shows oddkit_time avg 269ms / max 9362ms while debug.duration_ms reports near-zero. No behavior change to duration_ms measurement. Documentation only. See: klappy://canon/constraints/telemetry-governance --- workers/src/telemetry.ts | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/workers/src/telemetry.ts b/workers/src/telemetry.ts index 718a36d..6a77881 100644 --- a/workers/src/telemetry.ts +++ b/workers/src/telemetry.ts @@ -13,16 +13,37 @@ * blob5: consumer_source — how label was resolved (e.g. "user-agent") * blob6: knowledge_base_url — which repo is being served * blob7: document_uri — for get calls, the URI requested - * blob8: worker_version — oddkit version string + * blob8: worker_version — oddkit semver string. Sourced from env.ODDKIT_VERSION + * (deploy-time injection) with a build-time fallback + * to workers/package.json::version. Never "unknown" + * on a normal deploy. * blob9: cache_tier — which storage tier served the index (E0008.1) * double1: count — always 1 (for SUM aggregation) - * double2: duration_ms — MCP request processing time (measured by caller) + * double2: duration_ms — Full MCP request wall-clock, measured at the worker + * edge from request entry through handler return. + * Includes V8 cold-start, KB fetch, MCP SDK overhead, + * and action handler compute. NOT the same as the + * per-action `debug.duration_ms` returned in tool + * envelopes — that field measures only the action + * handler's internal compute. Expect a long tail on + * cache-miss requests even for trivial actions like + * oddkit_time. * index1: sampling_key — consumer label (for sampling consistency) * * See: klappy://canon/constraints/telemetry-governance */ import type { Env } from "./zip-baseline-fetcher"; +import pkg from "../package.json"; + +// Build-time fallback for blob8 (worker_version). env.ODDKIT_VERSION is +// injected via `--var ODDKIT_VERSION:...` when deploying through the +// `npm run deploy` script, but Cloudflare's auto-deploy from GitHub does +// not execute that script — it invokes wrangler directly with the config +// in wrangler.toml, leaving env.ODDKIT_VERSION undefined. Falling back to +// pkg.version (read from workers/package.json at build time) gives +// telemetry a real version string under the canonical deploy path. +const BUILD_VERSION = pkg.version; // ────────────────────────────────────────────────────────────────────────────── // Sanitization @@ -215,7 +236,7 @@ export function recordTelemetry(request: Request, env: Env, durationMs: number, consumerSource, toolCall?.knowledgeBaseUrl || env.DEFAULT_KNOWLEDGE_BASE_URL || "", documentUri, - env.ODDKIT_VERSION || "unknown", + env.ODDKIT_VERSION || BUILD_VERSION, cacheTier || "none", // blob9: E0008.1 x-ray cache tier ], doubles: [1, durationMs],