.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml (new file, +54 lines)
# ======================================================================
# STATUS: SPECULATIVE / NON-BLOCKING
# ----------------------------------------------------------------------
# This file is NOT the canonical PR framing for fork PR #2. It depends on
# an unmerged upstream patch that wires MOONCAKE_INPUT through
# .github/workflows/benchmark-multiturn-tmpl.yml. Do NOT reference from
# workflows until that path is approved upstream. See
# datasets/isb1/RECIPE_MOONCAKE.md for the speculative patch set, or
# https://github.com/OCWC22/InferenceX/pull/2 for the canonical
# opt-in framing (corpus at datasets/isb1/mooncake/ consumed via existing
# --custom-dataset-type mooncake_trace, zero harness patches required).
# ======================================================================
# ISB1 sweep cells for Cam's aiperf / mooncake_trace replay flow.
# Schema mirrors .github/configs/multiturn-agentic-trace.yaml and
# .github/configs/multiturn-agentic-trace-isb1.yaml.
# 8k code cells expect MOONCAKE_INPUT=datasets/isb1/mooncake/core/code_8k1k/.
# 32k chat cells expect MOONCAKE_INPUT=datasets/isb1/mooncake/extension_32k/chat_32k1k*/.
# 131k code cells expect MOONCAKE_INPUT=datasets/isb1/mooncake/extension_131k/*_131k1k*/.
# Preview 500k / 1m lanes are intentionally omitted in v1.
#
# offload values:
# on — KV offload enabled (VLLM_USE_SIMPLE_KV_OFFLOAD=1)
# off — KV offload disabled (baseline)
# noprefix — offload off AND --no-enable-prefix-caching (clean-cache floor).
# Cam's h100 lane already wires the flag in
# multiturn_fp8_h100_lmcache_aiperf.sh:123-126; these cells just
# surface the third mode so the sweep generator emits it.

h100-fp8-qwen3-isb1-mooncake-code-8k-lmcache:
tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off", "noprefix"]}
tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off", "noprefix"]}

h200-fp8-qwen3-isb1-mooncake-code-8k-lmcache:
tp2: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off", "noprefix"]}
tp4: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off", "noprefix"]}

h200-fp8-qwen3-isb1-mooncake-chat-32k-lmcache:
tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off", "noprefix"]}
tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off", "noprefix"]}

h200-fp8-qwen3-isb1-mooncake-code-131k-lmcache:
tp4: {users: [1, 2, 4, 8], offload: ["on", "off", "noprefix"]}
tp8: {users: [1, 2, 4, 8, 16], offload: ["on", "off", "noprefix"]}

b200-fp4-dsr1-isb1-mooncake-code-8k-lmcache:
tp4: {ep: 4, users: [4, 8, 16, 32, 64, 128, 256], offload: ["on", "off"]}
tp8: {ep: 8, users: [8, 16, 32, 64, 128, 256, 512], offload: ["on", "off"]}

b200-fp4-dsr1-isb1-mooncake-chat-32k-lmcache:
tp4: {ep: 4, users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]}
tp8: {ep: 8, users: [1, 2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]}

b200-fp4-dsr1-isb1-mooncake-code-131k-lmcache:
tp8: {ep: 8, users: [1, 2, 4, 8, 16], offload: ["on", "off"]}
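The three offload values above are just strings in the sweep cells; the mapping from each value to the environment variable and server flag named in the header comments can be sketched as follows. The function itself is illustrative, not the harness's actual generator code:

```python
def offload_env(mode):
    """Map an offload sweep value to (env vars, extra server flags).

    Names come from the config comments above:
      on       -> VLLM_USE_SIMPLE_KV_OFFLOAD=1
      off      -> baseline, no env var
      noprefix -> offload off AND --no-enable-prefix-caching
    """
    if mode == "on":
        return {"VLLM_USE_SIMPLE_KV_OFFLOAD": "1"}, []
    if mode == "off":
        return {}, []
    if mode == "noprefix":
        return {}, ["--no-enable-prefix-caching"]
    raise ValueError(f"unknown offload mode: {mode}")

print(offload_env("noprefix"))
```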
.github/configs/multiturn-agentic-trace-isb1.yaml (new file, +54 lines)
# ISB1 sweep cells for Cam's kv-cache-tester replay flow.
# Schema mirrors .github/configs/multiturn-agentic-trace.yaml.
# Merge these top-level keys into that file (or extend the sweep workflow
# to glob .github/configs/multiturn-agentic-trace*.yaml) to include ISB1 sweeps.
# 8k code cells map to datasets/isb1/converted/core/code_8k1k/.
# 32k chat cells map to datasets/isb1/converted/extension_32k/chat_32k1k*/.
# 131k code/chat cells map to datasets/isb1/converted/extension_131k/*_131k1k*/.
# 500k preview cells map to datasets/isb1/converted/preview/long_context_500k/.
# 1m preview cells map to datasets/isb1/converted/preview/long_context_1m/.
# Expected TRACE_DIR is either datasets/isb1/converted/ or one of those subdirs.

h200-fp8-qwen3-isb1-code-8k:
tp2: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]}
tp4: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]}

h200-fp8-qwen3-isb1-chat-32k:
tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off"]}
tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]}

h200-fp8-qwen3-isb1-code-131k:
tp4: {users: [1, 2, 4, 8], offload: ["on", "off"]}
tp8: {users: [1, 2, 4, 8, 16], offload: ["on", "off"]}

b200-fp4-dsr1-isb1-code-8k:
tp4: {ep: 4, users: [4, 8, 16, 32, 64, 128, 256], offload: ["on", "off"]}
tp8: {ep: 8, users: [8, 16, 32, 64, 128, 256, 512], offload: ["on", "off"]}

b200-fp4-dsr1-isb1-chat-32k:
tp4: {ep: 4, users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]}
tp8: {ep: 8, users: [1, 2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]}

b200-fp4-dsr1-isb1-code-131k:
tp8: {ep: 8, users: [1, 2, 4, 8, 16], offload: ["on", "off"]}

b200-fp4-qwen3-isb1-chat-500k-preview:
tp4: {users: [1, 2, 4], offload: ["on", "off"]}
tp8: {users: [1, 2, 4, 8], offload: ["on", "off"]}

b200-fp4-qwen3-isb1-chat-1m-preview:
tp8: {users: [1, 2], offload: ["on", "off"]}

mi355x-fp8-qwen3-isb1-code-8k:
tp2: {users: [2, 4, 8, 16, 32, 64], offload: ["on", "off"]}
tp4: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]}

mi355x-fp8-qwen3-isb1-chat-32k:
tp4: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off"]}

h100-fp8-qwen3-isb1-code-8k-lmcache:
tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off"]}
tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]}

h200-fp8-qwen3-isb1-debug:
tp2: {users: [2], offload: ["off"]}
datasets/isb1/.gitattributes (+2 lines)
exports/**/*.json filter=lfs diff=lfs merge=lfs -text linguist-generated=true
converted/**/*.json filter=lfs diff=lfs merge=lfs -text linguist-generated=true
Review comment (Copilot AI, Apr 21, 2026): `converted/**/*.json` is marked as
LFS-tracked, which also applies to `datasets/isb1/converted/manifest.json`.
Since the manifest is small and useful to review/diff in normal Git history,
consider overriding attributes for just that manifest (disable LFS) so it
stays human-readable in PRs.

Suggested change:

    converted/manifest.json -filter -diff -merge text linguist-generated=false
mooncake/**/*.jsonl filter=lfs diff=lfs merge=lfs -text linguist-generated=true
datasets/isb1/HF_PUBLISH.md (new file, +124 lines)
# HF publication recipe for ISB1 converted traces

Mirror `datasets/isb1/converted/` to Hugging Face so Cam's
`TRACE_DIR=hf_<org>--<repo>` path works immediately with kv-cache-tester.
Recommended target: `semianalysisai/isb1-cc-traces`.

## 1. Target namespace

- Dataset repo: `semianalysisai/isb1-cc-traces`
- Source directory: `datasets/isb1/converted/`
- Consumer contract: Cam's replay scripts interpret `hf_<org>--<repo>` as a
Hugging Face dataset reference before calling `trace_replay_tester.py`

## 2. Prereqs

- `huggingface-cli >= 0.20`
- `HF_TOKEN` with write scope to the destination org
- Local validation already green:
`python3 tools/validate_kvcache_tester_trace.py datasets/isb1/converted/`

Authenticate first:

```bash
export HF_TOKEN=hf_xxx
huggingface-cli login --token "$HF_TOKEN"
```

## 3. Dataset card template

Create the HF dataset `README.md` with this content:

```markdown
---
license: apache-2.0
task_categories: [text-generation]
language: [en]
pretty_name: ISB1 Converted kv-cache-tester Traces
tags: [kv-cache, trace-replay, inference-benchmark, semianalysis, isb1]
---

# ISB1 Converted kv-cache-tester Traces

This dataset mirrors `datasets/isb1/converted/` from SemiAnalysisAI/InferenceX
PR #1032 so Cam's kv-cache-tester replay flow from PR #993 can consume ISB1
traces directly through the `hf_<org>--<repo>` `TRACE_DIR` convention.

## Contents

- 179 pre-converted trace JSON files
- 8k / 32k / 64k / 131k / 500k preview / 1m preview coverage
- Kimi K2.5 / DSR1 / GPT-OSS / Qwen3.5 coverage
- `manifest.json` metadata catalog

## Provenance

- Source repo: `SemiAnalysisAI/InferenceX`
- Source PR: `#1032`
- Consumer workflow: `callanjfox/kv-cache-tester` PR `#993`
- License: Apache-2.0
```

## 4. Upload command

```bash
huggingface-cli upload \
semianalysisai/isb1-cc-traces \
datasets/isb1/converted/ \
. \
--repo-type dataset \
--revision main
```

If the repo does not exist yet, create it in the HF UI first, then rerun the
upload.

## 5. Cam's Slurm integration

After publication, switch Cam's script from a local directory to the HF path:

```bash
TRACE_DIR=hf_semianalysisai--isb1-cc-traces # replaces datasets/isb1/converted
```

That triggers the `hf_<org>--<repo>` branch in Cam's PR #993 replay script
(`benchmarks/single_node/multiturn_fp4_b200_trace_replay.sh`, lines 54-58),
which rewrites the value into `--hf-dataset <org>/<repo>` before invoking
`trace_replay_tester.py`.
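The rewrite described above can be sketched in a few lines. This is a minimal sketch of the `hf_<org>--<repo>` convention only, assuming the double-dash separator maps to `org/repo`; Cam's actual shell script may differ in detail:

```python
def trace_dir_args(trace_dir):
    """Translate a TRACE_DIR value into trace_replay_tester.py arguments.

    hf_<org>--<repo> becomes --hf-dataset org/repo; anything else is
    treated as a local trace directory.
    """
    if trace_dir.startswith("hf_"):
        org, _, repo = trace_dir[3:].partition("--")
        return ["--hf-dataset", f"{org}/{repo}"]
    return ["--trace-directory", trace_dir]

print(trace_dir_args("hf_semianalysisai--isb1-cc-traces"))
```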

## 6. Versioning

When new traces land:

1. Regenerate `datasets/isb1/converted/manifest.json`
2. Re-run local validation on the converted directory
3. Upload the updated directory to HF `main`
4. Create a matching HF tag such as `v0.2.0` or `pr1032-r2`
5. Record the InferenceX commit SHA and HF revision together

Consumers who need immutability should pin an HF revision instead of floating
on `main`.

## 7. Verification

```bash
rm -rf /tmp/verify
huggingface-cli download semianalysisai/isb1-cc-traces \
--repo-type dataset \
--local-dir /tmp/verify
python3 tools/validate_kvcache_tester_trace.py /tmp/verify
```

Expected result:

- Download succeeds with all trace JSONs present
- Validator reports all converted traces passing
- Cam's replay wrapper accepts
`TRACE_DIR=hf_semianalysisai--isb1-cc-traces` with no shell-script changes

## Notes

- Publish converted artifacts and metadata only
- Keep the layout compatible with `trace_replay_tester.py`
- If the org name changes, update both the upload command and `TRACE_DIR`
example together
datasets/isb1/README.md (+21 lines; hunk at lines 160-183)

```bash
python tools/isb1_to_kvcache_tester.py \
--output-dir traces_isb1/
```

### Pre-converted sidecar

This repo carries a pre-converted mirror at:

- `datasets/isb1/converted/`

Feed that mirror directly to `kv-cache-tester` with:

```bash
python trace_replay_tester.py \
  --trace-directory datasets/isb1/converted/ \
  --tokenizer Qwen/Qwen2.5-Coder-32B-Instruct \
  --block-size 64
```

Mapping convention:

- one trace file per ISB1 conversation/event
- each trace filename is prefixed with the source bundle id
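The bundle-id prefixing could look like the following. The `__` separator and exact naming scheme are illustrative assumptions; check the converter's output for the real pattern:

```python
def trace_filename(bundle_id, conversation_id):
    # One trace JSON per ISB1 conversation, prefixed with its bundle id
    # so traces from different bundles cannot collide in one directory.
    return f"{bundle_id}__{conversation_id}.json"

print(trace_filename("code_8k1k", "conv0003"))
```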

### Step 3 — replay against a running vLLM / SGLang server

Using PR #993's own recipes (e.g. `benchmarks/single_node/multiturn_fp8_h200_trace_replay.sh`),
Any failure of the above means the PR is not actually plumbed end-to-end for
this bundle and should be reproduced against Cam's `trace_replay_tester.py`
before being claimed as compatible.

### Validate before publishing

Before publishing or mirroring `datasets/isb1/converted/`, run `python3 tools/validate_kvcache_tester_trace.py datasets/isb1/converted/` to catch schema drift early: missing required keys, invalid `block_size`, and broken prefix-extending `hash_ids` that would otherwise fail inside Cam's replay sweep.
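The prefix-extending `hash_ids` invariant the validator checks can be illustrated with a small sketch. The field name follows the description above; the surrounding trace schema here is an assumption for illustration:

```python
def hash_ids_extend_prefix(turns):
    """Check that each turn's hash_ids list extends the previous turn's
    list as a strict prefix -- the property a KV-cache replay relies on
    for prefix reuse across turns."""
    prev = []
    for turn in turns:
        ids = turn["hash_ids"]
        if ids[:len(prev)] != prev:
            return False
        prev = ids
    return True

good = [{"hash_ids": [1]}, {"hash_ids": [1, 2]}, {"hash_ids": [1, 2, 3]}]
bad = [{"hash_ids": [1]}, {"hash_ids": [9, 2]}]
print(hash_ids_extend_prefix(good), hash_ids_extend_prefix(bad))
```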

---

## HF publication