From d53bd3b6c8bfb8b295f84840053b0530123fcb51 Mon Sep 17 00:00:00 2001 From: William Chen <57119977+OCWC22@users.noreply.github.com> Date: Mon, 20 Apr 2026 21:49:26 -0700 Subject: [PATCH 01/13] data(isb1): ship 179 pre-converted kv-cache-tester trace JSONs Fold Track A into PR 1032. Consumers now point Cam's trace_replay_tester.py directly at datasets/isb1/converted/ with no conversion step: python $KV_CACHE_TESTER_DIR/trace_replay_tester.py --trace-directory datasets/isb1/converted/ --tokenizer Qwen/Qwen2.5-Coder-32B-Instruct --block-size 64 179 traces across 23 bundles span 6 context scales (8k/32k/64k/131k/500k/1M) and multi-model coverage (Kimi K2.5, DSR1, GPT-OSS, Qwen3.5). Co-Authored-By: Claude Opus 4.7 --- datasets/isb1/.gitattributes | 1 + datasets/isb1/README.md | 17 +++++++++++++++++ .../isb1_sess_chat_lc3_contract_review_01.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0013.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0014.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0015.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0016.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0017.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0018.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0019.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0020.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0021.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0022.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0023.json | 3 +++ .../isb1_sess_tool_free_memory_resume_001.json | 3 +++ ...1_sess_tool_free_memory_resume_001_0001.json | 3 +++ ...1_sess_tool_free_memory_resume_001_0002.json | 3 +++ ...1_sess_tool_free_memory_resume_001_0003.json | 3 +++ ...1_sess_tool_free_memory_resume_001_0004.json | 3 +++ ...1_sess_tool_free_memory_resume_001_0005.json | 3 +++ ...1_sess_tool_free_memory_resume_001_0006.json | 3 +++ ...1_sess_tool_free_memory_resume_001_0007.json | 3 +++ ...1_sess_tool_free_memory_resume_001_0008.json | 3 +++ 
...1_sess_tool_free_memory_resume_001_0009.json | 3 +++ ...1_sess_tool_free_memory_resume_001_0010.json | 3 +++ ...1_sess_tool_free_memory_resume_001_0011.json | 3 +++ .../isb1_sess_chat_lc3_contract_review_01.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0001.json | 3 +++ ...1_sess_chat_lc3_contract_review_01_0002.json | 3 +++ ...1_sess_code_ca1_agent_benchmark_plan_01.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0025.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0026.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0027.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0028.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0029.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0030.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0031.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0032.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0033.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0034.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0035.json | 3 +++ .../isb1_sess_debug_repair_repo_001.json | 3 +++ .../isb1_sess_debug_repair_repo_001_0001.json | 3 +++ .../isb1_sess_debug_repair_repo_001_0002.json | 3 +++ .../isb1_sess_debug_repair_repo_001_0003.json | 3 +++ .../isb1_sess_debug_repair_repo_001_0004.json | 3 +++ .../isb1_sess_debug_repair_repo_001_0005.json | 3 +++ .../isb1_sess_debug_repair_repo_001_0006.json | 3 +++ .../isb1_sess_debug_repair_repo_001_0007.json | 3 +++ .../isb1_sess_debug_repair_repo_001_0008.json | 3 +++ .../isb1_sess_debug_repair_repo_001_0009.json | 3 +++ .../isb1_sess_debug_repair_repo_001_0010.json | 3 +++ .../isb1_sess_debug_repair_repo_001_0011.json | 3 +++ .../code_8k1k/isb1_sess_offload_cliff_9982.json | 3 +++ .../isb1_sess_offload_cliff_9982_0013.json | 3 +++ .../isb1_sess_offload_cliff_9982_0014.json | 3 +++ .../isb1_sess_offload_cliff_9982_0015.json | 3 +++ .../isb1_sess_offload_cliff_9982_0016.json | 3 +++ .../isb1_sess_offload_cliff_9982_0017.json | 3 +++ 
.../isb1_sess_offload_cliff_9982_0018.json | 3 +++ .../isb1_sess_offload_cliff_9982_0019.json | 3 +++ .../isb1_sess_offload_cliff_9982_0020.json | 3 +++ .../isb1_sess_offload_cliff_9982_0021.json | 3 +++ .../isb1_sess_offload_cliff_9982_0022.json | 3 +++ .../isb1_sess_offload_cliff_9982_0023.json | 3 +++ ...1_sess_code_ca1_agent_benchmark_plan_01.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0001.json | 3 +++ ...s_code_ca1_agent_benchmark_plan_01_0002.json | 3 +++ .../isb1_sess_xlc1_text_resume_bridge_01.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0001.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0002.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0003.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0004.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0005.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0006.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0007.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0008.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0009.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0010.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0011.json | 3 +++ .../isb1_sess_xlc1_text_resume_bridge_01.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0001.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0002.json | 3 +++ .../isb1_sess_xlc1_text_resume_bridge_01.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0001.json | 3 +++ ...b1_sess_xlc1_text_resume_bridge_01_0002.json | 3 +++ ..._cache_xlc1_text_shared_prefix_swarm_01.json | 3 +++ ..._cache_xlc1_text_shared_prefix_swarm_01.json | 3 +++ .../isb1_sess_chat_lc2_resume_reasoning_01.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0001.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0002.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0003.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0004.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0005.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0006.json | 3 +++ 
..._sess_chat_lc2_resume_reasoning_01_0007.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0008.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0009.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0010.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0011.json | 3 +++ .../isb1_sess_chat_lc2_resume_reasoning_01.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0001.json | 3 +++ ..._sess_chat_lc2_resume_reasoning_01_0002.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7_0001.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7_0002.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7_0003.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7_0004.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7_0005.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7_0006.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7_0007.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7_0008.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7_0009.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7_0010.json | 3 +++ .../code_32k1k/isb1_sess_2c2a96a7_0011.json | 3 +++ .../isb1_sess_doc_comp_fanout_01.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0013.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0014.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0015.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0016.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0017.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0018.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0019.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0020.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0021.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0022.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0023.json | 3 +++ .../code_32k1k_qwen3.5/isb1_sess_2c2a96a7.json | 3 +++ .../isb1_sess_2c2a96a7_0001.json | 3 +++ .../isb1_sess_2c2a96a7_0002.json | 3 +++ .../isb1_sess_doc_comp_fanout_01.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0004.json | 3 +++ .../isb1_sess_doc_comp_fanout_01_0005.json | 3 +++ 
...sb1_sess_chat_lc3_multi_day_strategy_01.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0001.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0002.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0003.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0004.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0005.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0006.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0007.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0008.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0009.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0010.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0011.json | 3 +++ ...sb1_sess_chat_lc3_multi_day_strategy_01.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0001.json | 3 +++ ...ess_chat_lc3_multi_day_strategy_01_0002.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01_0001.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01_0002.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01_0003.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01_0004.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01_0005.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01_0006.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01_0007.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01_0008.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01_0009.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01_0010.json | 3 +++ .../code_64k1k/isb1_sess_optimizer_01_0011.json | 3 +++ .../isb1_sess_optimizer_01.json | 3 +++ .../isb1_sess_optimizer_01_0001.json | 3 +++ .../isb1_sess_optimizer_01_0002.json | 3 +++ ...b1_hb_depth_cache_ulc2_offload_cliff_01.json | 3 +++ ..._depth_cache_ulc2_offload_cliff_01_0001.json | 3 +++ ..._depth_cache_ulc2_offload_cliff_01_0002.json | 3 +++ ...b1_hb_depth_cache_ulc2_offload_cliff_01.json | 3 +++ ..._depth_cache_ulc2_offload_cliff_01_0001.json | 3 +++ ..._depth_cache_ulc2_offload_cliff_01_0002.json | 3 +++ 
...sess_cache_xlc2_hot_cold_session_mix_01.json | 3 +++ ...cache_xlc2_hot_cold_session_mix_01_0001.json | 3 +++ ...cache_xlc2_hot_cold_session_mix_01_0002.json | 3 +++ ...epth_cache_xlc2_hot_cold_session_mix_01.json | 3 +++ ...cache_xlc2_hot_cold_session_mix_01_0001.json | 3 +++ ...cache_xlc2_hot_cold_session_mix_01_0002.json | 3 +++ ...sess_cache_xlc2_hot_cold_session_mix_01.json | 3 +++ ...cache_xlc2_hot_cold_session_mix_01_0001.json | 3 +++ ...cache_xlc2_hot_cold_session_mix_01_0002.json | 3 +++ ...epth_cache_xlc2_hot_cold_session_mix_01.json | 3 +++ ...cache_xlc2_hot_cold_session_mix_01_0001.json | 3 +++ ...cache_xlc2_hot_cold_session_mix_01_0002.json | 3 +++ 181 files changed, 555 insertions(+) create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0013.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0014.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0015.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0016.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0017.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0018.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0019.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0020.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0021.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0022.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0023.json create mode 100644 
datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0001.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0002.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0003.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0004.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0005.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0006.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0007.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0008.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0009.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0010.json create mode 100644 datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0011.json create mode 100644 datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01.json create mode 100644 datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01_0001.json create mode 100644 datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01_0002.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0025.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0026.json create mode 100644 
datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0027.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0028.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0029.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0030.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0031.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0032.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0033.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0034.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0035.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0001.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0002.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0003.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0004.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0005.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0006.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0007.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0008.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0009.json create mode 100644 
datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0010.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0011.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0013.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0014.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0015.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0016.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0017.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0018.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0019.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0020.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0021.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0022.json create mode 100644 datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0023.json create mode 100644 datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01.json create mode 100644 datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01_0001.json create mode 100644 datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01_0002.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0001.json create mode 100644 
datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0002.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0003.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0004.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0005.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0006.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0007.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0008.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0009.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0010.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0011.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01_0001.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01_0002.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01_0001.json create mode 100644 datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01_0002.json create mode 100644 
datasets/isb1/converted/extension_131k/code_131k1k/isb1_sess_cache_xlc1_text_shared_prefix_swarm_01.json create mode 100644 datasets/isb1/converted/extension_131k/code_131k1k_qwen3.5/isb1_hb_depth_cache_xlc1_text_shared_prefix_swarm_01.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0001.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0002.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0003.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0004.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0005.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0006.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0007.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0008.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0009.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0010.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0011.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01.json create mode 100644 datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01_0001.json create mode 100644 
datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01_0002.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0001.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0002.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0003.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0004.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0005.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0006.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0007.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0008.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0009.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0010.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0011.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0013.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0014.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0015.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0016.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0017.json create mode 100644 
datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0018.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0019.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0020.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0021.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0022.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0023.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7_0001.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7_0002.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01_0004.json create mode 100644 datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01_0005.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0001.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0002.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0003.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0004.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0005.json create mode 
100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0006.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0007.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0008.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0009.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0010.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0011.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01_0001.json create mode 100644 datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01_0002.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0001.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0002.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0003.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0004.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0005.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0006.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0007.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0008.json create mode 
100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0009.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0010.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0011.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01_0001.json create mode 100644 datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01_0002.json create mode 100644 datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01.json create mode 100644 datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0001.json create mode 100644 datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0002.json create mode 100644 datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01.json create mode 100644 datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0001.json create mode 100644 datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0002.json create mode 100644 datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01.json create mode 100644 
datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0001.json create mode 100644 datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0002.json create mode 100644 datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01.json create mode 100644 datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0001.json create mode 100644 datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0002.json create mode 100644 datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01.json create mode 100644 datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0001.json create mode 100644 datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0002.json create mode 100644 datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01.json create mode 100644 datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0001.json create mode 100644 
datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0002.json diff --git a/datasets/isb1/.gitattributes b/datasets/isb1/.gitattributes index 5998181c2..006356f24 100644 --- a/datasets/isb1/.gitattributes +++ b/datasets/isb1/.gitattributes @@ -1 +1,2 @@ exports/**/*.json filter=lfs diff=lfs merge=lfs -text linguist-generated=true +converted/**/*.json filter=lfs diff=lfs merge=lfs -text linguist-generated=true diff --git a/datasets/isb1/README.md b/datasets/isb1/README.md index 46a4fbbdc..9c39451d9 100644 --- a/datasets/isb1/README.md +++ b/datasets/isb1/README.md @@ -160,6 +160,23 @@ python tools/isb1_to_kvcache_tester.py \ --output-dir traces_isb1/ ``` +### Pre-converted sidecar + +This repo carries a pre-converted mirror at: + +- `datasets/isb1/converted/` + +Feed that mirror directly to `kv-cache-tester` with: + +```bash +python trace_replay_tester.py --trace-directory datasets/isb1/converted/ --tokenizer Qwen/Qwen2.5-Coder-32B-Instruct --block-size 64 +``` + +Mapping convention: + +- one trace file per ISB1 conversation/event +- each trace filename is prefixed with the source bundle id + ### Step 3 — replay against a running vLLM / SGLang server Using PR #993's own recipes (e.g. 
`benchmarks/single_node/multiturn_fp8_h200_trace_replay.sh`), diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01.json new file mode 100644 index 000000000..289035999 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7410a365bd5603bacd4aa99a362c2ac3bf2ad4d050d0d7856d06cd217736f1ab +size 2359 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0013.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0013.json new file mode 100644 index 000000000..81af45fd2 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0013.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abe2fc1546b3b472234e631bc65c1704fffb6a41dafa65953e4f8eeacb70c9fa +size 2293 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0014.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0014.json new file mode 100644 index 000000000..4ccb09be9 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0014.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53309ad6e29880d35fa4a73d77747929119fc9c775d5c68f89497e7929bd190f +size 2335 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0015.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0015.json new file mode 100644 index 000000000..dbc37f825 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0015.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:51be1bba71f5d3129a6f3e1ff879296c3ed440fb7637e1fbcca00b38488f2f0d +size 2342 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0016.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0016.json new file mode 100644 index 000000000..d95f27534 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0016.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae6118fd3424465ab45b1402d07f5ff9ea8b3919086fb03bf88d0a9130ac9fbf +size 2360 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0017.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0017.json new file mode 100644 index 000000000..4f9ccaff3 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0017.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2ef537f9cd9db8e4cbb0b9960b35e7e4e3274f11831a4da52577fc1056b5337 +size 2294 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0018.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0018.json new file mode 100644 index 000000000..30aa92a74 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0018.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b430d16b147afbaa0f5a60291456e83db5e997a941632cb5540b6d5b453ee3bd +size 2336 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0019.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0019.json new file mode 100644 index 000000000..7629e156b --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0019.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c54ddeb6a9c56a9ea50b6e82fe4807cf1d11452dade164b05810dcec53b7595b +size 2343 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0020.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0020.json new file mode 100644 index 000000000..ee9615fd8 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0020.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0c916c5054d4e6cb99af4432297b20fb0c12cd6375cbec1b928dce7f2d6d741 +size 2360 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0021.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0021.json new file mode 100644 index 000000000..5b7edbd28 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0021.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae8aa565ef2627e37b602c520967c49a6b516e05538e5d6603b7c8c96c89820 +size 2294 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0022.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0022.json new file mode 100644 index 000000000..de43c48a5 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0022.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aadbb587997e22fb256fa251ad18a1e424c855f21f5f78483bdabe63e5c2bc31 +size 2336 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0023.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0023.json new file mode 100644 index 000000000..bb5cf37aa --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_chat_lc3_contract_review_01_0023.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e435fa2ce6fbd9a18d9a7ce107abeff46abde60fb7789ff435ab55e48ba34ddf +size 2343 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001.json new file mode 100644 index 000000000..f3c0a78b6 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aab2871b3c45fff6f62cc9d8878b1d2ae8ecb39d2057ea6dbecba8d7da495f74 +size 8445 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0001.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0001.json new file mode 100644 index 000000000..82e834897 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf0ec347a86e01b6d90f5d149f26bc97b3d10984fd19815bc5db641d2745d23 +size 8258 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0002.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0002.json new file mode 100644 index 000000000..2f1f8efaf --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89dd56c6e1b04caeebb0a392fe7f5c9f79fa2fa95dc50569bb649a6092266327 +size 8377 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0003.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0003.json new file mode 100644 index 000000000..e8af57f2e --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0003.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d9c254daadad8c567aed44fb75b4e53b61b38816fe710a6d1e3ac7acef01aeab +size 8377 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0004.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0004.json new file mode 100644 index 000000000..5170f3057 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0004.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c517f0cb668eb570266b1dc9e6246872e22299f6e050315535019b08015ab202 +size 8446 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0005.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0005.json new file mode 100644 index 000000000..deb4e4bb5 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0005.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:497e35e3d78278b79047795ceccbd0ece37cbfcd21c7d3898cc70320eb924060 +size 8259 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0006.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0006.json new file mode 100644 index 000000000..6b694b51f --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0006.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c684232e36da2520cc9df65e69729da1cfc95f344897cf25616508b8b626908e +size 8378 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0007.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0007.json new file mode 100644 index 000000000..3d18a57dd --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0007.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:56cf51825e79420f6081ff7005528ac7944ef29e3dedcbd9c37d49c3140d1673 +size 8378 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0008.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0008.json new file mode 100644 index 000000000..4404f7f07 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0008.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca32aafab78ee132141a572e7dce5d137dc3d94370bac3efe610ec46132f46d6 +size 8446 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0009.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0009.json new file mode 100644 index 000000000..86896b84a --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0009.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b12ab40701ebdc53f4da237c71107736252c3f25bdae88b028eda02673c308 +size 8259 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0010.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0010.json new file mode 100644 index 000000000..e247daa1d --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0010.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1393b46efdb0679a6e7c2decfc216cdbe05082118597b53d72e514aed5fc2826 +size 8378 diff --git a/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0011.json b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0011.json new file mode 100644 index 000000000..fab470726 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k/isb1_sess_tool_free_memory_resume_001_0011.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9dc31d4126d5eb6e54e9af4ba554cc5b71038de0b5e406b4f9b0a84e38f82ee5 +size 8378 diff --git a/datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01.json b/datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01.json new file mode 100644 index 000000000..c22f2fb06 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caa872caa9d33985a19954712466b6d46fe982aa9bee2a8bbb1b850be9ce2b30 +size 2372 diff --git a/datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01_0001.json b/datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01_0001.json new file mode 100644 index 000000000..41d93eb2e --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7620c05724a6b02392b5b95277fac6f28a0dea2edceeec5c28ddfcf19dccf1e7 +size 2373 diff --git a/datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01_0002.json b/datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01_0002.json new file mode 100644 index 000000000..536cd8776 --- /dev/null +++ b/datasets/isb1/converted/core/chat_8k1k_qwen3.5/isb1_sess_chat_lc3_contract_review_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05c597f0ad3f46aad040beceac30921c0a774eddd0c225b6ecb4736cc85e7304 +size 2373 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01.json new file mode 100644 index 000000000..b2d78fc85 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01.json @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:228fdf2065df12f63481c4440db4c67c2616bda22ab2711025360baf29f6fb1f +size 2401 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0025.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0025.json new file mode 100644 index 000000000..8b38fca7d --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0025.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c373636a41b2387979e5b9727c34cb10becf29158ac57298bd257a3e33043dd +size 2335 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0026.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0026.json new file mode 100644 index 000000000..f77cb30fe --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0026.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:becdbb0fb66beea439832ca828d447b8898c922b2bcd9f6df8791da7d520958d +size 2377 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0027.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0027.json new file mode 100644 index 000000000..49ebc3f9e --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0027.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f9bfaebcdedf0623da29d4782fc26a1d76307c3b39dbd3e3fbbe3672b1aeb4 +size 2377 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0028.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0028.json new file mode 100644 index 000000000..2115ed500 --- /dev/null +++ 
b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0028.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e6a9b7b6a3c186179d9734dc3c9eff99bcce8a42fb3c62319551c1cfe01e4b +size 2402 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0029.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0029.json new file mode 100644 index 000000000..b26c2ccb9 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0029.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99c14475b0023dfeb1ae12a23930601e7ecae719c6bcd22afd69fb94b9c27e90 +size 2336 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0030.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0030.json new file mode 100644 index 000000000..ce08beecd --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0030.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b616707f813b632f41b6a2dbfb7a8fa77758cecf8f62d6b7b7b192c9f0b0e57d +size 2378 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0031.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0031.json new file mode 100644 index 000000000..ecc7db393 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0031.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c1f9168bd06050007b11231eae3a4ce2a091d3b64bf34fed05447c55efc836 +size 2378 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0032.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0032.json new file mode 100644 index 
000000000..1a645728d --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0032.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529dd8da2cb64862f8065283a1d2a0b2f75635931da935ce9db2482cf33bdc9c +size 2402 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0033.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0033.json new file mode 100644 index 000000000..f25844e95 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0033.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1dcefa12491d06065823f209a2770046ad8ba8641506d7bfceaa674aeecd97e +size 2336 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0034.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0034.json new file mode 100644 index 000000000..c8f36e610 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0034.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7a6a137188bfb0fa12b8569de694d6446a7dd52f4481ce4e14ca3b5b0e550ff +size 2378 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0035.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0035.json new file mode 100644 index 000000000..2553d6608 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_code_ca1_agent_benchmark_plan_01_0035.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f313bea274207cc269c564f5800ca1b942430f127628e7127797deeec6d424 +size 2378 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001.json new file mode 100644 index 
000000000..dfdb91bb7 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68dffcda05a8eab2e94918ae18eeb9b5a0fd2ea5ddac42490d214a0547f31bf +size 2784 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0001.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0001.json new file mode 100644 index 000000000..7c3dab242 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37c6699eb2f233157f6e8d26287aa8e0d6d98581cc36e7235771deca8ea3b92 +size 2707 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0002.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0002.json new file mode 100644 index 000000000..2683d82da --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c4f8492fe921da1b3a515f6acc4fe4266214b2571e8fd9cf236f027eb469d0c +size 2756 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0003.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0003.json new file mode 100644 index 000000000..8a970849d --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0003.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:006ba691e4754f142ffde833e4d49c2b8038e414aeeaf1ea962ee0c0470d56d9 +size 2756 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0004.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0004.json new file mode 100644 index 000000000..adb92e285 --- /dev/null +++ 
b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0004.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3baca5e56f471e05e34bd32b76520a94ddf9c5ef9b23dae6fbf7e2565f4f5ea +size 2785 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0005.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0005.json new file mode 100644 index 000000000..ed3894147 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0005.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7818e9a45227acfe746ecce353cb2d395b8f03f03f9365306df2f8c4a2fce6f2 +size 2708 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0006.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0006.json new file mode 100644 index 000000000..c44120a5e --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0006.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:802d7b4d051b3c02f7219edad18540ef339735b35f55cebecd324784bc13f6ae +size 2757 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0007.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0007.json new file mode 100644 index 000000000..c1b6ab46b --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0007.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115595c48ca72295cb99deeff81bb6c9b25c877c0a7d72b59d05cb4995b6c276 +size 2757 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0008.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0008.json new file mode 100644 index 000000000..829e1e445 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0008.json @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:c19c8304459bed86d8c901b785edcff750451337868d0d0e373d819cdbb69660 +size 2785 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0009.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0009.json new file mode 100644 index 000000000..ba2d03d5d --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0009.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94d4e3bfd6913cade38921bc52e7447445d957e1304cd133914fd50931142993 +size 2708 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0010.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0010.json new file mode 100644 index 000000000..e6594d3c6 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0010.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:851ba58a552a39ea9cf5123ce7e642bf00c3d7756b65aafdc6aec9a730799c84 +size 2757 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0011.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0011.json new file mode 100644 index 000000000..b2bd898e4 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_debug_repair_repo_001_0011.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b00d4d905ea1161df0007acc8de86913173090352d9725a38f6a8d2b0fc21321 +size 2757 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982.json new file mode 100644 index 000000000..3c7d72a08 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7bced17c233960a2d61274ea342a2bfcddda9763feaee9c8de77abb865ce78 +size 
3270 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0013.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0013.json new file mode 100644 index 000000000..fd98b87ac --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0013.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b86d7b3773d965c200f734e10aaeb0d5a58ccce907bc6d12a88141eda7e36263 +size 3182 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0014.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0014.json new file mode 100644 index 000000000..7876c3d0b --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0014.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435e61feb29c8527973126324c81aef085c3e27ca3a9570a8b08766c4b468e4d +size 3238 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0015.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0015.json new file mode 100644 index 000000000..4a8db8812 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0015.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b904ef36516f004c8e7e6bc082c4cb6fb1958a7f354a781a2fa67705bb0baacd +size 3238 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0016.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0016.json new file mode 100644 index 000000000..a2533de83 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0016.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021ac97814c5c44835b76d548f6ea0db809cfb26e918fa11c110cf5d70c67e7c +size 3271 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0017.json 
b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0017.json new file mode 100644 index 000000000..b42712a6c --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0017.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb660d330dff8500047fb45000c890a1e0a922dd24d4d0d5e669f31438e29db +size 3183 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0018.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0018.json new file mode 100644 index 000000000..448693c0f --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0018.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de0dbfd8c652ea9c5e4686f19b7226c67e4632e00055ed941e91ae01d53e2f56 +size 3239 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0019.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0019.json new file mode 100644 index 000000000..60d6c72a6 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0019.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bee5128892e928afd957490a9d47e7dada73c296bc3c9f146d4c03ea9161a6b +size 3239 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0020.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0020.json new file mode 100644 index 000000000..cb66bdc3f --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0020.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56405fb996608d6373fb500d03d1f9617a7916bcbd7bbabf967e6b7db4ab6cec +size 3271 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0021.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0021.json new file mode 100644 index 000000000..4c79b8f57 --- 
/dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0021.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4447403d1c4d08cc334a458241bb85ea0eab4d29236a450bf3bad0bd4353565 +size 3183 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0022.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0022.json new file mode 100644 index 000000000..baad5558f --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0022.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6165657ee626fb9a507d9b39418187654411119d1434c39af71c5fc3f035f63 +size 3239 diff --git a/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0023.json b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0023.json new file mode 100644 index 000000000..f647a5fa8 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k/isb1_sess_offload_cliff_9982_0023.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b9fb5f30984d54379a4f8e72ab3c42f720a3c4399656868c20a716838e3164 +size 3239 diff --git a/datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01.json b/datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01.json new file mode 100644 index 000000000..166fdc3d7 --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:022ac4792b506df3b42bd605fc796ca38c3114c91dcf0b3492c3f70a917071a2 +size 2407 diff --git a/datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01_0001.json b/datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01_0001.json new file mode 100644 index 000000000..e629502c4 --- /dev/null +++ 
b/datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b0b285c9dad4335c18945ab55e31d7bc0dda2d2109014903719c42aec4d39c5 +size 2408 diff --git a/datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01_0002.json b/datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01_0002.json new file mode 100644 index 000000000..59faaebdc --- /dev/null +++ b/datasets/isb1/converted/core/code_8k1k_qwen3.5/isb1_sess_code_ca1_agent_benchmark_plan_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d28f64eb09f50b8bf9e6e7f0291db3bc1c08db7b76cccfdbc0dcc114b360e04 +size 2408 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01.json b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01.json new file mode 100644 index 000000000..323c64b7c --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bd56f01e5090d703d0526c11e96e9cbff3cf1c097c486211ece05acfbd3f6b1 +size 4471 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0001.json b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0001.json new file mode 100644 index 000000000..9c6173124 --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f45f7941444d1a3c566163063418333ec3e62eb3cbd9bd029547f7f6f1c0a76 +size 4350 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0002.json 
b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0002.json new file mode 100644 index 000000000..793bd64c9 --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd8c5b09579a9415d7d414a3896c3c4269df8099bf8a23f49c5eeb7103dd59c +size 4432 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0003.json b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0003.json new file mode 100644 index 000000000..931e7b086 --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0003.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81b99077228bcb9c152b252688ef57f146273cc004abdca230e93dd630f2b9ee +size 4427 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0004.json b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0004.json new file mode 100644 index 000000000..e53fa1dde --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0004.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ab461192902708ec092d408e93416c7c0af125ba283aa61e7c2e0f137599a3 +size 4472 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0005.json b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0005.json new file mode 100644 index 000000000..831a7c9df --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0005.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd50f79726ace6b1c7da24c2ad958c4de3de7ab75e030ae0b9c3c9644e6c056d +size 4351 diff --git 
a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0006.json b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0006.json new file mode 100644 index 000000000..2faf8ca27 --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0006.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcc6eac63fe7778dc95f47b6d765897907fd47093aee224ba22ad6297bb0fc8f +size 4433 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0007.json b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0007.json new file mode 100644 index 000000000..586d05e6e --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0007.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dcbeacbe9c7cf8415da9141a5fa870d87c5b75b84b0721d91de68aeebe38509 +size 4428 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0008.json b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0008.json new file mode 100644 index 000000000..2edfcb63d --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0008.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a3a7389ad3bc2647502821926867d13dfdd2e8100b228392198d5c0e2fd9351 +size 4472 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0009.json b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0009.json new file mode 100644 index 000000000..944e818d4 --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0009.json @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:ec67230bd0ddc6525b348730ab869f6fbf87a839b64bbfdedbef50f3d52574d7 +size 4351 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0010.json b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0010.json new file mode 100644 index 000000000..b19f75e4f --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0010.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f401813df485734fcd7bda36aa17a8b81087005180c99c719f65e3c2e4112c0d +size 4433 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0011.json b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0011.json new file mode 100644 index 000000000..d859d3153 --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k/isb1_sess_xlc1_text_resume_bridge_01_0011.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d177f9357a51503adbd8af9a410ebf8d5e56ccd9429fede9735a70e2f4c7d6e0 +size 4428 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01.json b/datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01.json new file mode 100644 index 000000000..323c64b7c --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bd56f01e5090d703d0526c11e96e9cbff3cf1c097c486211ece05acfbd3f6b1 +size 4471 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01_0001.json b/datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01_0001.json new file mode 100644 index 000000000..e53fa1dde --- /dev/null +++ 
b/datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ab461192902708ec092d408e93416c7c0af125ba283aa61e7c2e0f137599a3 +size 4472 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01_0002.json b/datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01_0002.json new file mode 100644 index 000000000..2edfcb63d --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k_dsr1/isb1_sess_xlc1_text_resume_bridge_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a3a7389ad3bc2647502821926867d13dfdd2e8100b228392198d5c0e2fd9351 +size 4472 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01.json b/datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01.json new file mode 100644 index 000000000..1ce2fa5c1 --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:261ea0938d4768ea20dec329c076174d0f8a44a052e2f02acc9ac6361c8292fa +size 4487 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01_0001.json b/datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01_0001.json new file mode 100644 index 000000000..cb27e8f77 --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1788d56651677b95e4acc0ebc88a2218054aa373ca22390d569ea248f92d673d +size 4488 diff --git a/datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01_0002.json 
b/datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01_0002.json new file mode 100644 index 000000000..479e9b01e --- /dev/null +++ b/datasets/isb1/converted/extension_131k/chat_131k1k_qwen3.5/isb1_sess_xlc1_text_resume_bridge_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8a90183c998f8abbb8504daa48c89bf66dee898cceeba40a6c8ec9ef9b965c +size 4488 diff --git a/datasets/isb1/converted/extension_131k/code_131k1k/isb1_sess_cache_xlc1_text_shared_prefix_swarm_01.json b/datasets/isb1/converted/extension_131k/code_131k1k/isb1_sess_cache_xlc1_text_shared_prefix_swarm_01.json new file mode 100644 index 000000000..88cc7cfa8 --- /dev/null +++ b/datasets/isb1/converted/extension_131k/code_131k1k/isb1_sess_cache_xlc1_text_shared_prefix_swarm_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbd915ed136e1d16788da57d18708969b408d445c408183c65a59520b891d2b6 +size 263363 diff --git a/datasets/isb1/converted/extension_131k/code_131k1k_qwen3.5/isb1_hb_depth_cache_xlc1_text_shared_prefix_swarm_01.json b/datasets/isb1/converted/extension_131k/code_131k1k_qwen3.5/isb1_hb_depth_cache_xlc1_text_shared_prefix_swarm_01.json new file mode 100644 index 000000000..18c269447 --- /dev/null +++ b/datasets/isb1/converted/extension_131k/code_131k1k_qwen3.5/isb1_hb_depth_cache_xlc1_text_shared_prefix_swarm_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aae25baf94a0f8d1c5b4ca49fa684fa1844366b2a2bc0ade25b162f5ce8c773 +size 263379 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01.json b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01.json new file mode 100644 index 000000000..1df87abf4 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:81111f0fc9153920bbcdaec6e23f7fe4b539adfc68ddd211bbbc5f97f6f61bbf +size 5146 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0001.json b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0001.json new file mode 100644 index 000000000..27bf25cb7 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c8202aa5e0aad871827d94babcbb4f5d2b9a13fedb9119f4b4f5e6803ea0ed3 +size 5025 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0002.json b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0002.json new file mode 100644 index 000000000..dbf28066d --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfeb547b1f838f821d06043538f1ca358086fe719fdedf7579fe153da0d29019 +size 5102 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0003.json b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0003.json new file mode 100644 index 000000000..1f4519732 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0003.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e37be49b80578e31798c447baa3a5016baf6f10639abd7b7cfe0220af203ad18 +size 5109 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0004.json b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0004.json new file mode 100644 index 000000000..aadc25c8e --- /dev/null +++ 
b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0004.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f0b45305cb2a3328dc44b0ee32e202f09b063a6295391e990e2942ce4ad44d +size 5147 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0005.json b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0005.json new file mode 100644 index 000000000..57e9ab3dd --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0005.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bbafbac43e8a55570ff4e736708be054f18a10ef2a620b2eed47bdc35fe20a6 +size 5026 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0006.json b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0006.json new file mode 100644 index 000000000..2dc12c3a6 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0006.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06040e76b3f9b4b33f6bb8bb6ad850fab7a9d646dca1cbb5ff87afb60583e9d7 +size 5103 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0007.json b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0007.json new file mode 100644 index 000000000..e63e2e8fe --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0007.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cedb5842f6eaf58cdd062e97f95125dfdec76000df9685bb4a50907d6f8963d +size 5110 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0008.json 
b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0008.json new file mode 100644 index 000000000..c1c65b310 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0008.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ae80514ebac0758d3b7092aaefd792c6fcc659cdbc1eb7567fd15abc833fa4d +size 5147 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0009.json b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0009.json new file mode 100644 index 000000000..9ed6d3bc8 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0009.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f4b5750450b5f0d666cb6892ecc365aa2e673ee11ab68882789dbc31c97f807 +size 5026 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0010.json b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0010.json new file mode 100644 index 000000000..cde215170 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0010.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde875b3d31b7aec6d06166dcf938208f00bbdc26a47702228dd796911db6906 +size 5103 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0011.json b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0011.json new file mode 100644 index 000000000..b51664086 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k/isb1_sess_chat_lc2_resume_reasoning_01_0011.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5cb7cf3299c29aeef48f1903dc3c66dd8caef5833f737ff4528ed1fd28801d +size 5110 diff --git 
a/datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01.json b/datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01.json new file mode 100644 index 000000000..a79b872b3 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cfbc883664bfa60fa0c1ff7895c83b31d5e0551279544ce38db9e04e75cabe5 +size 5164 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01_0001.json b/datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01_0001.json new file mode 100644 index 000000000..6e32fc5be --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdbada52e4b8b50a488e77a9102866244b56620042f3c5d238817b80efe203b8 +size 5165 diff --git a/datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01_0002.json b/datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01_0002.json new file mode 100644 index 000000000..a8cd8c71b --- /dev/null +++ b/datasets/isb1/converted/extension_32k/chat_32k1k_qwen3.5/isb1_sess_chat_lc2_resume_reasoning_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3318dfcb10f5d990770616cf5d0f0538b4bf8da1d567f88dfdd5bea168112409 +size 5165 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7.json new file mode 100644 index 000000000..3dcf84566 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:898dbe2c1a03f87a522cf33e310d710ac8223ed1f94b1ee8c7a9100fda9431b6 +size 2804 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0001.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0001.json new file mode 100644 index 000000000..08441a345 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aea5d8909200b74ecec71b915ee017ee9710f618fbe3a69e7f54abbbc8c100b +size 2727 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0002.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0002.json new file mode 100644 index 000000000..260690e2b --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1294b6545ed7b10372c6ef19cb3e8e0328d94bda306f8186362533ee40802c2 +size 2776 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0003.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0003.json new file mode 100644 index 000000000..af2479a77 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0003.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:998fc4cdb3d991960ad7858d5231a921fefadd5bd02b2b306a472c81e2119c8d +size 2776 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0004.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0004.json new file mode 100644 index 000000000..ca8a964a5 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0004.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f214b18f52865b839ac93ceecbacee733a7071b84a7e571298eec5a6d50c85 +size 2805 diff --git 
a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0005.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0005.json new file mode 100644 index 000000000..d479dd22b --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0005.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4adf2f4c220a7bc9416b513b5163a51e092dbc2417393ed67d426735a6fce922 +size 2728 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0006.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0006.json new file mode 100644 index 000000000..c9c098182 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0006.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b59c4809a87806e867d8e220204ca209ca529cbc7d2d5a652798034128d1a3d +size 2777 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0007.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0007.json new file mode 100644 index 000000000..4e27df91c --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0007.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e643421e2790c88f1bbc604ded96735f9091aa8bd3171aa69d277c557636256e +size 2777 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0008.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0008.json new file mode 100644 index 000000000..ff3c101f7 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0008.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a112a6c624844e5bef52cf1cb7523bd701b1529d6881ddcaed845eb3bc1d28c +size 2805 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0009.json 
b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0009.json new file mode 100644 index 000000000..7addccd14 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0009.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aae820940a72609eb6d89fd2bff155f951a2f3d6d2f3e246ee66795a7c7497ec +size 2728 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0010.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0010.json new file mode 100644 index 000000000..0ff8b11f8 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0010.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68e796dc6e7d585511eb80b4b5ebe9d75c26f02f1806f5a65900d9e8844f2464 +size 2777 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0011.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0011.json new file mode 100644 index 000000000..48c92905b --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_2c2a96a7_0011.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f9a6774bac4f0c84077cf790e98f234ca8af29557b0de8375dec82d92ca7b69 +size 2777 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01.json new file mode 100644 index 000000000..bf05252e9 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ede46fa67491a65f6eaf65c2831c5db29fa1d97e547c6ff5f8dfda4a8ee3ab +size 2327 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0013.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0013.json new file mode 100644 
index 000000000..9ac87542e --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0013.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bd5748d6bbe1887f1e15805c926d5298d4f2e3c8c63d7eaa70bf33db78ed4e7 +size 2261 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0014.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0014.json new file mode 100644 index 000000000..6a18175c4 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0014.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6ca628bf59a22aca1980873f31e2aa9ea708e44a34bb719a17a85d4f9e75b23 +size 2303 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0015.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0015.json new file mode 100644 index 000000000..0e54cb5ea --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0015.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:037e492903c768d94e11464a4214bf1b131ed76d4b0193b3604df7a8b6479a12 +size 2303 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0016.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0016.json new file mode 100644 index 000000000..cf9c9f602 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0016.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42eb70b9fa39336c31220328e7db575f173af44e37884be2fb7b5e62a57c22ed +size 2328 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0017.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0017.json new file mode 100644 index 
000000000..1b2d9bf04 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0017.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bb4ce1e1a68b68ce379ad09da2c09a57b05ce64a7bdfbc5e47defae8418da7d +size 2262 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0018.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0018.json new file mode 100644 index 000000000..8539ee4cb --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0018.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30d39c8419bebc2ca5ffa7ca2056d07b1e114d8fc76961e5cf3fe4aeeb07f5f3 +size 2304 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0019.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0019.json new file mode 100644 index 000000000..8c176a52f --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0019.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d4f89c5e481a5b93136053ced19de76d23ea8ad3bf195865cb7bcb62c777af +size 2304 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0020.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0020.json new file mode 100644 index 000000000..c8356ca18 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0020.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddd27528ed68ab47637903d86c011a3a0074690966a8d0f8f35e56371f553581 +size 2328 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0021.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0021.json new file mode 100644 index 
000000000..5580594fe --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0021.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9eb0005d487df58324b70d717923be60a89c5e5db89026a58f5aee7854244d5 +size 2262 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0022.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0022.json new file mode 100644 index 000000000..b2c99d2d5 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0022.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca1e74d84c28eec8161eab5fcd475f96bc0f76556adaa42f43403ad0ea9a65a +size 2304 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0023.json b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0023.json new file mode 100644 index 000000000..08ad2f5b9 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k/isb1_sess_doc_comp_fanout_01_0023.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b392ebeb6c0b86f2ef2e6fd1275e61d298c526d5dd32e3ba97b09c99531e3abf +size 2304 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7.json b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7.json new file mode 100644 index 000000000..65c643380 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f373c4011208b2d97a1e6a0a86accfdd904ebc833f46c5e5b200cc264c57f39 +size 2811 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7_0001.json b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7_0001.json new file mode 100644 index 000000000..b73906f19 --- /dev/null +++ 
b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c5879b6e1996c18fe78e80c2e9495ad84dad2e85aa2dcc307d90a6fdef416f7 +size 2812 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7_0002.json b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7_0002.json new file mode 100644 index 000000000..1735d3bdd --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_2c2a96a7_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c926e04d21d2648b6ccc196b14182e06a4593452c0d6c1bec4e37c7d98994dc9 +size 2812 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01.json b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01.json new file mode 100644 index 000000000..a81c959a0 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3efb687c57a2246be7c98f1d502da42c1b565c0368b4292f476b9c2a78c91ba5 +size 2333 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01_0004.json b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01_0004.json new file mode 100644 index 000000000..ca01016a2 --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01_0004.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1cb0ba5bc2093d1525b09ae13e39b8db1a4fa436025cf389f2320765dee1afe +size 2334 diff --git a/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01_0005.json b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01_0005.json new file mode 100644 index 
000000000..8703faadd --- /dev/null +++ b/datasets/isb1/converted/extension_32k/code_32k1k_qwen3.5/isb1_sess_doc_comp_fanout_01_0005.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c6a77ff4c9f1b5ad86f9fedb63790372850f11f7b9e8d420dfa140c084b2ed +size 2334 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01.json b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01.json new file mode 100644 index 000000000..a14eb48da --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392dcd8298eeeca109a4c349fbcbd085c87bcd5aa32a5a06c593b8e3f4072f35 +size 7684 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0001.json b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0001.json new file mode 100644 index 000000000..6b3233162 --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f601f4fa6ac44b82555eac3a88396b98c3dd700c0a157743a799e0f65653cd +size 7519 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0002.json b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0002.json new file mode 100644 index 000000000..73e206072 --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a75a018eb65ec3e3d3074e588c70ade1fb465ee21806a0b26c89c7508e5f6fab +size 7624 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0003.json 
b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0003.json new file mode 100644 index 000000000..240ad8dcc --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0003.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d8fd9574a23e0d71976e13023fc9073c281fe4a58109bffdd421c2cb6d53ecf +size 7631 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0004.json b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0004.json new file mode 100644 index 000000000..32fd5bded --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0004.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0d7c11a14ab5754a53860d4c587a086926d69826ef1db2c103c033357fb53d4 +size 7685 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0005.json b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0005.json new file mode 100644 index 000000000..ff71c156d --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0005.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec0b81adb90c381e352c0a14f5c384a483f7b4bac68df8b2a0d606a88af1ae9f +size 7520 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0006.json b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0006.json new file mode 100644 index 000000000..c7699b66a --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0006.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23388ddf4ce42292403bc1c21a0009b3ed6fe7457bc726ad46e0691c63f49fe6 
+size 7625 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0007.json b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0007.json new file mode 100644 index 000000000..f1920117c --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0007.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef9f3dc90c8a293f2001cc01d3659b7b45b0c37225020f179b0599488d8344a7 +size 7632 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0008.json b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0008.json new file mode 100644 index 000000000..991e10709 --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0008.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39166450bd02636042142071bae92b87d3791d7d1ff3963484e4bf49a27bd1ff +size 7685 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0009.json b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0009.json new file mode 100644 index 000000000..c0a75c91b --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0009.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d323c45d881962162631d95c5dd529c74f82f3bfd6689dd1295b613b3c50de35 +size 7520 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0010.json b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0010.json new file mode 100644 index 000000000..758cac801 --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0010.json @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94674bf6b28a393a8c5a8da3d5479191d5c8735d6623d9708bba7e899a1fe3c6 +size 7625 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0011.json b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0011.json new file mode 100644 index 000000000..44aa5e7fb --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k/isb1_sess_chat_lc3_multi_day_strategy_01_0011.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17bdda94141044a3ea36ff67cf038366ed8ea0845ffcc918f6b47ac75c21c7d3 +size 7632 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01.json b/datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01.json new file mode 100644 index 000000000..17e78bdae --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2be8596b4e2ebbd20afdbbb03ef7914f575924a98eee84d0acd5c35fbfae22d +size 7706 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01_0001.json b/datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01_0001.json new file mode 100644 index 000000000..7cc9d4d51 --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:177b67dbf79b22bbd0760e063ef5569f72d9d9385a1d88c007ab4f5679c286e4 +size 7707 diff --git a/datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01_0002.json b/datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01_0002.json new 
file mode 100644 index 000000000..eee60953c --- /dev/null +++ b/datasets/isb1/converted/extension_64k/chat_64k1k_qwen3.5/isb1_sess_chat_lc3_multi_day_strategy_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ab968d40f31e4780c53775a7d51454217abf24040e3d8cd0709727badf5db6 +size 7707 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01.json new file mode 100644 index 000000000..bf9718acb --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e4d7e653c3f4ee4c422b8923b82580a5134be6550819d484eb5f5e0d51f457 +size 2746 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0001.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0001.json new file mode 100644 index 000000000..55e026e7c --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:278c37d6ce1fd114ab5e6bf0abce101db23dd8b7b32b237952bbadc40f92a487 +size 2669 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0002.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0002.json new file mode 100644 index 000000000..ad27fa0dd --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbeb3201f01eafeea2933baeec39072dddfd0b5a08812e9321da2dd5e823df31 +size 2718 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0003.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0003.json new file mode 100644 index 000000000..dbb7d2fee --- /dev/null +++ 
b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0003.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd61a8e0a839f576224db4308d131e2cd4da5d834fe2a728cdb954d7665f9172 +size 2725 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0004.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0004.json new file mode 100644 index 000000000..993d6845e --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0004.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fca2e437912b0f209119fb524b8896c782d4149e5237436e550ba661784a782 +size 2747 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0005.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0005.json new file mode 100644 index 000000000..061cb6600 --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0005.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66f9bd4e2c84710766698e604e887383ae692176bdc08ffec80e210cc6b28e39 +size 2670 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0006.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0006.json new file mode 100644 index 000000000..b8bf217ea --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0006.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f641c5bc45b0be1938fb6067baa59f1cfe8687d09da1fe78e1dfde09d07784 +size 2719 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0007.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0007.json new file mode 100644 index 000000000..a22ff65e8 --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0007.json @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:764b5d116a9ab258dbb3f39455823e04728171df2078edd8d13ef10d57ae2ebe +size 2726 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0008.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0008.json new file mode 100644 index 000000000..38109b7ab --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0008.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73e9a919db6baad72001517ee6335641e9d03abbb24a03616ae388cd943ca1d0 +size 2747 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0009.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0009.json new file mode 100644 index 000000000..c256ddd8d --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0009.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:126da8b3fed24e28358a905b3051b0e5f345f0f24ba96e06c1edebf55903d0bb +size 2670 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0010.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0010.json new file mode 100644 index 000000000..654a0368f --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0010.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a8b21b503bd9bea0e8315b8658392b67b7b661fb41ebb1273764ba8fdf999f9 +size 2719 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0011.json b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0011.json new file mode 100644 index 000000000..5b491122d --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k/isb1_sess_optimizer_01_0011.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d7fb7638ce1fee522641a81199668fa15c34bb544c5daa8ba6b40c1e7198b51b +size 2726 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01.json b/datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01.json new file mode 100644 index 000000000..4e5c9b718 --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edfe48c31bb12a455d929a2abcdd15a4ccb865918c5fbd2b2809e7967fdecfad +size 2760 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01_0001.json b/datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01_0001.json new file mode 100644 index 000000000..a02eca75f --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c00b21670c05c9406b0b74883f8fe5586fb1c979ec550b174721631b0150b8d1 +size 2761 diff --git a/datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01_0002.json b/datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01_0002.json new file mode 100644 index 000000000..cbf22a9df --- /dev/null +++ b/datasets/isb1/converted/extension_64k/code_64k1k_qwen3.5/isb1_sess_optimizer_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe1f712db16fb8a7dd2186bd053f728179de328009c563f4683bcb2151a417d2 +size 2761 diff --git a/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01.json b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01.json new file mode 100644 index 000000000..c4c0876cf --- /dev/null +++ 
b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d451812144b3b944d058b8e7ede4fb7398f54bbbf8cf8b2f431985ee06fad118 +size 3299 diff --git a/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0001.json b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0001.json new file mode 100644 index 000000000..03a2dfb81 --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73df71fde60d50ee90c8042d52489c06af17c37537ff94b0dc172b3287929fb2 +size 3298 diff --git a/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0002.json b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0002.json new file mode 100644 index 000000000..db0d40be0 --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__chat_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f779829ac901f6ad41b269866f52dd77b42ecc6bda879af888e45169b61d26 +size 3299 diff --git a/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01.json 
b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01.json new file mode 100644 index 000000000..44bfb70d2 --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:511843bdadda58bb006e1829df648ade5c9740fd7c745a2a108f15a9cf820482 +size 2222198 diff --git a/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0001.json b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0001.json new file mode 100644 index 000000000..447093133 --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07072bc586a42f1b625b70f5fec9187b73b1c93cc4d6051466299ba57a80d3e0 +size 2222197 diff --git a/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0002.json b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0002.json new file mode 100644 index 000000000..74e3303ff --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_1m/inferencex_trace_replay__coding_qwen3.5_ulc2_1m_preview_v1/isb1_hb_depth_cache_ulc2_offload_cliff_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d12dcf7565874dfb729b12d069fbbcefdff2d0a0cfb280f553dc63e71111cc1d +size 2222198 diff --git 
a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01.json b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01.json new file mode 100644 index 000000000..dde417aec --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af6bbdedbcceb93c1f384fb00d18d506365709299ba6dfb149c6d638d659c982 +size 2234 diff --git a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0001.json b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0001.json new file mode 100644 index 000000000..5072474af --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df24fdcda9a63909d62b78cbf9cf84882240ba25e3654970311baf3d6a89e6b9 +size 2235 diff --git a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0002.json b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0002.json new file mode 100644 index 000000000..948b19e5d --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0002.json @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3158216c0d1572c28d1a003eebd889bbb0c56c0d4c29769ae983bfe37b83ea +size 2235 diff --git a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01.json b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01.json new file mode 100644 index 000000000..172bfd7b8 --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a89f7153acbef8a83f78601ed655f5d3a0907feb30f18e2b4994b96cd703de94 +size 2269 diff --git a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0001.json b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0001.json new file mode 100644 index 000000000..47dc53b26 --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:638194fafecfd1183f4e3787727c8bce03bded3d6237492c9db2fbcbe9d8226e +size 2268 diff --git a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0002.json b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0002.json new file mode 100644 index 000000000..be419291d --- 
/dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__chat_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec7a67c68bdb06918f0dd05d0fd5ed7c46ad3fc3600a65ec5278d569f6588bb +size 2269 diff --git a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01.json b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01.json new file mode 100644 index 000000000..dde417aec --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af6bbdedbcceb93c1f384fb00d18d506365709299ba6dfb149c6d638d659c982 +size 2234 diff --git a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0001.json b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0001.json new file mode 100644 index 000000000..5072474af --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df24fdcda9a63909d62b78cbf9cf84882240ba25e3654970311baf3d6a89e6b9 +size 2235 diff --git a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0002.json 
b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0002.json new file mode 100644 index 000000000..948b19e5d --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_gptoss_xlc2_500k_preview_v1/isb1_sess_cache_xlc2_hot_cold_session_mix_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3158216c0d1572c28d1a003eebd889bbb0c56c0d4c29769ae983bfe37b83ea +size 2235 diff --git a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01.json b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01.json new file mode 100644 index 000000000..fdea31cc4 --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1ef4a8272516a3adca7280055169382737ad70e3653201f92dfc892c91f92f5 +size 587823 diff --git a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0001.json b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0001.json new file mode 100644 index 000000000..e83480d06 --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0001.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9bbd11d756040feaf746e7835569ea62665c4eba011dc4e3db81c1c125672666 +size 587822 diff --git a/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0002.json b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0002.json new file mode 100644 index 000000000..dd2f2ac3a --- /dev/null +++ b/datasets/isb1/converted/preview/long_context_500k/inferencex_trace_replay__coding_qwen3.5_xlc2_500k_preview_v1/isb1_hb_depth_cache_xlc2_hot_cold_session_mix_01_0002.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4580b229e9622a0f50e1c4b9280d4bcfd9a1481fa57b8ffd154e9c35401d4ed0 +size 587823 From fd73c8a01859565b6f61584822d5c57ba579df2a Mon Sep 17 00:00:00 2001 From: William Chen <57119977+OCWC22@users.noreply.github.com> Date: Mon, 20 Apr 2026 22:32:56 -0700 Subject: [PATCH 02/13] feat(isb1): add drop-in sweep config for kv-cache-tester (PR #993) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Schema-parity sibling of .github/configs/multiturn-agentic-trace.yaml with 16 ISB1 sweep cells across H200/B200/MI355X/H100 × multi-scale workloads (8k/32k/131k/500k-preview/1M-preview) × multi-model (Qwen3.5, DSR1). Follows Cam's exact tp / users / offload / ep schema. Consumers either merge these top-level keys into multiturn-agentic-trace.yaml or extend the sweep loader to glob multiturn-agentic-trace*.yaml. 
Co-Authored-By: Claude Opus 4.7 --- .../configs/multiturn-agentic-trace-isb1.yaml | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 .github/configs/multiturn-agentic-trace-isb1.yaml diff --git a/.github/configs/multiturn-agentic-trace-isb1.yaml b/.github/configs/multiturn-agentic-trace-isb1.yaml new file mode 100644 index 000000000..9921d3bc4 --- /dev/null +++ b/.github/configs/multiturn-agentic-trace-isb1.yaml @@ -0,0 +1,54 @@ +# ISB1 sweep cells for Cam's kv-cache-tester replay flow. +# Schema mirrors .github/configs/multiturn-agentic-trace.yaml. +# Merge these top-level keys into that file (or extend the sweep workflow +# to glob .github/configs/multiturn-agentic-trace*.yaml) to include ISB1 sweeps. +# 8k code cells map to datasets/isb1/converted/core/code_8k1k/. +# 32k chat cells map to datasets/isb1/converted/extension_32k/chat_32k1k*/. +# 131k code/chat cells map to datasets/isb1/converted/extension_131k/*_131k1k*/. +# 500k preview cells map to datasets/isb1/converted/preview/long_context_500k/. +# 1m preview cells map to datasets/isb1/converted/preview/long_context_1m/. +# Expected TRACE_DIR is either datasets/isb1/converted/ or one of those subdirs. 
+ +h200-fp8-qwen3-isb1-code-8k: + tp2: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]} + tp4: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]} + +h200-fp8-qwen3-isb1-chat-32k: + tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off"]} + tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]} + +h200-fp8-qwen3-isb1-code-131k: + tp4: {users: [1, 2, 4, 8], offload: ["on", "off"]} + tp8: {users: [1, 2, 4, 8, 16], offload: ["on", "off"]} + +b200-fp4-dsr1-isb1-code-8k: + tp4: {ep: 4, users: [4, 8, 16, 32, 64, 128, 256], offload: ["on", "off"]} + tp8: {ep: 8, users: [8, 16, 32, 64, 128, 256, 512], offload: ["on", "off"]} + +b200-fp4-dsr1-isb1-chat-32k: + tp4: {ep: 4, users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]} + tp8: {ep: 8, users: [1, 2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]} + +b200-fp4-dsr1-isb1-code-131k: + tp8: {ep: 8, users: [1, 2, 4, 8, 16], offload: ["on", "off"]} + +b200-fp4-qwen3-isb1-chat-500k-preview: + tp4: {users: [1, 2, 4], offload: ["on", "off"]} + tp8: {users: [1, 2, 4, 8], offload: ["on", "off"]} + +b200-fp4-qwen3-isb1-chat-1m-preview: + tp8: {users: [1, 2], offload: ["on", "off"]} + +mi355x-fp8-qwen3-isb1-code-8k: + tp2: {users: [2, 4, 8, 16, 32, 64], offload: ["on", "off"]} + tp4: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]} + +mi355x-fp8-qwen3-isb1-chat-32k: + tp4: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off"]} + +h100-fp8-qwen3-isb1-code-8k-lmcache: + tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off"]} + tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]} + +h200-fp8-qwen3-isb1-debug: + tp2: {users: [2], offload: ["off"]} From 119a037d29c11a5fb196247afc7a69c9265aa29e Mon Sep 17 00:00:00 2001 From: William Chen <57119977+OCWC22@users.noreply.github.com> Date: Mon, 20 Apr 2026 22:35:30 -0700 Subject: [PATCH 03/13] feat(isb1): add kv-cache-tester trace schema validator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
tools/validate_kvcache_tester_trace.py — stdlib-only CLI that validates any trace JSON against Cam's kv-cache-tester schema: required keys, block_size consistency, prefix-extending hash_ids, per-request fields. Runs against single files or directories; exit code 1 on any failure. Catches schema drift before submissions reach the sweep. Co-Authored-By: Claude Opus 4.7 --- datasets/isb1/README.md | 4 + tools/validate_kvcache_tester_trace.py | 369 +++++++++++++++++++++++++ 2 files changed, 373 insertions(+) create mode 100644 tools/validate_kvcache_tester_trace.py diff --git a/datasets/isb1/README.md b/datasets/isb1/README.md index 9c39451d9..aa3c4ef0e 100644 --- a/datasets/isb1/README.md +++ b/datasets/isb1/README.md @@ -248,6 +248,10 @@ Any failure of the above means the PR is not actually plumbed end-to-end for this bundle and should be reproduced against Cam's `trace_replay_tester.py` before being claimed as compatible. +### Validate before publishing + +Before publishing or mirroring `datasets/isb1/converted/`, run `python3 tools/validate_kvcache_tester_trace.py datasets/isb1/converted/` to catch schema drift early: missing required keys, invalid `block_size`, and broken prefix-extending `hash_ids` that would otherwise fail inside Cam's replay sweep. + --- ## HF publication diff --git a/tools/validate_kvcache_tester_trace.py b/tools/validate_kvcache_tester_trace.py new file mode 100644 index 000000000..850da4991 --- /dev/null +++ b/tools/validate_kvcache_tester_trace.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python3 +"""Validate kv-cache-tester trace JSON files. + +Stdlib-only validator for the compact trace schema consumed by +`trace_replay_tester.py` / `normalize_trace()` in Cam's kv-cache-tester. +Supports validating a single JSON file or recursively walking a directory of +trace files. 
+""" + +from __future__ import annotations + +import argparse +import json +import math +import sys +from pathlib import Path +from typing import Any + +VALID_HASH_ID_SCOPES = {"local", "global"} +MANIFEST_FILENAMES = {"manifest.json"} +CHECK = "✓" +CROSS = "✗" +WARN = "!" + + +def _is_int(value: Any) -> bool: + return isinstance(value, int) and not isinstance(value, bool) + + +def _is_number(value: Any) -> bool: + return (isinstance(value, int) or isinstance(value, float)) and not isinstance(value, bool) + + +def _add_issue(bucket: list[str], message: str, max_issues: int) -> None: + if len(bucket) < max_issues: + bucket.append(message) + + +def _validate_string_list(value: Any, field_name: str, errors: list[str], max_issues: int) -> list[str] | None: + if not isinstance(value, list) or not value: + _add_issue(errors, f"{field_name} must be a non-empty list[str]", max_issues) + return None + for idx, item in enumerate(value): + if not isinstance(item, str): + _add_issue(errors, f"{field_name}[{idx}] must be str, got {type(item).__name__}", max_issues) + return value if len(errors) < max_issues else None + + +def _validate_flat_hash_ids( + hash_ids: list[Any], + *, + input_tokens: int, + block_size: int, + scope: str | None, + errors: list[str], + warnings: list[str], + max_issues: int, +) -> None: + expected_len = math.ceil(input_tokens / block_size) if input_tokens > 0 else 0 + if len(hash_ids) != expected_len: + _add_issue( + errors, + f"hash_ids length = {len(hash_ids)}, expected ceil(in={input_tokens} / block_size={block_size}) = {expected_len}", + max_issues, + ) + + if scope is None: + _add_issue( + warnings, + "hash_id_scope missing; cannot strictly validate flat hash_ids semantics", + max_issues, + ) + + for idx, value in enumerate(hash_ids): + if not _is_int(value): + _add_issue(errors, f"hash_ids[{idx}] must be int, got {type(value).__name__}", max_issues) + continue + if value <= 0: + _add_issue(errors, f"hash_ids[{idx}] = {value}, expected positive 
int", max_issues) + if scope == "local": + expected = idx + 1 + if value != expected: + _add_issue( + errors, + f"hash_ids[{idx}] = {value}, expected {expected} (prefix must extend by 1)", + max_issues, + ) + + +def _validate_nested_hash_ids( + hash_ids: list[Any], + *, + scope: str | None, + errors: list[str], + warnings: list[str], + max_issues: int, +) -> None: + if scope is None: + _add_issue( + warnings, + "hash_id_scope missing; cannot strictly validate nested hash_ids semantics", + max_issues, + ) + for outer_idx, group in enumerate(hash_ids): + if not isinstance(group, list): + _add_issue(errors, f"hash_ids[{outer_idx}] must be list[int], got {type(group).__name__}", max_issues) + continue + for inner_idx, value in enumerate(group): + if not _is_int(value): + _add_issue( + errors, + f"hash_ids[{outer_idx}][{inner_idx}] must be int, got {type(value).__name__}", + max_issues, + ) + continue + if value <= 0: + _add_issue(errors, f"hash_ids[{outer_idx}][{inner_idx}] = {value}, expected positive int", max_issues) + if scope == "local": + expected = inner_idx + 1 + if value != expected: + _add_issue( + errors, + f"hash_ids[{outer_idx}][{inner_idx}] = {value}, expected {expected} (prefix must extend by 1)", + max_issues, + ) + + +def _validate_request( + req: Any, + *, + request_idx: int, + block_size: int, + scope: str | None, + errors: list[str], + warnings: list[str], + max_issues: int, +) -> None: + prefix = f"requests[{request_idx}]" + if not isinstance(req, dict): + _add_issue(errors, f"{prefix} must be object, got {type(req).__name__}", max_issues) + return + + req_type = req.get("type") + if not isinstance(req_type, str): + _add_issue(errors, f"{prefix}.type must be str", max_issues) + + if req_type == "subagent": + return + + t_value = req.get("t") + if not _is_number(t_value): + _add_issue(errors, f"{prefix}.t must be float >= 0", max_issues) + elif float(t_value) < 0: + _add_issue(errors, f"{prefix}.t = {t_value}, expected >= 0", max_issues) + + 
input_tokens = req.get("in") + if not _is_int(input_tokens): + _add_issue(errors, f"{prefix}.in must be int >= 0", max_issues) + input_tokens = 0 + elif input_tokens < 0: + _add_issue(errors, f"{prefix}.in = {input_tokens}, expected >= 0", max_issues) + + output_tokens = req.get("out") + if not _is_int(output_tokens): + _add_issue(errors, f"{prefix}.out must be int >= 0", max_issues) + elif output_tokens < 0: + _add_issue(errors, f"{prefix}.out = {output_tokens}, expected >= 0", max_issues) + + hash_ids = req.get("hash_ids") + if not isinstance(hash_ids, list): + _add_issue(errors, f"{prefix}.hash_ids must be list[int] or list[list[int]]", max_issues) + else: + is_nested = bool(hash_ids) and all(isinstance(item, list) for item in hash_ids) + is_flat = not hash_ids or all(not isinstance(item, list) for item in hash_ids) + if is_nested: + _validate_nested_hash_ids( + hash_ids, + scope=scope, + errors=errors, + warnings=warnings, + max_issues=max_issues, + ) + elif is_flat: + _validate_flat_hash_ids( + hash_ids, + input_tokens=input_tokens, + block_size=block_size, + scope=scope, + errors=errors, + warnings=warnings, + max_issues=max_issues, + ) + else: + _add_issue(errors, f"{prefix}.hash_ids must not mix flat and nested entries", max_issues) + + optional_string_fields = ("model", "stop") + for field_name in optional_string_fields: + if field_name in req and not isinstance(req[field_name], str): + _add_issue(errors, f"{prefix}.{field_name} must be str", max_issues) + + optional_list_fields = ("input_types", "output_types") + for field_name in optional_list_fields: + if field_name in req: + value = req[field_name] + if not isinstance(value, list): + _add_issue(errors, f"{prefix}.{field_name} must be list[str]", max_issues) + continue + for idx, item in enumerate(value): + if not isinstance(item, str): + _add_issue(errors, f"{prefix}.{field_name}[{idx}] must be str", max_issues) + + optional_number_fields = ("api_time", "think_time") + for field_name in 
optional_number_fields: + if field_name in req: + value = req[field_name] + if not _is_number(value): + _add_issue(errors, f"{prefix}.{field_name} must be float", max_issues) + + +def validate_trace(trace: Any, *, max_issues: int) -> tuple[list[str], list[str]]: + errors: list[str] = [] + warnings: list[str] = [] + + if not isinstance(trace, dict): + return [f"top-level JSON must be object, got {type(trace).__name__}"], warnings + + trace_id = trace.get("id") + if not isinstance(trace_id, str): + _add_issue(errors, "id must be str", max_issues) + + _validate_string_list(trace.get("models"), "models", errors, max_issues) + + block_size = trace.get("block_size") + if not _is_int(block_size): + _add_issue(errors, "block_size must be int > 0", max_issues) + block_size = 1 + elif block_size <= 0: + _add_issue(errors, f"block_size = {block_size}, expected > 0", max_issues) + + requests = trace.get("requests") + if not isinstance(requests, list) or not requests: + _add_issue(errors, "requests must be a non-empty list", max_issues) + requests = [] + + scope = trace.get("hash_id_scope") + if scope is not None and scope not in VALID_HASH_ID_SCOPES: + _add_issue( + errors, + f"hash_id_scope = {scope!r}, expected one of {sorted(VALID_HASH_ID_SCOPES)}", + max_issues, + ) + scope = None + + for field_name in ("tool_tokens", "system_tokens"): + if field_name in trace: + value = trace[field_name] + if not _is_int(value): + _add_issue(errors, f"{field_name} must be int >= 0", max_issues) + elif value < 0: + _add_issue(errors, f"{field_name} = {value}, expected >= 0", max_issues) + + for idx, req in enumerate(requests): + if len(errors) >= max_issues and len(warnings) >= max_issues: + break + _validate_request( + req, + request_idx=idx, + block_size=block_size, + scope=scope, + errors=errors, + warnings=warnings, + max_issues=max_issues, + ) + + return errors, warnings + + +def iter_trace_files(path: Path) -> list[Path]: + if path.is_file(): + return [path] + if path.is_dir(): + 
files = [] + for candidate in sorted(path.rglob("*.json")): + if candidate.name in MANIFEST_FILENAMES: + continue + if candidate.is_file(): + files.append(candidate) + return files + raise FileNotFoundError(f"Path not found: {path}") + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + prog="validate_kvcache_tester_trace.py", + description="Validate kv-cache-tester trace files or directories.", + ) + parser.add_argument("path", metavar="PATH", help="file or directory (recursive glob *.json when directory)") + parser.add_argument("--quiet", action="store_true", help="only print final summary") + parser.add_argument( + "--strict", + action="store_true", + help="treat warnings as errors (e.g. hash_ids scope missing)", + ) + parser.add_argument( + "--max-errors-per-file", + type=int, + default=5, + help="maximum errors reported per file (default: 5)", + ) + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(argv or sys.argv[1:]) + if args.max_errors_per_file <= 0: + print("--max-errors-per-file must be > 0", file=sys.stderr) + return 2 + + path = Path(args.path) + try: + files = iter_trace_files(path) + except FileNotFoundError as exc: + print(str(exc), file=sys.stderr) + return 2 + + if not files: + print("No trace JSON files found", file=sys.stderr) + return 2 + + valid_count = 0 + failed_count = 0 + + for file_path in files: + try: + trace = json.loads(file_path.read_text()) + except Exception as exc: + errors = [f"invalid JSON: {exc}"] + warnings: list[str] = [] + else: + errors, warnings = validate_trace(trace, max_issues=args.max_errors_per_file) + + effective_errors = list(errors) + if args.strict: + effective_errors.extend(warnings) + + if effective_errors: + failed_count += 1 + if not args.quiet: + print(f"{CROSS} {file_path}") + for issue in effective_errors[: args.max_errors_per_file]: + print(f" {issue}") + else: + valid_count += 1 + if warnings and not 
args.quiet: + print(f"{WARN} {file_path}") + for warning in warnings[: args.max_errors_per_file]: + print(f" {warning}") + + if failed_count == 0: + print(f"{CHECK} {valid_count} files valid | 0 failed") + return 0 + + plural = "files" if failed_count != 1 else "file" + print(f"{failed_count} {plural} failed validation") + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) From 5208886ebe299fac8191bb8cdb99593018837c5d Mon Sep 17 00:00:00 2001 From: William Chen <57119977+OCWC22@users.noreply.github.com> Date: Mon, 20 Apr 2026 22:37:36 -0700 Subject: [PATCH 04/13] =?UTF-8?q?data(isb1):=20ship=20converted/manifest.j?= =?UTF-8?q?son=20=E2=80=94=20179-trace=20catalog?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-generated index with per-trace metadata: scale band, workload family, model family, token totals, and approximate cache hit rate (computed via Cam's normalize_trace walker). Enables sweep configs to filter or select trace subsets by metadata without loading every file. 
Co-Authored-By: Claude Opus 4.7
---
 datasets/isb1/converted/manifest.json | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 datasets/isb1/converted/manifest.json

diff --git a/datasets/isb1/converted/manifest.json b/datasets/isb1/converted/manifest.json
new file mode 100644
index 000000000..2c9886e13
--- /dev/null
+++ b/datasets/isb1/converted/manifest.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9694ac3d13014235c5e61f4c7cff955b9d924cad373695ed373896e22439c55c
+size 93447

From 962634ee7ea208e7790fa9d65eb1c96c1cfbdcab Mon Sep 17 00:00:00 2001
From: William Chen <57119977+OCWC22@users.noreply.github.com>
Date: Mon, 20 Apr 2026 22:39:14 -0700
Subject: [PATCH 05/13] docs(isb1): HF publication recipe for kv-cache-tester hf_<org>--<repo> path

datasets/isb1/HF_PUBLISH.md walks through publishing datasets/isb1/converted/
to Hugging Face at semianalysisai/isb1-cc-traces so Cam's trace_replay
scripts can load ISB1 via TRACE_DIR=hf_semianalysisai--isb1-cc-traces with
zero changes to his shell scripts (hf_<org>--<repo> handling at
benchmarks/single_node/multiturn_fp4_b200_trace_replay.sh lines 54-58).
Includes dataset card template, upload command, versioning recipe, and
post-upload verification.

Co-Authored-By: Claude Opus 4.7
---
 datasets/isb1/HF_PUBLISH.md | 124 ++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 datasets/isb1/HF_PUBLISH.md

diff --git a/datasets/isb1/HF_PUBLISH.md b/datasets/isb1/HF_PUBLISH.md
new file mode 100644
index 000000000..a4f4f391f
--- /dev/null
+++ b/datasets/isb1/HF_PUBLISH.md
@@ -0,0 +1,124 @@
+# HF publication recipe for ISB1 converted traces
+
+Mirror `datasets/isb1/converted/` to Hugging Face so Cam's
+`TRACE_DIR=hf_<org>--<repo>` path works immediately with kv-cache-tester.
+Recommended target: `semianalysisai/isb1-cc-traces`.
+
+## 1. 
Target namespace
+
+- Dataset repo: `semianalysisai/isb1-cc-traces`
+- Source directory: `datasets/isb1/converted/`
+- Consumer contract: Cam's replay scripts interpret `hf_<org>--<repo>` as a
+  Hugging Face dataset reference before calling `trace_replay_tester.py`
+
+## 2. Prereqs
+
+- `huggingface-cli >= 0.20`
+- `HF_TOKEN` with write scope to the destination org
+- Local validation already green:
+  `python3 tools/validate_kvcache_tester_trace.py datasets/isb1/converted/`
+
+Authenticate first:
+
+```bash
+export HF_TOKEN=hf_xxx
+huggingface-cli login --token "$HF_TOKEN"
+```
+
+## 3. Dataset card template
+
+Create the HF dataset `README.md` with this content:
+
+```markdown
+---
+license: apache-2.0
+task_categories: [text-generation]
+language: [en]
+pretty_name: ISB1 Converted kv-cache-tester Traces
+tags: [kv-cache, trace-replay, inference-benchmark, semianalysis, isb1]
+---
+
+# ISB1 Converted kv-cache-tester Traces
+
+This dataset mirrors `datasets/isb1/converted/` from SemiAnalysisAI/InferenceX
+PR #1032 so Cam's kv-cache-tester replay flow from PR #993 can consume ISB1
+traces directly through the `hf_<org>--<repo>` `TRACE_DIR` convention.
+
+## Contents
+
+- 179 pre-converted trace JSON files
+- 8k / 32k / 64k / 131k / 500k preview / 1m preview coverage
+- Kimi K2.5 / DSR1 / GPT-OSS / Qwen3.5 coverage
+- `manifest.json` metadata catalog
+
+## Provenance
+
+- Source repo: `SemiAnalysisAI/InferenceX`
+- Source PR: `#1032`
+- Consumer workflow: `callanjfox/kv-cache-tester` PR `#993`
+- License: Apache-2.0
+```
+
+## 4. Upload command
+
+```bash
+huggingface-cli upload \
+  semianalysisai/isb1-cc-traces \
+  datasets/isb1/converted/ \
+  . \
+  --repo-type dataset \
+  --revision main
+```
+
+If the repo does not exist yet, create it in the HF UI first, then rerun the
+upload.
+
+## 5. 
Cam's Slurm integration
+
+After publication, switch Cam's script from a local directory to the HF path:
+
+```bash
+TRACE_DIR=hf_semianalysisai--isb1-cc-traces # replaces datasets/isb1/converted
+```
+
+That triggers the `hf_<org>--<repo>` branch in Cam's PR #993 replay script
+(`benchmarks/single_node/multiturn_fp4_b200_trace_replay.sh`, lines 54-58),
+which rewrites the value into `--hf-dataset <org>/<repo>` before invoking
+`trace_replay_tester.py`.
+
+## 6. Versioning
+
+When new traces land:
+
+1. Regenerate `datasets/isb1/converted/manifest.json`
+2. Re-run local validation on the converted directory
+3. Upload the updated directory to HF `main`
+4. Create a matching HF tag such as `v0.2.0` or `pr1032-r2`
+5. Record the InferenceX commit SHA and HF revision together
+
+Consumers who need immutability should pin an HF revision instead of floating
+on `main`.
+
+## 7. Verification
+
+```bash
+rm -rf /tmp/verify
+huggingface-cli download semianalysisai/isb1-cc-traces \
+  --repo-type dataset \
+  --local-dir /tmp/verify
+python3 tools/validate_kvcache_tester_trace.py /tmp/verify
+```
+
+Expected result:
+
+- Download succeeds with all trace JSONs present
+- Validator reports all converted traces passing
+- Cam's replay wrapper accepts
+  `TRACE_DIR=hf_semianalysisai--isb1-cc-traces` with no shell-script changes
+
+## Notes
+
+- Publish converted artifacts and metadata only
+- Keep the layout compatible with `trace_replay_tester.py`
+- If the org name changes, update both the upload command and `TRACE_DIR`
+  example together

From 4f12d3aa27261700303d185d4736ff8b6bcfd985 Mon Sep 17 00:00:00 2001
From: William Chen <57119977+OCWC22@users.noreply.github.com>
Date: Tue, 21 Apr 2026 00:42:07 -0700
Subject: [PATCH 06/13] feat(isb1): add mooncake trace exporter

---
 tools/isb1_to_mooncake_trace.py      | 548 +++++++++++++++++++++++++++
 tools/test_isb1_to_mooncake_trace.py | 370 ++++++++++++++++++
 tools/validate_mooncake_trace.py     | 285 ++++++++++++++
 3 files changed, 1203 insertions(+)
 create mode 
100644 tools/isb1_to_mooncake_trace.py create mode 100644 tools/test_isb1_to_mooncake_trace.py create mode 100644 tools/validate_mooncake_trace.py diff --git a/tools/isb1_to_mooncake_trace.py b/tools/isb1_to_mooncake_trace.py new file mode 100644 index 000000000..6550a579d --- /dev/null +++ b/tools/isb1_to_mooncake_trace.py @@ -0,0 +1,548 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: Apache-2.0 +"""Convert ISB1 replay bundles into `mooncake_trace` JSONL files. + +Produces per-bundle JSONL files compatible with +`aiperf profile --custom-dataset-type mooncake_trace` input format. Works on +ISB1 export bundles shipped under `datasets/isb1/exports/**` and emits one +`.jsonl` file per input bundle. + +This shim is the ONLY glue between our ISB1 bundles and Cam's aiperf / +mooncake replay path. It does not import or execute any benchmark harness and +has no third-party dependencies: standard library only. + +Schema compatibility +-------------------- + +The `mooncake_trace` rows consumed by aiperf are JSON Lines entries with the +following shape: + + { + "session_id": "", + "model": "", # optional + "input": [ + {"role": "user", "content": "..."}, + {"role": "assistant", "content": "..."}, + ], + "output_length": 256, + "pre_gap": 1.5 # optional, seconds + } + +ISB1 exports store turn history in `events[].input_messages` with typed +`content_blocks`. This exporter flattens those blocks into plain text strings: +text blocks are passed through, code blocks are fenced with Markdown triple +backticks and a language tag when present. + +Known limitations +----------------- + +- ISB1 tool calls are encoded as text inside `tool` role turns rather than as + OpenAI `tool_calls` arrays. This exporter preserves that exact text as + `{"role": "tool", "content": "[tool_call: ...]"}` so request-pattern + fidelity is retained for KV-cache stress testing. +- Messages whose flattened content is empty are dropped. 
If an event has no + remaining non-empty messages after flattening, the event is skipped and a + warning is raised. +- If the same `session_id` appears across multiple bundles in one invocation, + later bundles are disambiguated by prefixing `session_id` with + `::` and a warning is printed. + +Usage +----- + +Single bundle: + + python tools/isb1_to_mooncake_trace.py \ + --input datasets/isb1/exports/core/chat_8k1k.json \ + --output-dir /tmp/mooncake/ + +Whole export tree: + + python tools/isb1_to_mooncake_trace.py \ + --input datasets/isb1/exports/ \ + --output-dir /tmp/mooncake/ + +Subset by glob: + + python tools/isb1_to_mooncake_trace.py \ + --input 'datasets/isb1/exports/core/*.json' \ + --output-dir /tmp/mooncake/ + +Dry-run validation: + + python tools/isb1_to_mooncake_trace.py \ + --input datasets/isb1/exports/core/code_8k1k.json \ + --output-dir /tmp/mooncake/ \ + --dry-run --verbose +""" + +from __future__ import annotations + +import argparse +import glob +import json +import sys +from pathlib import Path +from typing import Any + +SUPPORTED_ADAPTERS = {"inferencex_trace_replay"} +VALID_ROLES = {"user", "assistant", "system", "tool"} +MANIFEST_FILENAMES = {"manifest.json", "manifest_qwen3.5.json"} + + +class WarningTracker: + def __init__(self, *, verbose: bool) -> None: + self.verbose = verbose + self.count = 0 + + def warn(self, message: str) -> None: + self.count += 1 + if self.verbose: + print(f"WARN: {message}", file=sys.stderr) + + +def _looks_like_glob(raw: str) -> bool: + return any(ch in raw for ch in "*?[") + + +def _iter_bundle_files(input_spec: str) -> list[Path]: + if _looks_like_glob(input_spec): + matches = [Path(p).resolve() for p in sorted(glob.glob(input_spec, recursive=True))] + files = [p for p in matches if p.is_file()] + if files: + return files + raise FileNotFoundError(f"no files matched glob: {input_spec}") + + path = Path(input_spec).resolve() + if path.is_file(): + return [path] + if path.is_dir(): + files: list[Path] = [] + 
for candidate in sorted(path.rglob("*.json")): + if candidate.name in MANIFEST_FILENAMES: + continue + if "/prefixes/" in candidate.as_posix(): + continue + try: + payload = json.loads(candidate.read_text(encoding="utf-8")) + except Exception: + continue + if not isinstance(payload, dict): + continue + if payload.get("adapter_id") not in SUPPORTED_ADAPTERS: + continue + files.append(candidate.resolve()) + return files + raise FileNotFoundError(f"input path not found: {input_spec}") + + +def _safe_int(value: Any, *, field_name: str, context: str) -> int: + if isinstance(value, bool) or not isinstance(value, int): + raise ValueError(f"{context}: {field_name} must be int, got {type(value).__name__}") + return value + + +def _load_bundle(bundle_path: Path) -> dict[str, Any]: + try: + payload = json.loads(bundle_path.read_text(encoding="utf-8")) + except Exception as exc: + raise ValueError(f"failed to read ISB1 bundle {bundle_path}: {exc}") from exc + + if not isinstance(payload, dict): + raise ValueError(f"top-level bundle must be object in {bundle_path}") + + adapter_id = str(payload.get("adapter_id") or "") + if adapter_id not in SUPPORTED_ADAPTERS: + raise ValueError( + f"unsupported ISB1 adapter {adapter_id!r} in {bundle_path}. " + f"Expected one of {sorted(SUPPORTED_ADAPTERS)}." 
+ ) + + bundle_id = payload.get("bundle_id") + if not isinstance(bundle_id, str) or not bundle_id: + raise ValueError(f"bundle_id missing or invalid in {bundle_path}") + + exports = payload.get("exports") + if not isinstance(exports, list) or not exports: + raise ValueError(f"exports must be a non-empty list in {bundle_path}") + + return payload + + +def _block_language(block: dict[str, Any]) -> str: + language = block.get("language") + if isinstance(language, str) and language: + return language + metadata = block.get("metadata") + if isinstance(metadata, dict): + meta_lang = metadata.get("language") + if isinstance(meta_lang, str) and meta_lang: + return meta_lang + return "" + + +def _flatten_blocks(blocks: list[Any]) -> str: + parts: list[str] = [] + for block in blocks: + if not isinstance(block, dict): + continue + block_type = str(block.get("type") or "text") + text = block.get("text") + text_value = "" if text is None else str(text) + if block_type == "text": + if text_value: + parts.append(text_value) + continue + if block_type == "code": + language = _block_language(block) + fence = f"```{language}\n{text_value}\n```" if language else f"```\n{text_value}\n```" + parts.append(fence) + continue + if text_value: + parts.append(text_value) + return "\n\n".join(parts) + + +def _flatten_message( + message: dict[str, Any], + *, + bundle_id: str, + export_idx: int, + event_idx: int, + message_idx: int, + warnings: WarningTracker, +) -> dict[str, str] | None: + role = str(message.get("role") or "") + if role not in VALID_ROLES: + raise ValueError( + f"bundle {bundle_id} export[{export_idx}] event[{event_idx}] " + f"message[{message_idx}] has unsupported role {role!r}" + ) + + if "content" in message and message.get("content") is not None: + if not isinstance(message.get("content"), str): + raise ValueError( + f"bundle {bundle_id} export[{export_idx}] event[{event_idx}] " + f"message[{message_idx}].content must be str or null" + ) + content = message["content"] + 
else: + blocks = message.get("content_blocks") + if blocks is None: + blocks = [] + if not isinstance(blocks, list): + raise ValueError( + f"bundle {bundle_id} export[{export_idx}] event[{event_idx}] " + f"message[{message_idx}].content_blocks must be list" + ) + content = _flatten_blocks(blocks) + + if content == "": + warnings.warn( + f"bundle {bundle_id} export[{export_idx}] event[{event_idx}] " + f"message[{message_idx}] flattened to empty content and was dropped" + ) + return None + + return {"role": role, "content": content} + + +def _session_id_for_event( + *, + bundle_id: str, + export_cell: dict[str, Any], + event: dict[str, Any], + seen_session_owners: dict[str, str], + warned_collisions: set[tuple[str, str]], + warnings: WarningTracker, +) -> str: + base_session_id = event.get("session_id") or export_cell.get("trace_id") + if not isinstance(base_session_id, str) or not base_session_id: + raise ValueError( + f"bundle {bundle_id} export {export_cell.get('trace_id')!r} has no usable session_id or trace_id" + ) + + owner = seen_session_owners.get(base_session_id) + if owner is None: + seen_session_owners[base_session_id] = bundle_id + return base_session_id + if owner == bundle_id: + return base_session_id + + collision_key = (base_session_id, bundle_id) + if collision_key not in warned_collisions: + warnings.warn( + f"session_id collision across bundles for {base_session_id!r}: " + f"first seen in {owner}, prefixing rows emitted from {bundle_id}" + ) + warned_collisions.add(collision_key) + return f"{bundle_id}::{base_session_id}" + + +def _event_pre_gap( + *, + bundle_id: str, + export_idx: int, + event_idx: int, + session_id: str, + arrival_time_offset_ms: int, + prior_offsets_ms: dict[str, int], + warnings: WarningTracker, +) -> float: + prior = prior_offsets_ms.get(session_id) + prior_offsets_ms[session_id] = arrival_time_offset_ms + if prior is None: + return 0.0 + + delta_ms = arrival_time_offset_ms - prior + if delta_ms < 0: + warnings.warn( + 
f"bundle {bundle_id} export[{export_idx}] event[{event_idx}] session {session_id!r} " + f"has negative arrival delta ({delta_ms} ms); clamping pre_gap to 0.0" + ) + return 0.0 + return delta_ms / 1000.0 + + +def _convert_bundle( + *, + bundle_path: Path, + include_model: bool, + include_pre_gap: bool, + seen_session_owners: dict[str, str], + warned_collisions: set[tuple[str, str]], + warnings: WarningTracker, +) -> tuple[str, list[dict[str, Any]], int]: + payload = _load_bundle(bundle_path) + bundle_id = str(payload["bundle_id"]) + rows: list[dict[str, Any]] = [] + emitted_sessions: set[str] = set() + prior_offsets_ms: dict[str, int] = {} + + for export_idx, export_cell in enumerate(payload.get("exports") or []): + if not isinstance(export_cell, dict): + raise ValueError(f"bundle {bundle_id} export[{export_idx}] must be object") + + canonical_model_id = export_cell.get("canonical_model_id") + if include_model and (not isinstance(canonical_model_id, str) or not canonical_model_id): + raise ValueError( + f"bundle {bundle_id} export[{export_idx}] missing canonical_model_id" + ) + + events = export_cell.get("events") + if not isinstance(events, list): + raise ValueError(f"bundle {bundle_id} export[{export_idx}] events must be list") + + for event_idx, event in enumerate(events): + if not isinstance(event, dict): + raise ValueError( + f"bundle {bundle_id} export[{export_idx}] event[{event_idx}] must be object" + ) + + context = f"bundle {bundle_id} export[{export_idx}] event[{event_idx}]" + output_length = event.get("target_output_tokens") + if output_length is None: + raise ValueError(f"{context}: missing target_output_tokens") + output_length = _safe_int( + output_length, + field_name="target_output_tokens", + context=context, + ) + if output_length <= 0: + raise ValueError(f"{context}: target_output_tokens must be > 0") + + arrival_time_offset_ms = event.get("arrival_time_offset_ms") + if arrival_time_offset_ms is None: + raise ValueError(f"{context}: missing 
arrival_time_offset_ms") + arrival_time_offset_ms = _safe_int( + arrival_time_offset_ms, + field_name="arrival_time_offset_ms", + context=context, + ) + + input_messages = event.get("input_messages") + if not isinstance(input_messages, list): + raise ValueError(f"{context}: input_messages must be list") + + flattened_messages: list[dict[str, str]] = [] + for message_idx, message in enumerate(input_messages): + if not isinstance(message, dict): + raise ValueError( + f"{context}: input_messages[{message_idx}] must be object" + ) + flattened = _flatten_message( + message, + bundle_id=bundle_id, + export_idx=export_idx, + event_idx=event_idx, + message_idx=message_idx, + warnings=warnings, + ) + if flattened is not None: + flattened_messages.append(flattened) + + if not flattened_messages: + warnings.warn(f"{context}: skipped event because every message flattened to empty content") + continue + + session_id = _session_id_for_event( + bundle_id=bundle_id, + export_cell=export_cell, + event=event, + seen_session_owners=seen_session_owners, + warned_collisions=warned_collisions, + warnings=warnings, + ) + emitted_sessions.add(session_id) + + row: dict[str, Any] = { + "session_id": session_id, + "input": flattened_messages, + "output_length": output_length, + } + if include_model: + row["model"] = str(canonical_model_id) + if include_pre_gap: + row["pre_gap"] = _event_pre_gap( + bundle_id=bundle_id, + export_idx=export_idx, + event_idx=event_idx, + session_id=session_id, + arrival_time_offset_ms=arrival_time_offset_ms, + prior_offsets_ms=prior_offsets_ms, + warnings=warnings, + ) + rows.append(row) + + return bundle_id, rows, len(emitted_sessions) + + +def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None: + payload = "\n".join(json.dumps(row, ensure_ascii=False) for row in rows) + if payload: + payload += "\n" + path.write_text(payload, encoding="utf-8") + + +def _parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + 
prog="isb1_to_mooncake_trace", + description=( + "Convert ISB1 replay bundles into mooncake_trace-compatible JSONL files." + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--input", + required=True, + help="Single ISB1 bundle JSON, directory, or glob pattern.", + ) + parser.add_argument( + "--output-dir", + type=Path, + required=True, + help="Destination directory for emitted .jsonl files.", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Parse and validate inputs but do not write JSONL files.", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="Print per-bundle progress and warnings.", + ) + parser.add_argument( + "--include-model", + action=argparse.BooleanOptionalAction, + default=True, + help="Include the model field in emitted rows (default: on).", + ) + parser.add_argument( + "--include-pre-gap", + action=argparse.BooleanOptionalAction, + default=True, + help="Include the pre_gap field in emitted rows (default: on).", + ) + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> int: + args = _parse_args(argv if argv is not None else sys.argv[1:]) + + try: + bundle_paths = _iter_bundle_files(args.input) + except FileNotFoundError as exc: + print(str(exc), file=sys.stderr) + return 2 + + if not bundle_paths: + print("ERROR: no ISB1 bundles found", file=sys.stderr) + return 2 + + output_dir = args.output_dir.resolve() + if not args.dry_run: + output_dir.mkdir(parents=True, exist_ok=True) + + warnings = WarningTracker(verbose=args.verbose) + seen_session_owners: dict[str, str] = {} + warned_collisions: set[tuple[str, str]] = set() + + processed_bundles = 0 + rows_emitted = 0 + rows_written = 0 + sessions_emitted = 0 + errors = 0 + + for bundle_path in bundle_paths: + try: + bundle_id, rows, bundle_sessions = _convert_bundle( + bundle_path=bundle_path, + include_model=args.include_model, + include_pre_gap=args.include_pre_gap, + 
seen_session_owners=seen_session_owners, + warned_collisions=warned_collisions, + warnings=warnings, + ) + except (ValueError, FileNotFoundError) as exc: + print(f"ERROR: {bundle_path}: {exc}", file=sys.stderr) + errors += 1 + continue + + processed_bundles += 1 + rows_emitted += len(rows) + sessions_emitted += bundle_sessions + + out_path = output_dir / f"{bundle_id}.jsonl" + if not rows: + warnings.warn(f"bundle {bundle_id} emitted 0 rows after filtering and was not written") + elif not args.dry_run: + _write_jsonl(out_path, rows) + rows_written += len(rows) + + if args.verbose: + action = "would write" if args.dry_run else "wrote" + print( + f"ok {bundle_path}: {action} {len(rows)} row(s) " + f"for {bundle_sessions} session(s) -> {out_path}" + ) + + print( + f"done: {processed_bundles} bundle(s) processed; " + f"{rows_written} row(s) written; " + f"{sessions_emitted} session(s) emitted; " + f"{warnings.count} warning(s) raised" + ) + if args.dry_run: + print(f"note: dry-run enabled; {rows_emitted} row(s) validated and 0 written") + + if rows_emitted == 0: + print("ERROR: no mooncake rows emitted", file=sys.stderr) + return 1 + + return 0 if errors == 0 else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/test_isb1_to_mooncake_trace.py b/tools/test_isb1_to_mooncake_trace.py new file mode 100644 index 000000000..81b721a39 --- /dev/null +++ b/tools/test_isb1_to_mooncake_trace.py @@ -0,0 +1,370 @@ +# SPDX-License-Identifier: Apache-2.0 +"""Contract tests for ``tools/isb1_to_mooncake_trace.py``. + +These tests lock the emitted mooncake JSONL row schema so ISB1 -> mooncake +conversion cannot silently drift from the `aiperf --custom-dataset-type +mooncake_trace` contract or from the assist-mode plan that introduced this +exporter. + +The test suite uses stdlib ``unittest`` only and exercises the exporter through +its public CLI entrypoint (`main`) so flag handling, warnings, summary output, +and on-disk artifacts are all covered. 
+""" + +from __future__ import annotations + +import io +import json +import tempfile +import unittest +from contextlib import redirect_stderr, redirect_stdout +from pathlib import Path + +from tools import isb1_to_mooncake_trace as exporter + + +def _text_block(text: str) -> dict: + return {"type": "text", "text": text} + + +def _code_block(text: str | None, *, language: str | None = None, metadata_language: str | None = None) -> dict: + block = {"type": "code", "text": text} + if language: + block["language"] = language + if metadata_language: + block["metadata"] = {"language": metadata_language} + return block + + +def _message(role: str, *, blocks: list[dict] | None = None, content: str | None = None) -> dict: + payload = {"role": role} + if content is not None: + payload["content"] = content + else: + payload["content_blocks"] = blocks or [] + return payload + + +def _event( + *, + session_id: str | None = "sess-1", + offset_ms: int = 0, + messages: list[dict] | None = None, + output_tokens: int | None = 16, +) -> dict: + payload = { + "arrival_time_offset_ms": offset_ms, + "input_messages": messages or [_message("user", blocks=[_text_block("hello")])], + } + if session_id is not None: + payload["session_id"] = session_id + if output_tokens is not None: + payload["target_output_tokens"] = output_tokens + return payload + + +def _export(trace_id: str, *, model: str = "model-a", events: list[dict] | None = None) -> dict: + return { + "trace_id": trace_id, + "canonical_model_id": model, + "events": events or [], + } + + +def _bundle(bundle_id: str, *, exports: list[dict] | None = None) -> dict: + return { + "adapter_id": "inferencex_trace_replay", + "schema_version": "0.1.0", + "bundle_id": bundle_id, + "exports": exports or [], + } + + +def _load_jsonl(path: Path) -> list[dict]: + lines = path.read_text(encoding="utf-8").splitlines() + return [json.loads(line) for line in lines] + + +class MooncakeExporterTests(unittest.TestCase): + def _run_main(self, 
input_spec: str, output_dir: Path, *extra_args: str) -> tuple[int, str, str]: + stdout = io.StringIO() + stderr = io.StringIO() + argv = ["--input", input_spec, "--output-dir", str(output_dir), *extra_args] + with redirect_stdout(stdout), redirect_stderr(stderr): + exit_code = exporter.main(argv) + return exit_code, stdout.getvalue(), stderr.getvalue() + + def _write_bundle(self, directory: Path, filename: str, payload: dict) -> Path: + path = directory / filename + path.write_text(json.dumps(payload), encoding="utf-8") + return path + + def test_single_turn_event_round_trip(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bundle = _bundle( + "bundle_single", + exports=[ + _export( + "trace-single", + events=[_event(session_id="sess-single", messages=[_message("user", blocks=[_text_block("hello world")])], output_tokens=42)], + ) + ], + ) + bundle_path = self._write_bundle(root, "single.json", bundle) + + exit_code, stdout, stderr = self._run_main(str(bundle_path), out_dir) + self.assertEqual(exit_code, 0, stderr) + self.assertIn("1 bundle(s) processed", stdout) + + rows = _load_jsonl(out_dir / "bundle_single.jsonl") + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0]["session_id"], "sess-single") + self.assertEqual(rows[0]["model"], "model-a") + self.assertEqual(rows[0]["output_length"], 42) + self.assertEqual(rows[0]["pre_gap"], 0.0) + self.assertEqual(rows[0]["input"], [{"role": "user", "content": "hello world"}]) + + def test_multi_turn_session_rows_grouped_by_session_id(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bundle = _bundle( + "bundle_grouped", + exports=[ + _export( + "trace-grouped", + events=[ + _event(session_id="sess-grouped", offset_ms=0, messages=[_message("user", blocks=[_text_block("turn one")])]), + _event(session_id="sess-grouped", offset_ms=1500, messages=[_message("assistant", blocks=[_text_block("turn two")])]), + 
], + ) + ], + ) + bundle_path = self._write_bundle(root, "grouped.json", bundle) + + exit_code, _, stderr = self._run_main(str(bundle_path), out_dir) + self.assertEqual(exit_code, 0, stderr) + rows = _load_jsonl(out_dir / "bundle_grouped.jsonl") + self.assertEqual([row["session_id"] for row in rows], ["sess-grouped", "sess-grouped"]) + self.assertEqual(rows[0]["input"][0]["content"], "turn one") + self.assertEqual(rows[1]["input"][0]["content"], "turn two") + + def test_content_blocks_text_flattening(self) -> None: + blocks = [_text_block("alpha"), _text_block("beta")] + self.assertEqual(exporter._flatten_blocks(blocks), "alpha\n\nbeta") + + def test_content_blocks_code_flattening_fences_with_language(self) -> None: + blocks = [_code_block("print('hi')", language="python")] + self.assertEqual(exporter._flatten_blocks(blocks), "```python\nprint('hi')\n```") + + def test_mixed_text_and_code_blocks_in_one_message(self) -> None: + blocks = [_text_block("before"), _code_block("SELECT 1", metadata_language="sql")] + self.assertEqual(exporter._flatten_blocks(blocks), "before\n\n```sql\nSELECT 1\n```") + + def test_tool_role_message_is_preserved_as_is(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bundle = _bundle( + "bundle_tool", + exports=[ + _export( + "trace-tool", + events=[ + _event( + session_id="sess-tool", + messages=[_message("tool", blocks=[_text_block("[tool_call: ls -R repo/]")])], + ) + ], + ) + ], + ) + bundle_path = self._write_bundle(root, "tool.json", bundle) + + exit_code, _, stderr = self._run_main(str(bundle_path), out_dir) + self.assertEqual(exit_code, 0, stderr) + rows = _load_jsonl(out_dir / "bundle_tool.jsonl") + self.assertEqual(rows[0]["input"], [{"role": "tool", "content": "[tool_call: ls -R repo/]"}]) + + def test_empty_content_blocks_skip_event_with_warning(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bundle = _bundle( + 
"bundle_empty", + exports=[ + _export( + "trace-empty", + events=[ + _event(session_id="sess-empty", messages=[_message("user", blocks=[])]), + _event(session_id="sess-valid", offset_ms=1000, messages=[_message("user", blocks=[_text_block("still here")])]), + ], + ) + ], + ) + bundle_path = self._write_bundle(root, "empty.json", bundle) + + exit_code, _, stderr = self._run_main(str(bundle_path), out_dir, "--verbose") + self.assertEqual(exit_code, 0, stderr) + self.assertIn("skipped event because every message flattened to empty content", stderr) + rows = _load_jsonl(out_dir / "bundle_empty.jsonl") + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0]["session_id"], "sess-valid") + + def test_pre_gap_delta_computation_across_consecutive_events(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bundle = _bundle( + "bundle_gap", + exports=[ + _export( + "trace-gap", + events=[ + _event(session_id="sess-gap", offset_ms=0), + _event(session_id="sess-gap", offset_ms=2500), + ], + ) + ], + ) + bundle_path = self._write_bundle(root, "gap.json", bundle) + + exit_code, _, stderr = self._run_main(str(bundle_path), out_dir) + self.assertEqual(exit_code, 0, stderr) + rows = _load_jsonl(out_dir / "bundle_gap.jsonl") + self.assertEqual(rows[1]["pre_gap"], 2.5) + + def test_first_event_in_session_has_zero_pre_gap(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bundle = _bundle("bundle_first", exports=[_export("trace-first", events=[_event(session_id="sess-first", offset_ms=9000)])]) + bundle_path = self._write_bundle(root, "first.json", bundle) + + exit_code, _, stderr = self._run_main(str(bundle_path), out_dir) + self.assertEqual(exit_code, 0, stderr) + rows = _load_jsonl(out_dir / "bundle_first.jsonl") + self.assertEqual(rows[0]["pre_gap"], 0.0) + + def test_negative_delta_is_clamped_to_zero_with_warning(self) -> None: + with tempfile.TemporaryDirectory() as 
tmp: + root = Path(tmp) + out_dir = root / "out" + bundle = _bundle( + "bundle_negative_gap", + exports=[ + _export( + "trace-negative-gap", + events=[ + _event(session_id="sess-negative", offset_ms=5000), + _event(session_id="sess-negative", offset_ms=2000), + ], + ) + ], + ) + bundle_path = self._write_bundle(root, "negative.json", bundle) + + exit_code, _, stderr = self._run_main(str(bundle_path), out_dir, "--verbose") + self.assertEqual(exit_code, 0, stderr) + self.assertIn("negative arrival delta", stderr) + rows = _load_jsonl(out_dir / "bundle_negative_gap.jsonl") + self.assertEqual(rows[1]["pre_gap"], 0.0) + + def test_no_include_model_flag_strips_model_field(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bundle_path = self._write_bundle(root, "model_off.json", _bundle("bundle_no_model", exports=[_export("trace-model-off", events=[_event(session_id="sess-no-model")])])) + + exit_code, _, stderr = self._run_main(str(bundle_path), out_dir, "--no-include-model") + self.assertEqual(exit_code, 0, stderr) + rows = _load_jsonl(out_dir / "bundle_no_model.jsonl") + self.assertNotIn("model", rows[0]) + + def test_no_include_pre_gap_flag_strips_pre_gap_field(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bundle_path = self._write_bundle(root, "gap_off.json", _bundle("bundle_no_gap", exports=[_export("trace-gap-off", events=[_event(session_id="sess-no-gap")])])) + + exit_code, _, stderr = self._run_main(str(bundle_path), out_dir, "--no-include-pre-gap") + self.assertEqual(exit_code, 0, stderr) + rows = _load_jsonl(out_dir / "bundle_no_gap.jsonl") + self.assertNotIn("pre_gap", rows[0]) + + def test_dry_run_writes_nothing_to_disk(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bundle_path = self._write_bundle(root, "dry.json", _bundle("bundle_dry", exports=[_export("trace-dry", 
events=[_event(session_id="sess-dry")])])) + + exit_code, stdout, stderr = self._run_main(str(bundle_path), out_dir, "--dry-run") + self.assertEqual(exit_code, 0, stderr) + self.assertIn("dry-run enabled", stdout) + self.assertFalse(out_dir.exists()) + + def test_missing_target_output_tokens_errors_clearly(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bad_bundle = _bundle( + "bundle_missing_output", + exports=[_export("trace-missing-output", events=[_event(session_id="sess-missing-output", output_tokens=None)])], + ) + bundle_path = self._write_bundle(root, "missing_output.json", bad_bundle) + + exit_code, _, stderr = self._run_main(str(bundle_path), out_dir) + self.assertEqual(exit_code, 1) + self.assertIn("missing target_output_tokens", stderr) + + def test_session_collision_across_bundles_warns_and_disambiguates(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bundle_one = _bundle("bundle_one", exports=[_export("trace-one", events=[_event(session_id="sess-collision")])]) + bundle_two = _bundle("bundle_two", exports=[_export("trace-two", events=[_event(session_id="sess-collision")])]) + self._write_bundle(root, "a_bundle.json", bundle_one) + self._write_bundle(root, "b_bundle.json", bundle_two) + + exit_code, _, stderr = self._run_main(str(root), out_dir, "--verbose") + self.assertEqual(exit_code, 0, stderr) + self.assertIn("session_id collision across bundles", stderr) + + rows_one = _load_jsonl(out_dir / "bundle_one.jsonl") + rows_two = _load_jsonl(out_dir / "bundle_two.jsonl") + self.assertEqual(rows_one[0]["session_id"], "sess-collision") + self.assertEqual(rows_two[0]["session_id"], "bundle_two::sess-collision") + + def test_directory_input_processes_multiple_bundles(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + self._write_bundle(root, "dir_a.json", _bundle("bundle_dir_a", 
exports=[_export("trace-dir-a", events=[_event(session_id="sess-dir-a")])])) + self._write_bundle(root, "dir_b.json", _bundle("bundle_dir_b", exports=[_export("trace-dir-b", events=[_event(session_id="sess-dir-b")])])) + + exit_code, stdout, stderr = self._run_main(str(root), out_dir) + self.assertEqual(exit_code, 0, stderr) + self.assertIn("2 bundle(s) processed", stdout) + self.assertTrue((out_dir / "bundle_dir_a.jsonl").exists()) + self.assertTrue((out_dir / "bundle_dir_b.jsonl").exists()) + + def test_fallback_to_trace_id_when_event_session_id_missing(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + out_dir = root / "out" + bundle = _bundle( + "bundle_trace_fallback", + exports=[_export("trace-fallback", events=[_event(session_id=None, messages=[_message("user", content="hi")])])], + ) + bundle_path = self._write_bundle(root, "trace_fallback.json", bundle) + + exit_code, _, stderr = self._run_main(str(bundle_path), out_dir) + self.assertEqual(exit_code, 0, stderr) + rows = _load_jsonl(out_dir / "bundle_trace_fallback.jsonl") + self.assertEqual(rows[0]["session_id"], "trace-fallback") + + +if __name__ == "__main__": + unittest.main() diff --git a/tools/validate_mooncake_trace.py b/tools/validate_mooncake_trace.py new file mode 100644 index 000000000..cf139b020 --- /dev/null +++ b/tools/validate_mooncake_trace.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 +"""Validate mooncake trace JSONL files. + +Stdlib-only validator for the compact row schema consumed by +`aiperf profile --custom-dataset-type mooncake_trace`. +Supports validating a single JSONL file, a directory of JSONL files, or a glob +pattern. 
+""" + +from __future__ import annotations + +import argparse +import glob +import json +import sys +from collections import Counter +from pathlib import Path +from typing import Any + +VALID_ROLES = {"user", "assistant", "system", "tool"} +REQUIRED_FIELDS = {"session_id", "input", "output_length"} +OPTIONAL_SUPERSET_FIELDS = {"model", "pre_gap"} +CHECK = "✓" +CROSS = "✗" +WARN = "!" + + +def _looks_like_glob(raw: str) -> bool: + return any(ch in raw for ch in "*?[") + + +def _is_int(value: Any) -> bool: + return isinstance(value, int) and not isinstance(value, bool) + + +def _is_number(value: Any) -> bool: + return (isinstance(value, int) or isinstance(value, float)) and not isinstance(value, bool) + + +def _add_issue(bucket: list[str], message: str, max_issues: int) -> None: + if len(bucket) < max_issues: + bucket.append(message) + + +def _iter_input_files(input_spec: str) -> list[Path]: + if _looks_like_glob(input_spec): + matches = [Path(p).resolve() for p in sorted(glob.glob(input_spec, recursive=True))] + files = [p for p in matches if p.is_file()] + if files: + return files + raise FileNotFoundError(f"no files matched glob: {input_spec}") + + path = Path(input_spec).resolve() + if path.is_file(): + return [path] + if path.is_dir(): + return [candidate.resolve() for candidate in sorted(path.rglob("*.jsonl")) if candidate.is_file()] + raise FileNotFoundError(f"input path not found: {input_spec}") + + +def _validate_message( + message: Any, + *, + line_no: int, + message_idx: int, + errors: list[str], + categories: Counter[str], + max_issues: int, +) -> None: + prefix = f"line {line_no} input[{message_idx}]" + if not isinstance(message, dict): + _add_issue(errors, f"{prefix} must be object", max_issues) + categories["message_not_object"] += 1 + return + + role = message.get("role") + if not isinstance(role, str) or role not in VALID_ROLES: + _add_issue( + errors, + f"{prefix}.role must be one of {sorted(VALID_ROLES)}", + max_issues, + ) + 
categories["invalid_role"] += 1 + + content = message.get("content") + if not isinstance(content, str): + _add_issue(errors, f"{prefix}.content must be str", max_issues) + categories["invalid_content"] += 1 + + +def validate_row( + row: Any, + *, + line_no: int, + allow_superset: bool, + strict: bool, + max_issues: int, +) -> tuple[list[str], list[str], Counter[str]]: + errors: list[str] = [] + warnings: list[str] = [] + categories: Counter[str] = Counter() + + if not isinstance(row, dict): + categories["row_not_object"] += 1 + return [f"line {line_no}: row must be object"], warnings, categories + + allowed_fields = set(REQUIRED_FIELDS) + if allow_superset: + allowed_fields.update(OPTIONAL_SUPERSET_FIELDS) + + unknown_fields = sorted(key for key in row.keys() if key not in allowed_fields) + if unknown_fields: + message = f"line {line_no}: unknown field(s): {', '.join(unknown_fields)}" + if strict: + categories["unknown_field"] += len(unknown_fields) + _add_issue(errors, message, max_issues) + else: + _add_issue(warnings, message, max_issues) + + session_id = row.get("session_id") + if not isinstance(session_id, str) or not session_id: + _add_issue(errors, f"line {line_no}: session_id must be non-empty str", max_issues) + categories["missing_session_id"] += 1 + + input_messages = row.get("input") + if not isinstance(input_messages, list) or not input_messages: + _add_issue(errors, f"line {line_no}: input must be a non-empty list", max_issues) + categories["invalid_input"] += 1 + input_messages = [] + + for message_idx, message in enumerate(input_messages): + if len(errors) >= max_issues: + break + _validate_message( + message, + line_no=line_no, + message_idx=message_idx, + errors=errors, + categories=categories, + max_issues=max_issues, + ) + + output_length = row.get("output_length") + if not _is_int(output_length) or int(output_length) <= 0: + _add_issue(errors, f"line {line_no}: output_length must be positive int", max_issues) + 
categories["invalid_output_length"] += 1 + + if "model" in row and not isinstance(row.get("model"), str): + _add_issue(errors, f"line {line_no}: model must be str", max_issues) + categories["invalid_model"] += 1 + + if "pre_gap" in row: + pre_gap = row.get("pre_gap") + if not _is_number(pre_gap): + _add_issue(errors, f"line {line_no}: pre_gap must be non-negative float", max_issues) + categories["invalid_pre_gap"] += 1 + elif float(pre_gap) < 0.0: + _add_issue(errors, f"line {line_no}: pre_gap must be >= 0.0", max_issues) + categories["invalid_pre_gap"] += 1 + + return errors, warnings, categories + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + prog="validate_mooncake_trace.py", + description="Validate mooncake trace JSONL files or directories.", + ) + parser.add_argument( + "--input", + required=True, + help="JSONL file, directory, or glob pattern to validate.", + ) + parser.add_argument( + "--strict", + action="store_true", + help="Reject unknown fields instead of warning on them.", + ) + parser.add_argument( + "--quiet", + action="store_true", + help="Only print the final summary.", + ) + parser.add_argument( + "--max-errors-per-file", + type=int, + default=5, + help="Maximum issues reported per file (default: 5).", + ) + parser.add_argument( + "--allow-superset", + action=argparse.BooleanOptionalAction, + default=True, + help="Allow the optional mooncake superset fields model and pre_gap (default: on).", + ) + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(argv or sys.argv[1:]) + if args.max_errors_per_file <= 0: + print("--max-errors-per-file must be > 0", file=sys.stderr) + return 2 + + try: + files = _iter_input_files(args.input) + except FileNotFoundError as exc: + print(str(exc), file=sys.stderr) + return 2 + + if not files: + print("No mooncake JSONL files found", file=sys.stderr) + return 2 + + files_checked = 0 + rows_scanned = 0 + failed_files = 0 
+ error_categories: Counter[str] = Counter() + + for file_path in files: + files_checked += 1 + file_errors: list[str] = [] + file_warnings: list[str] = [] + + try: + lines = file_path.read_text(encoding="utf-8").splitlines() + except Exception as exc: + file_errors.append(f"failed to read file: {exc}") + error_categories["file_read_error"] += 1 + lines = [] + + for line_no, raw_line in enumerate(lines, start=1): + if len(file_errors) >= args.max_errors_per_file: + break + rows_scanned += 1 + try: + row = json.loads(raw_line) + except Exception as exc: + _add_issue(file_errors, f"line {line_no}: invalid JSON: {exc}", args.max_errors_per_file) + error_categories["invalid_json"] += 1 + continue + + errors, warnings, categories = validate_row( + row, + line_no=line_no, + allow_superset=args.allow_superset, + strict=args.strict, + max_issues=args.max_errors_per_file, + ) + error_categories.update(categories) + for issue in errors: + _add_issue(file_errors, issue, args.max_errors_per_file) + for warning in warnings: + _add_issue(file_warnings, warning, args.max_errors_per_file) + + if file_errors: + failed_files += 1 + if not args.quiet: + print(f"{CROSS} {file_path}") + for issue in file_errors[: args.max_errors_per_file]: + print(f" {issue}") + elif file_warnings and not args.quiet: + print(f"{WARN} {file_path}") + for warning in file_warnings[: args.max_errors_per_file]: + print(f" {warning}") + elif not args.quiet: + print(f"{CHECK} {file_path}") + + if error_categories: + category_summary = ", ".join( + f"{key}={value}" for key, value in sorted(error_categories.items()) + ) + else: + category_summary = "none" + + print( + f"summary: {files_checked} file(s) checked; {rows_scanned} row(s) scanned; " + f"errors by category: {category_summary}" + ) + + return 0 if failed_files == 0 else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) From 7e1127bb59e77f0512ea1bbf28a0c81c70d868cf Mon Sep 17 00:00:00 2001 From: William Chen 
<57119977+OCWC22@users.noreply.github.com> Date: Tue, 21 Apr 2026 01:00:28 -0700 Subject: [PATCH 07/13] fix(isb1): align mooncake schema with aiperf MooncakeTrace model aiperf's pydantic MooncakeTrace requires the conversation list at `messages` (not `input`) and uses `delay` in milliseconds (not `pre_gap` in seconds). The original exporter would have failed aiperf's validation with `input_mode_count == 0` on every row and silently dropped pacing. - rename emitted field `input` -> `messages` - rename emitted field `pre_gap` (seconds) -> `delay` (milliseconds) - rename CLI flag --include-pre-gap -> --include-delay - rename helper _event_pre_gap -> _event_delay_ms (returns ms as float) - update validator REQUIRED_FIELDS and superset set - update tests (17/17 pass; multi-turn expected value 2500.0 ms) Verified on full 23-bundle re-export: 1226 rows, validator clean, row-2 of a multi-turn session emits delay=15000.0 ms as expected. Field reference: aiperf/src/aiperf/dataset/loader/models.py MooncakeTrace --- tools/isb1_to_mooncake_trace.py | 37 ++++++++++++++++++---------- tools/test_isb1_to_mooncake_trace.py | 26 +++++++++---------- tools/validate_mooncake_trace.py | 28 ++++++++++----------- 3 files changed, 51 insertions(+), 40 deletions(-) diff --git a/tools/isb1_to_mooncake_trace.py b/tools/isb1_to_mooncake_trace.py index 6550a579d..ce98a80bb 100644 --- a/tools/isb1_to_mooncake_trace.py +++ b/tools/isb1_to_mooncake_trace.py @@ -19,15 +19,20 @@ { "session_id": "", - "model": "", # optional - "input": [ + "model": "", # optional, extra field (ignored by aiperf) + "messages": [ {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}, ], "output_length": 256, - "pre_gap": 1.5 # optional, seconds + "delay": 1500 # optional, MILLISECONDS } +The field names above (`messages` and `delay`) are what aiperf's +`MooncakeTrace` pydantic model validates (see +`aiperf/dataset/loader/models.py`). 
`model` is retained for traceability +and accepted as an extra field by `AIPerfBaseModel(extra="allow")`. + ISB1 exports store turn history in `events[].input_messages` with typed `content_blocks`. This exporter flattens those blocks into plain text strings: text blocks are passed through, code blocks are fenced with Markdown triple @@ -279,7 +284,7 @@ def _session_id_for_event( return f"{bundle_id}::{base_session_id}" -def _event_pre_gap( +def _event_delay_ms( *, bundle_id: str, export_idx: int, @@ -289,6 +294,12 @@ def _event_pre_gap( prior_offsets_ms: dict[str, int], warnings: WarningTracker, ) -> float: + """Compute the inter-turn delay in MILLISECONDS for aiperf's `delay` field. + + aiperf's `MooncakeTrace.delay` is specified in milliseconds (see upstream + loader schema). First event in a session returns 0.0; negative deltas are + clamped to 0.0 with a warning. + """ prior = prior_offsets_ms.get(session_id) prior_offsets_ms[session_id] = arrival_time_offset_ms if prior is None: @@ -298,17 +309,17 @@ def _event_pre_gap( if delta_ms < 0: warnings.warn( f"bundle {bundle_id} export[{export_idx}] event[{event_idx}] session {session_id!r} " - f"has negative arrival delta ({delta_ms} ms); clamping pre_gap to 0.0" + f"has negative arrival delta ({delta_ms} ms); clamping delay to 0.0" ) return 0.0 - return delta_ms / 1000.0 + return float(delta_ms) def _convert_bundle( *, bundle_path: Path, include_model: bool, - include_pre_gap: bool, + include_delay: bool, seen_session_owners: dict[str, str], warned_collisions: set[tuple[str, str]], warnings: WarningTracker, @@ -397,13 +408,13 @@ def _convert_bundle( row: dict[str, Any] = { "session_id": session_id, - "input": flattened_messages, + "messages": flattened_messages, "output_length": output_length, } if include_model: row["model"] = str(canonical_model_id) - if include_pre_gap: - row["pre_gap"] = _event_pre_gap( + if include_delay: + row["delay"] = _event_delay_ms( bundle_id=bundle_id, export_idx=export_idx, 
event_idx=event_idx, @@ -460,10 +471,10 @@ def _parse_args(argv: list[str]) -> argparse.Namespace: help="Include the model field in emitted rows (default: on).", ) parser.add_argument( - "--include-pre-gap", + "--include-delay", action=argparse.BooleanOptionalAction, default=True, - help="Include the pre_gap field in emitted rows (default: on).", + help="Include the delay field (milliseconds) in emitted rows (default: on).", ) return parser.parse_args(argv) @@ -500,7 +511,7 @@ def main(argv: list[str] | None = None) -> int: bundle_id, rows, bundle_sessions = _convert_bundle( bundle_path=bundle_path, include_model=args.include_model, - include_pre_gap=args.include_pre_gap, + include_delay=args.include_delay, seen_session_owners=seen_session_owners, warned_collisions=warned_collisions, warnings=warnings, diff --git a/tools/test_isb1_to_mooncake_trace.py b/tools/test_isb1_to_mooncake_trace.py index 81b721a39..c31589989 100644 --- a/tools/test_isb1_to_mooncake_trace.py +++ b/tools/test_isb1_to_mooncake_trace.py @@ -123,8 +123,8 @@ def test_single_turn_event_round_trip(self) -> None: self.assertEqual(rows[0]["session_id"], "sess-single") self.assertEqual(rows[0]["model"], "model-a") self.assertEqual(rows[0]["output_length"], 42) - self.assertEqual(rows[0]["pre_gap"], 0.0) - self.assertEqual(rows[0]["input"], [{"role": "user", "content": "hello world"}]) + self.assertEqual(rows[0]["delay"], 0.0) + self.assertEqual(rows[0]["messages"], [{"role": "user", "content": "hello world"}]) def test_multi_turn_session_rows_grouped_by_session_id(self) -> None: with tempfile.TemporaryDirectory() as tmp: @@ -148,8 +148,8 @@ def test_multi_turn_session_rows_grouped_by_session_id(self) -> None: self.assertEqual(exit_code, 0, stderr) rows = _load_jsonl(out_dir / "bundle_grouped.jsonl") self.assertEqual([row["session_id"] for row in rows], ["sess-grouped", "sess-grouped"]) - self.assertEqual(rows[0]["input"][0]["content"], "turn one") - self.assertEqual(rows[1]["input"][0]["content"], 
"turn two") + self.assertEqual(rows[0]["messages"][0]["content"], "turn one") + self.assertEqual(rows[1]["messages"][0]["content"], "turn two") def test_content_blocks_text_flattening(self) -> None: blocks = [_text_block("alpha"), _text_block("beta")] @@ -186,7 +186,7 @@ def test_tool_role_message_is_preserved_as_is(self) -> None: exit_code, _, stderr = self._run_main(str(bundle_path), out_dir) self.assertEqual(exit_code, 0, stderr) rows = _load_jsonl(out_dir / "bundle_tool.jsonl") - self.assertEqual(rows[0]["input"], [{"role": "tool", "content": "[tool_call: ls -R repo/]"}]) + self.assertEqual(rows[0]["messages"], [{"role": "tool", "content": "[tool_call: ls -R repo/]"}]) def test_empty_content_blocks_skip_event_with_warning(self) -> None: with tempfile.TemporaryDirectory() as tmp: @@ -213,7 +213,7 @@ def test_empty_content_blocks_skip_event_with_warning(self) -> None: self.assertEqual(len(rows), 1) self.assertEqual(rows[0]["session_id"], "sess-valid") - def test_pre_gap_delta_computation_across_consecutive_events(self) -> None: + def test_delay_ms_computation_across_consecutive_events(self) -> None: with tempfile.TemporaryDirectory() as tmp: root = Path(tmp) out_dir = root / "out" @@ -234,9 +234,9 @@ def test_pre_gap_delta_computation_across_consecutive_events(self) -> None: exit_code, _, stderr = self._run_main(str(bundle_path), out_dir) self.assertEqual(exit_code, 0, stderr) rows = _load_jsonl(out_dir / "bundle_gap.jsonl") - self.assertEqual(rows[1]["pre_gap"], 2.5) + self.assertEqual(rows[1]["delay"], 2500.0) - def test_first_event_in_session_has_zero_pre_gap(self) -> None: + def test_first_event_in_session_has_zero_delay(self) -> None: with tempfile.TemporaryDirectory() as tmp: root = Path(tmp) out_dir = root / "out" @@ -246,7 +246,7 @@ def test_first_event_in_session_has_zero_pre_gap(self) -> None: exit_code, _, stderr = self._run_main(str(bundle_path), out_dir) self.assertEqual(exit_code, 0, stderr) rows = _load_jsonl(out_dir / "bundle_first.jsonl") - 
self.assertEqual(rows[0]["pre_gap"], 0.0) + self.assertEqual(rows[0]["delay"], 0.0) def test_negative_delta_is_clamped_to_zero_with_warning(self) -> None: with tempfile.TemporaryDirectory() as tmp: @@ -270,7 +270,7 @@ def test_negative_delta_is_clamped_to_zero_with_warning(self) -> None: self.assertEqual(exit_code, 0, stderr) self.assertIn("negative arrival delta", stderr) rows = _load_jsonl(out_dir / "bundle_negative_gap.jsonl") - self.assertEqual(rows[1]["pre_gap"], 0.0) + self.assertEqual(rows[1]["delay"], 0.0) def test_no_include_model_flag_strips_model_field(self) -> None: with tempfile.TemporaryDirectory() as tmp: @@ -283,16 +283,16 @@ def test_no_include_model_flag_strips_model_field(self) -> None: rows = _load_jsonl(out_dir / "bundle_no_model.jsonl") self.assertNotIn("model", rows[0]) - def test_no_include_pre_gap_flag_strips_pre_gap_field(self) -> None: + def test_no_include_delay_flag_strips_delay_field(self) -> None: with tempfile.TemporaryDirectory() as tmp: root = Path(tmp) out_dir = root / "out" bundle_path = self._write_bundle(root, "gap_off.json", _bundle("bundle_no_gap", exports=[_export("trace-gap-off", events=[_event(session_id="sess-no-gap")])])) - exit_code, _, stderr = self._run_main(str(bundle_path), out_dir, "--no-include-pre-gap") + exit_code, _, stderr = self._run_main(str(bundle_path), out_dir, "--no-include-delay") self.assertEqual(exit_code, 0, stderr) rows = _load_jsonl(out_dir / "bundle_no_gap.jsonl") - self.assertNotIn("pre_gap", rows[0]) + self.assertNotIn("delay", rows[0]) def test_dry_run_writes_nothing_to_disk(self) -> None: with tempfile.TemporaryDirectory() as tmp: diff --git a/tools/validate_mooncake_trace.py b/tools/validate_mooncake_trace.py index cf139b020..b84e74429 100644 --- a/tools/validate_mooncake_trace.py +++ b/tools/validate_mooncake_trace.py @@ -18,8 +18,8 @@ from typing import Any VALID_ROLES = {"user", "assistant", "system", "tool"} -REQUIRED_FIELDS = {"session_id", "input", "output_length"} 
-OPTIONAL_SUPERSET_FIELDS = {"model", "pre_gap"} +REQUIRED_FIELDS = {"session_id", "messages", "output_length"} +OPTIONAL_SUPERSET_FIELDS = {"model", "delay"} CHECK = "✓" CROSS = "✗" WARN = "!" @@ -122,10 +122,10 @@ def validate_row( _add_issue(errors, f"line {line_no}: session_id must be non-empty str", max_issues) categories["missing_session_id"] += 1 - input_messages = row.get("input") + input_messages = row.get("messages") if not isinstance(input_messages, list) or not input_messages: - _add_issue(errors, f"line {line_no}: input must be a non-empty list", max_issues) - categories["invalid_input"] += 1 + _add_issue(errors, f"line {line_no}: messages must be a non-empty list", max_issues) + categories["invalid_messages"] += 1 input_messages = [] for message_idx, message in enumerate(input_messages): @@ -149,14 +149,14 @@ def validate_row( _add_issue(errors, f"line {line_no}: model must be str", max_issues) categories["invalid_model"] += 1 - if "pre_gap" in row: - pre_gap = row.get("pre_gap") - if not _is_number(pre_gap): - _add_issue(errors, f"line {line_no}: pre_gap must be non-negative float", max_issues) - categories["invalid_pre_gap"] += 1 - elif float(pre_gap) < 0.0: - _add_issue(errors, f"line {line_no}: pre_gap must be >= 0.0", max_issues) - categories["invalid_pre_gap"] += 1 + if "delay" in row: + delay = row.get("delay") + if not _is_number(delay): + _add_issue(errors, f"line {line_no}: delay must be a non-negative number (milliseconds)", max_issues) + categories["invalid_delay"] += 1 + elif float(delay) < 0.0: + _add_issue(errors, f"line {line_no}: delay must be >= 0.0", max_issues) + categories["invalid_delay"] += 1 return errors, warnings, categories @@ -191,7 +191,7 @@ def parse_args(argv: list[str]) -> argparse.Namespace: "--allow-superset", action=argparse.BooleanOptionalAction, default=True, - help="Allow the optional mooncake superset fields model and pre_gap (default: on).", + help="Allow the optional mooncake superset fields model and delay 
(default: on).", ) return parser.parse_args(argv) From 8a509064ed53ab8b7043eac69471f1f9dcde2095 Mon Sep 17 00:00:00 2001 From: William Chen <57119977+OCWC22@users.noreply.github.com> Date: Tue, 21 Apr 2026 03:15:53 -0700 Subject: [PATCH 08/13] isb1: ship non-preview mooncake JSONL bundles (LFS) 17 bundles / 1142 rows / 22 sessions across core/extension_32k/extension_64k/extension_131k. Preview lanes (500k, 1m) deferred per v1 plan. Manifest tracks per-bundle size, session count, scale band, and workload family. Ran the exporter once in directory mode against datasets/isb1/exports/ and pruned preview output before staging so v1 ships only the non-preview bundles. This keeps the directory-mode manifest filtering behavior and avoids the glob-mode manifest-selection footgun noted in the plan. --- datasets/isb1/.gitattributes | 1 + .../core/chat_8k1k/isb1_core_chat_8k1k.jsonl | 3 + .../isb1_core_chat_8k1k_qwen3_5.jsonl | 3 + .../core/code_8k1k/isb1_core_code_8k1k.jsonl | 3 + .../isb1_core_code_8k1k_qwen3_5.jsonl | 3 + .../isb1_extension_131k_chat_131k1k.jsonl | 3 + ...isb1_extension_131k_chat_131k1k_dsr1.jsonl | 3 + ...1_extension_131k_chat_131k1k_qwen3_5.jsonl | 3 + ...isb1_extension_131k_vllm_code_131k1k.jsonl | 3 + ...ension_131k_vllm_code_131k1k_qwen3_5.jsonl | 3 + .../isb1_extension_32k_chat_32k1k.jsonl | 3 + ...sb1_extension_32k_chat_32k1k_qwen3_5.jsonl | 3 + .../isb1_extension_32k_code_32k1k.jsonl | 3 + ...sb1_extension_32k_code_32k1k_qwen3_5.jsonl | 3 + .../isb1_extension_64k_chat_64k1k.jsonl | 3 + ...sb1_extension_64k_chat_64k1k_qwen3_5.jsonl | 3 + .../isb1_extension_64k_code_64k1k.jsonl | 3 + ...sb1_extension_64k_code_64k1k_qwen3_5.jsonl | 3 + datasets/isb1/mooncake/manifest.json | 259 ++++++++++++++++++ 19 files changed, 311 insertions(+) create mode 100644 datasets/isb1/mooncake/core/chat_8k1k/isb1_core_chat_8k1k.jsonl create mode 100644 datasets/isb1/mooncake/core/chat_8k1k_qwen3.5/isb1_core_chat_8k1k_qwen3_5.jsonl create mode 100644
datasets/isb1/mooncake/core/code_8k1k/isb1_core_code_8k1k.jsonl create mode 100644 datasets/isb1/mooncake/core/code_8k1k_qwen3.5/isb1_core_code_8k1k_qwen3_5.jsonl create mode 100644 datasets/isb1/mooncake/extension_131k/chat_131k1k/isb1_extension_131k_chat_131k1k.jsonl create mode 100644 datasets/isb1/mooncake/extension_131k/chat_131k1k_dsr1/isb1_extension_131k_chat_131k1k_dsr1.jsonl create mode 100644 datasets/isb1/mooncake/extension_131k/chat_131k1k_qwen3.5/isb1_extension_131k_chat_131k1k_qwen3_5.jsonl create mode 100644 datasets/isb1/mooncake/extension_131k/code_131k1k/isb1_extension_131k_vllm_code_131k1k.jsonl create mode 100644 datasets/isb1/mooncake/extension_131k/code_131k1k_qwen3.5/isb1_extension_131k_vllm_code_131k1k_qwen3_5.jsonl create mode 100644 datasets/isb1/mooncake/extension_32k/chat_32k1k/isb1_extension_32k_chat_32k1k.jsonl create mode 100644 datasets/isb1/mooncake/extension_32k/chat_32k1k_qwen3.5/isb1_extension_32k_chat_32k1k_qwen3_5.jsonl create mode 100644 datasets/isb1/mooncake/extension_32k/code_32k1k/isb1_extension_32k_code_32k1k.jsonl create mode 100644 datasets/isb1/mooncake/extension_32k/code_32k1k_qwen3.5/isb1_extension_32k_code_32k1k_qwen3_5.jsonl create mode 100644 datasets/isb1/mooncake/extension_64k/chat_64k1k/isb1_extension_64k_chat_64k1k.jsonl create mode 100644 datasets/isb1/mooncake/extension_64k/chat_64k1k_qwen3.5/isb1_extension_64k_chat_64k1k_qwen3_5.jsonl create mode 100644 datasets/isb1/mooncake/extension_64k/code_64k1k/isb1_extension_64k_code_64k1k.jsonl create mode 100644 datasets/isb1/mooncake/extension_64k/code_64k1k_qwen3.5/isb1_extension_64k_code_64k1k_qwen3_5.jsonl create mode 100644 datasets/isb1/mooncake/manifest.json diff --git a/datasets/isb1/.gitattributes b/datasets/isb1/.gitattributes index 006356f24..3c235758f 100644 --- a/datasets/isb1/.gitattributes +++ b/datasets/isb1/.gitattributes @@ -1,2 +1,3 @@ exports/**/*.json filter=lfs diff=lfs merge=lfs -text linguist-generated=true converted/**/*.json filter=lfs 
diff=lfs merge=lfs -text linguist-generated=true +mooncake/**/*.jsonl filter=lfs diff=lfs merge=lfs -text linguist-generated=true diff --git a/datasets/isb1/mooncake/core/chat_8k1k/isb1_core_chat_8k1k.jsonl b/datasets/isb1/mooncake/core/chat_8k1k/isb1_core_chat_8k1k.jsonl new file mode 100644 index 000000000..cd3a9a0e5 --- /dev/null +++ b/datasets/isb1/mooncake/core/chat_8k1k/isb1_core_chat_8k1k.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb11fb9e443e7e99b4892722dd04f041516eb8907ee280f5bf4fe45c8833e0b +size 1000005 diff --git a/datasets/isb1/mooncake/core/chat_8k1k_qwen3.5/isb1_core_chat_8k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/core/chat_8k1k_qwen3.5/isb1_core_chat_8k1k_qwen3_5.jsonl new file mode 100644 index 000000000..e835b56bc --- /dev/null +++ b/datasets/isb1/mooncake/core/chat_8k1k_qwen3.5/isb1_core_chat_8k1k_qwen3_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c81f79b04496c866d258a8a27e7588ced8cd1c831a52ee6cac4a5b8a0ec584b +size 28338 diff --git a/datasets/isb1/mooncake/core/code_8k1k/isb1_core_code_8k1k.jsonl b/datasets/isb1/mooncake/core/code_8k1k/isb1_core_code_8k1k.jsonl new file mode 100644 index 000000000..edc93f231 --- /dev/null +++ b/datasets/isb1/mooncake/core/code_8k1k/isb1_core_code_8k1k.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a23a5c477b3d6d533dfeb49a88b32c983966092a702c21abf997a5be547d85 +size 500685 diff --git a/datasets/isb1/mooncake/core/code_8k1k_qwen3.5/isb1_core_code_8k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/core/code_8k1k_qwen3.5/isb1_core_code_8k1k_qwen3_5.jsonl new file mode 100644 index 000000000..e90ba7f07 --- /dev/null +++ b/datasets/isb1/mooncake/core/code_8k1k_qwen3.5/isb1_core_code_8k1k_qwen3_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d880ef0667432805bf86d83a69766b56d0ae17efeea5c33323ed3592c88bdc1c +size 30372 diff --git 
a/datasets/isb1/mooncake/extension_131k/chat_131k1k/isb1_extension_131k_chat_131k1k.jsonl b/datasets/isb1/mooncake/extension_131k/chat_131k1k/isb1_extension_131k_chat_131k1k.jsonl new file mode 100644 index 000000000..87c9e24fb --- /dev/null +++ b/datasets/isb1/mooncake/extension_131k/chat_131k1k/isb1_extension_131k_chat_131k1k.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3d5e65ffa95472cc174a25e5095ef45c8b2d3d1e415c8152f57d4ff93b47ae +size 289119 diff --git a/datasets/isb1/mooncake/extension_131k/chat_131k1k_dsr1/isb1_extension_131k_chat_131k1k_dsr1.jsonl b/datasets/isb1/mooncake/extension_131k/chat_131k1k_dsr1/isb1_extension_131k_chat_131k1k_dsr1.jsonl new file mode 100644 index 000000000..256ad4d63 --- /dev/null +++ b/datasets/isb1/mooncake/extension_131k/chat_131k1k_dsr1/isb1_extension_131k_chat_131k1k_dsr1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f837a0500761e0592464cac5bba7758a53dbac22dcc98da205ae4b0bf22945e +size 73434 diff --git a/datasets/isb1/mooncake/extension_131k/chat_131k1k_qwen3.5/isb1_extension_131k_chat_131k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/extension_131k/chat_131k1k_qwen3.5/isb1_extension_131k_chat_131k1k_qwen3_5.jsonl new file mode 100644 index 000000000..3f0ea9270 --- /dev/null +++ b/datasets/isb1/mooncake/extension_131k/chat_131k1k_qwen3.5/isb1_extension_131k_chat_131k1k_qwen3_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c4e46499dd39efbde68145e150710da966d56deef39c432f059d81522e5009 +size 73542 diff --git a/datasets/isb1/mooncake/extension_131k/code_131k1k/isb1_extension_131k_vllm_code_131k1k.jsonl b/datasets/isb1/mooncake/extension_131k/code_131k1k/isb1_extension_131k_vllm_code_131k1k.jsonl new file mode 100644 index 000000000..0cfb005a8 --- /dev/null +++ b/datasets/isb1/mooncake/extension_131k/code_131k1k/isb1_extension_131k_vllm_code_131k1k.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c5373acb3dbb0c3cbb091f838aefff6738d08f71b508f4d5499751e99912f4b7 +size 4896708 diff --git a/datasets/isb1/mooncake/extension_131k/code_131k1k_qwen3.5/isb1_extension_131k_vllm_code_131k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/extension_131k/code_131k1k_qwen3.5/isb1_extension_131k_vllm_code_131k1k_qwen3_5.jsonl new file mode 100644 index 000000000..e3cb22d2d --- /dev/null +++ b/datasets/isb1/mooncake/extension_131k/code_131k1k_qwen3.5/isb1_extension_131k_vllm_code_131k1k_qwen3_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6b0f6633c2d5058858623975ce12161abd3415f5915b2c8cca6b04ac0e6634 +size 4896728 diff --git a/datasets/isb1/mooncake/extension_32k/chat_32k1k/isb1_extension_32k_chat_32k1k.jsonl b/datasets/isb1/mooncake/extension_32k/chat_32k1k/isb1_extension_32k_chat_32k1k.jsonl new file mode 100644 index 000000000..fdf1c1b6a --- /dev/null +++ b/datasets/isb1/mooncake/extension_32k/chat_32k1k/isb1_extension_32k_chat_32k1k.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42e8963cdd5dcf1e905a73ff803872186ea154d1c0a9a4bf3ce5b191f2a016ce +size 480063 diff --git a/datasets/isb1/mooncake/extension_32k/chat_32k1k_qwen3.5/isb1_extension_32k_chat_32k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/extension_32k/chat_32k1k_qwen3.5/isb1_extension_32k_chat_32k1k_qwen3_5.jsonl new file mode 100644 index 000000000..32833670d --- /dev/null +++ b/datasets/isb1/mooncake/extension_32k/chat_32k1k_qwen3.5/isb1_extension_32k_chat_32k1k_qwen3_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fe68098a53f9dc45ea1245d286dba60d8eb60ae128544b109e6206408bc629a +size 121224 diff --git a/datasets/isb1/mooncake/extension_32k/code_32k1k/isb1_extension_32k_code_32k1k.jsonl b/datasets/isb1/mooncake/extension_32k/code_32k1k/isb1_extension_32k_code_32k1k.jsonl new file mode 100644 index 000000000..f5ab90fb3 --- /dev/null +++ b/datasets/isb1/mooncake/extension_32k/code_32k1k/isb1_extension_32k_code_32k1k.jsonl 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:191336fefbc545f008090551fe5456d2e694b58c23f21e39bf9fc917237bb058 +size 252231 diff --git a/datasets/isb1/mooncake/extension_32k/code_32k1k_qwen3.5/isb1_extension_32k_code_32k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/extension_32k/code_32k1k_qwen3.5/isb1_extension_32k_code_32k1k_qwen3_5.jsonl new file mode 100644 index 000000000..ad897bef3 --- /dev/null +++ b/datasets/isb1/mooncake/extension_32k/code_32k1k_qwen3.5/isb1_extension_32k_code_32k1k_qwen3_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01212fc97c2c9d7214df408753e19a19f17d4c52a57232ed1f0d2fc24a75f289 +size 64266 diff --git a/datasets/isb1/mooncake/extension_64k/chat_64k1k/isb1_extension_64k_chat_64k1k.jsonl b/datasets/isb1/mooncake/extension_64k/chat_64k1k/isb1_extension_64k_chat_64k1k.jsonl new file mode 100644 index 000000000..cca5d7e7a --- /dev/null +++ b/datasets/isb1/mooncake/extension_64k/chat_64k1k/isb1_extension_64k_chat_64k1k.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07dc77028ba457e98cffb58f4fb32bc0b2aa124eda8808b6827d047265f9ea4a +size 884355 diff --git a/datasets/isb1/mooncake/extension_64k/chat_64k1k_qwen3.5/isb1_extension_64k_chat_64k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/extension_64k/chat_64k1k_qwen3.5/isb1_extension_64k_chat_64k1k_qwen3_5.jsonl new file mode 100644 index 000000000..fda8b2c27 --- /dev/null +++ b/datasets/isb1/mooncake/extension_64k/chat_64k1k_qwen3.5/isb1_extension_64k_chat_64k1k_qwen3_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c79eafd3d6b1207f3a2fba8b52abd06efd9376fbbc08c609f09c851eed04973d +size 222834 diff --git a/datasets/isb1/mooncake/extension_64k/code_64k1k/isb1_extension_64k_code_64k1k.jsonl b/datasets/isb1/mooncake/extension_64k/code_64k1k/isb1_extension_64k_code_64k1k.jsonl new file mode 100644 index 000000000..52915fff7 --- /dev/null +++ 
b/datasets/isb1/mooncake/extension_64k/code_64k1k/isb1_extension_64k_code_64k1k.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c628ca60e46d28f2c7e5fe8b9e769e0517635e12a594c19777f450c54df8810e +size 144939 diff --git a/datasets/isb1/mooncake/extension_64k/code_64k1k_qwen3.5/isb1_extension_64k_code_64k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/extension_64k/code_64k1k_qwen3.5/isb1_extension_64k_code_64k1k_qwen3_5.jsonl new file mode 100644 index 000000000..f462e5227 --- /dev/null +++ b/datasets/isb1/mooncake/extension_64k/code_64k1k_qwen3.5/isb1_extension_64k_code_64k1k_qwen3_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff29c23002f479f006ccc6de01a081b083be628ec4a2a025afc1e00aea5761c +size 36906 diff --git a/datasets/isb1/mooncake/manifest.json b/datasets/isb1/mooncake/manifest.json new file mode 100644 index 000000000..906b69db0 --- /dev/null +++ b/datasets/isb1/mooncake/manifest.json @@ -0,0 +1,259 @@ +{ + "schema_version": "1.0.0", + "generated_at": "2026-04-21T10:12:10Z", + "generator": "tools/isb1_to_mooncake_trace.py + one-shot inline walker", + "dataset_type": "mooncake_trace", + "total_traces": 17, + "total_requests": 1142, + "total_sessions": 22, + "total_jsonl_bytes": 13995749, + "traces": [ + { + "relative_path": "core/chat_8k1k/isb1_core_chat_8k1k.jsonl", + "bundle_id": "isb1_core_chat_8k1k", + "source_export": "datasets/isb1/exports/core/chat_8k1k.json", + "models": [ + "deepseek_r1_0528", + "glm_5", + "gpt_oss_120b", + "minimax_m2_5" + ], + "num_requests": 228, + "session_count": 2, + "scale_band": "8k", + "workload_family": "chat", + "jsonl_bytes": 1000005 + }, + { + "relative_path": "core/chat_8k1k_qwen3.5/isb1_core_chat_8k1k_qwen3_5.jsonl", + "bundle_id": "isb1_core_chat_8k1k_qwen3_5", + "source_export": "datasets/isb1/exports/core/chat_8k1k_qwen3.5.json", + "models": [ + "qwen3_5_397b_a17b" + ], + "num_requests": 12, + "session_count": 1, + "scale_band": "8k", + "workload_family": 
"chat", + "jsonl_bytes": 28338 + }, + { + "relative_path": "core/code_8k1k/isb1_core_code_8k1k.jsonl", + "bundle_id": "isb1_core_code_8k1k", + "source_export": "datasets/isb1/exports/core/code_8k1k.json", + "models": [ + "deepseek_r1_0528", + "glm_5", + "gpt_oss_120b", + "minimax_m2_5" + ], + "num_requests": 180, + "session_count": 3, + "scale_band": "8k", + "workload_family": "code", + "jsonl_bytes": 500685 + }, + { + "relative_path": "core/code_8k1k_qwen3.5/isb1_core_code_8k1k_qwen3_5.jsonl", + "bundle_id": "isb1_core_code_8k1k_qwen3_5", + "source_export": "datasets/isb1/exports/core/code_8k1k_qwen3.5.json", + "models": [ + "qwen3_5_397b_a17b" + ], + "num_requests": 12, + "session_count": 1, + "scale_band": "8k", + "workload_family": "code", + "jsonl_bytes": 30372 + }, + { + "relative_path": "extension_131k/chat_131k1k/isb1_extension_131k_chat_131k1k.jsonl", + "bundle_id": "isb1_extension_131k_chat_131k1k", + "source_export": "datasets/isb1/exports/extension_131k/chat_131k1k.json", + "models": [ + "deepseek_r1_0528", + "glm_5", + "gpt_oss_120b", + "minimax_m2_5" + ], + "num_requests": 108, + "session_count": 1, + "scale_band": "131k", + "workload_family": "chat", + "jsonl_bytes": 289119 + }, + { + "relative_path": "extension_131k/chat_131k1k_dsr1/isb1_extension_131k_chat_131k1k_dsr1.jsonl", + "bundle_id": "isb1_extension_131k_chat_131k1k_dsr1", + "source_export": "datasets/isb1/exports/extension_131k/chat_131k1k_dsr1.json", + "models": [ + "deepseek_r1_0528" + ], + "num_requests": 27, + "session_count": 1, + "scale_band": "131k", + "workload_family": "chat", + "jsonl_bytes": 73434 + }, + { + "relative_path": "extension_131k/chat_131k1k_qwen3.5/isb1_extension_131k_chat_131k1k_qwen3_5.jsonl", + "bundle_id": "isb1_extension_131k_chat_131k1k_qwen3_5", + "source_export": "datasets/isb1/exports/extension_131k/chat_131k1k_qwen3.5.json", + "models": [ + "qwen3_5_397b_a17b" + ], + "num_requests": 27, + "session_count": 1, + "scale_band": "131k", + "workload_family": 
"chat", + "jsonl_bytes": 73542 + }, + { + "relative_path": "extension_131k/code_131k1k/isb1_extension_131k_vllm_code_131k1k.jsonl", + "bundle_id": "isb1_extension_131k_vllm_code_131k1k", + "source_export": "datasets/isb1/exports/extension_131k/code_131k1k.json", + "models": [ + "deepseek_r1_0528", + "glm_5", + "gpt_oss_120b", + "minimax_m2_5", + "kimi_k2_5", + "qwen3_5_397b_a17b" + ], + "num_requests": 4, + "session_count": 1, + "scale_band": "131k", + "workload_family": "code", + "jsonl_bytes": 4896708 + }, + { + "relative_path": "extension_131k/code_131k1k_qwen3.5/isb1_extension_131k_vllm_code_131k1k_qwen3_5.jsonl", + "bundle_id": "isb1_extension_131k_vllm_code_131k1k_qwen3_5", + "source_export": "datasets/isb1/exports/extension_131k/code_131k1k_qwen3.5.json", + "models": [ + "qwen3_5_397b_a17b" + ], + "num_requests": 4, + "session_count": 1, + "scale_band": "131k", + "workload_family": "code", + "jsonl_bytes": 4896728 + }, + { + "relative_path": "extension_32k/chat_32k1k/isb1_extension_32k_chat_32k1k.jsonl", + "bundle_id": "isb1_extension_32k_chat_32k1k", + "source_export": "datasets/isb1/exports/extension_32k/chat_32k1k.json", + "models": [ + "deepseek_r1_0528", + "glm_5", + "gpt_oss_120b", + "minimax_m2_5" + ], + "num_requests": 108, + "session_count": 1, + "scale_band": "32k", + "workload_family": "chat", + "jsonl_bytes": 480063 + }, + { + "relative_path": "extension_32k/chat_32k1k_qwen3.5/isb1_extension_32k_chat_32k1k_qwen3_5.jsonl", + "bundle_id": "isb1_extension_32k_chat_32k1k_qwen3_5", + "source_export": "datasets/isb1/exports/extension_32k/chat_32k1k_qwen3.5.json", + "models": [ + "qwen3_5_397b_a17b" + ], + "num_requests": 27, + "session_count": 1, + "scale_band": "32k", + "workload_family": "chat", + "jsonl_bytes": 121224 + }, + { + "relative_path": "extension_32k/code_32k1k/isb1_extension_32k_code_32k1k.jsonl", + "bundle_id": "isb1_extension_32k_code_32k1k", + "source_export": "datasets/isb1/exports/extension_32k/code_32k1k.json", + "models": [ + 
"deepseek_r1_0528", + "glm_5", + "gpt_oss_120b", + "minimax_m2_5" + ], + "num_requests": 108, + "session_count": 2, + "scale_band": "32k", + "workload_family": "code", + "jsonl_bytes": 252231 + }, + { + "relative_path": "extension_32k/code_32k1k_qwen3.5/isb1_extension_32k_code_32k1k_qwen3_5.jsonl", + "bundle_id": "isb1_extension_32k_code_32k1k_qwen3_5", + "source_export": "datasets/isb1/exports/extension_32k/code_32k1k_qwen3.5.json", + "models": [ + "qwen3_5_397b_a17b" + ], + "num_requests": 27, + "session_count": 2, + "scale_band": "32k", + "workload_family": "code", + "jsonl_bytes": 64266 + }, + { + "relative_path": "extension_64k/chat_64k1k/isb1_extension_64k_chat_64k1k.jsonl", + "bundle_id": "isb1_extension_64k_chat_64k1k", + "source_export": "datasets/isb1/exports/extension_64k/chat_64k1k.json", + "models": [ + "deepseek_r1_0528", + "glm_5", + "gpt_oss_120b", + "minimax_m2_5" + ], + "num_requests": 156, + "session_count": 1, + "scale_band": "64k", + "workload_family": "chat", + "jsonl_bytes": 884355 + }, + { + "relative_path": "extension_64k/chat_64k1k_qwen3.5/isb1_extension_64k_chat_64k1k_qwen3_5.jsonl", + "bundle_id": "isb1_extension_64k_chat_64k1k_qwen3_5", + "source_export": "datasets/isb1/exports/extension_64k/chat_64k1k_qwen3.5.json", + "models": [ + "qwen3_5_397b_a17b" + ], + "num_requests": 39, + "session_count": 1, + "scale_band": "64k", + "workload_family": "chat", + "jsonl_bytes": 222834 + }, + { + "relative_path": "extension_64k/code_64k1k/isb1_extension_64k_code_64k1k.jsonl", + "bundle_id": "isb1_extension_64k_code_64k1k", + "source_export": "datasets/isb1/exports/extension_64k/code_64k1k.json", + "models": [ + "deepseek_r1_0528", + "glm_5", + "gpt_oss_120b", + "minimax_m2_5" + ], + "num_requests": 60, + "session_count": 1, + "scale_band": "64k", + "workload_family": "code", + "jsonl_bytes": 144939 + }, + { + "relative_path": "extension_64k/code_64k1k_qwen3.5/isb1_extension_64k_code_64k1k_qwen3_5.jsonl", + "bundle_id": 
"isb1_extension_64k_code_64k1k_qwen3_5", + "source_export": "datasets/isb1/exports/extension_64k/code_64k1k_qwen3.5.json", + "models": [ + "qwen3_5_397b_a17b" + ], + "num_requests": 15, + "session_count": 1, + "scale_band": "64k", + "workload_family": "code", + "jsonl_bytes": 36906 + } + ] +} From 055767988fe3ffe40b191f0974b904380e6366bd Mon Sep 17 00:00:00 2001 From: William Chen <57119977+OCWC22@users.noreply.github.com> Date: Tue, 21 Apr 2026 03:21:59 -0700 Subject: [PATCH 09/13] isb1: add mooncake sweep config (8k / 32k / 131k) Schema mirrors multiturn-agentic-trace-isb1.yaml. TP x users x offload surfaces match the kv-cache-tester sibling. Preview lanes (500k, 1m) intentionally omitted in v1. --- ...multiturn-agentic-trace-isb1-mooncake.yaml | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/configs/multiturn-agentic-trace-isb1-mooncake.yaml diff --git a/.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml b/.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml new file mode 100644 index 000000000..c88d1ccb8 --- /dev/null +++ b/.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml @@ -0,0 +1,34 @@ +# ISB1 sweep cells for Cam's aiperf / mooncake_trace replay flow. +# Schema mirrors .github/configs/multiturn-agentic-trace.yaml and +# .github/configs/multiturn-agentic-trace-isb1.yaml. +# 8k code cells expect MOONCAKE_INPUT=datasets/isb1/mooncake/core/code_8k1k/. +# 32k chat cells expect MOONCAKE_INPUT=datasets/isb1/mooncake/extension_32k/chat_32k1k*/. +# 131k code cells expect MOONCAKE_INPUT=datasets/isb1/mooncake/extension_131k/*_131k1k*/. +# Preview 500k / 1m lanes are intentionally omitted in v1. 
+ +h100-fp8-qwen3-isb1-mooncake-code-8k-lmcache: + tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off"]} + tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]} + +h200-fp8-qwen3-isb1-mooncake-code-8k-lmcache: + tp2: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]} + tp4: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]} + +h200-fp8-qwen3-isb1-mooncake-chat-32k-lmcache: + tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off"]} + tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]} + +h200-fp8-qwen3-isb1-mooncake-code-131k-lmcache: + tp4: {users: [1, 2, 4, 8], offload: ["on", "off"]} + tp8: {users: [1, 2, 4, 8, 16], offload: ["on", "off"]} + +b200-fp4-dsr1-isb1-mooncake-code-8k-lmcache: + tp4: {ep: 4, users: [4, 8, 16, 32, 64, 128, 256], offload: ["on", "off"]} + tp8: {ep: 8, users: [8, 16, 32, 64, 128, 256, 512], offload: ["on", "off"]} + +b200-fp4-dsr1-isb1-mooncake-chat-32k-lmcache: + tp4: {ep: 4, users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]} + tp8: {ep: 8, users: [1, 2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]} + +b200-fp4-dsr1-isb1-mooncake-code-131k-lmcache: + tp8: {ep: 8, users: [1, 2, 4, 8, 16], offload: ["on", "off"]} From 3f8ffd1e1dac48d092f1d8e95236d167b24bcead Mon Sep 17 00:00:00 2001 From: William Chen <57119977+OCWC22@users.noreply.github.com> Date: Tue, 21 Apr 2026 03:23:03 -0700 Subject: [PATCH 10/13] docs: operator recipe for MOONCAKE_INPUT (aiperf mooncake_trace) Docs-only recipe for pulling isb1/mooncake/ JSONLs through the upstream aiperf harness. Preserves legacy sammshen/lmcache-agentic-traces fallback when MOONCAKE_INPUT is unset. 
--- datasets/isb1/RECIPE_MOONCAKE.md | 151 +++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 datasets/isb1/RECIPE_MOONCAKE.md diff --git a/datasets/isb1/RECIPE_MOONCAKE.md b/datasets/isb1/RECIPE_MOONCAKE.md new file mode 100644 index 000000000..1db7d33b3 --- /dev/null +++ b/datasets/isb1/RECIPE_MOONCAKE.md @@ -0,0 +1,151 @@ +# Operator recipe for `MOONCAKE_INPUT` + +> **These diffs target upstream harness files (`benchmarks/single_node/multiturn_fp8_*_lmcache_aiperf.sh`, `.github/workflows/benchmark-multiturn-tmpl.yml`, `.github/workflows/multiturn-sweep.yml`). Do NOT apply locally — they are for operators pulling this data into the upstream harness.** + +## MOONCAKE_INPUT semantics + +Recommended `MOONCAKE_INPUT` support covers three cases: + +1. **Local dir** + `MOONCAKE_INPUT=datasets/isb1/mooncake/core/code_8k1k` + Concatenate local `*.jsonl` files into the single `$TRACE_FILE` the harness already passes to `aiperf profile --custom-dataset-type mooncake_trace`. + +2. **HF repo or repo subdir** + `MOONCAKE_INPUT=hf_semianalysisai--isb1-cc-traces/mooncake/core/code_8k1k` + Download the dataset repo, optionally target a subdirectory, then concatenate matched cached `*.jsonl` files into `$TRACE_FILE`. + +3. **Unset** + Preserve the current `sammshen/lmcache-agentic-traces` download path and parquet/jsonl fallback so existing sweeps continue to work when `MOONCAKE_INPUT` is not provided. + +## Shell script hunk + +Apply this hunk to the H100 shell script. 
**H200 and B200 use the same dataset-materialization hunk.** + +```diff +--- a/benchmarks/single_node/multiturn_fp8_h100_lmcache_aiperf.sh ++++ b/benchmarks/single_node/multiturn_fp8_h100_lmcache_aiperf.sh +@@ -9,6 +9,9 @@ + # Required env vars: + # MODEL, TP, USERS, OFFLOAD_MODE, TOTAL_CPU_DRAM_GB, RESULT_DIR + # Optional: ++# MOONCAKE_INPUT: ++# - /path/to/mooncake/subdir ++# - hf_--[/optional/subdir] + # PORT (default 8888), REQUEST_TIMEOUT (default 3600) + # DURATION (if set, runs for this many seconds; otherwise runs to completion) +@@ -60,16 +63,37 @@ mkdir -p "$RESULT_DIR" +-# ---- Download and convert LMCache traces to mooncake format ---------------- +-echo "Downloading LMCache traces..." +-hf download sammshen/lmcache-agentic-traces --repo-type dataset +- +-echo "Converting LMCache traces to mooncake format..." ++# ---- Resolve and materialize mooncake traces -------------------------------- ++if [[ -n "${MOONCAKE_INPUT:-}" && "${MOONCAKE_INPUT}" == hf_* ]]; then ++ HF_SPEC="${MOONCAKE_INPUT#hf_}" ++ HF_REPO_SPEC="${HF_SPEC%%/*}" ++ HF_REPO="${HF_REPO_SPEC/--//}" ++ hf download "$HF_REPO" --repo-type dataset ++fi ++ ++echo "Materializing mooncake traces..." 
+ python3 -c " + import json, glob, os + hf_cache = os.environ.get('HF_HUB_CACHE', os.path.expanduser('~/.cache/huggingface/hub')) +-# Find the downloaded parquet/jsonl files in the HF cache +-candidates = glob.glob(os.path.join(hf_cache, 'datasets--sammshen--lmcache-agentic-traces', '**', '*.parquet'), recursive=True) +-if not candidates: +- candidates = glob.glob(os.path.join(hf_cache, 'datasets--sammshen--lmcache-agentic-traces', '**', '*.jsonl'), recursive=True) +-if not candidates: +- # Fallback: use datasets library to load from cache ++ ++src = os.environ.get('MOONCAKE_INPUT', '').strip() ++jsonl_candidates = [] ++if src.startswith('hf_'): ++ hf_spec = src[3:] ++ repo_spec, _, subdir = hf_spec.partition('/') ++ dataset = repo_spec.replace('--', '/', 1) ++ repo_cache = 'datasets--' + dataset.replace('/', '--') ++ pattern = os.path.join(hf_cache, repo_cache, '**', subdir, '*.jsonl') if subdir else os.path.join(hf_cache, repo_cache, '**', '*.jsonl') ++ jsonl_candidates = sorted(glob.glob(pattern, recursive=True)) ++elif src: ++ jsonl_candidates = sorted(glob.glob(os.path.join(src, '*.jsonl'))) ++ ++out_path = '$TRACE_FILE' ++if jsonl_candidates: ++ with open(out_path, 'w', encoding='utf-8') as out: ++ for path in jsonl_candidates: ++ out.write(open(path, encoding='utf-8').read()) ++ print(f'Concatenated {len(jsonl_candidates)} JSONL file(s) into {out_path}') ++ raise SystemExit(0) ++ ++candidates = glob.glob(os.path.join(hf_cache, 'datasets--sammshen--lmcache-agentic-traces', '**', '*.parquet'), recursive=True) ++if not candidates: ++ candidates = glob.glob(os.path.join(hf_cache, 'datasets--sammshen--lmcache-agentic-traces', '**', '*.jsonl'), recursive=True) ++if not candidates: + from datasets import load_dataset + ds = load_dataset('sammshen/lmcache-agentic-traces', split='train') + rows = list(ds) +@@ -85,7 +109,6 @@ else: +- out_path = '$TRACE_FILE' + sessions = set() + skipped = 0 + with open(out_path, 'w') as f: +``` + +## Workflow YAML hunks + +### 
`benchmark-multiturn-tmpl.yml` + +```diff +--- a/.github/workflows/benchmark-multiturn-tmpl.yml ++++ b/.github/workflows/benchmark-multiturn-tmpl.yml +@@ -72,6 +72,11 @@ on: + trace-dir: + description: "Override trace directory (relative to kv-cache-tester dir)" + required: false + type: string + default: '' ++ mooncake-input: ++ description: "Override mooncake JSONL source (local dir or hf_--[/subdir])" ++ required: false ++ type: string ++ default: '' +@@ -101,6 +106,7 @@ env: + HASH_BLOCK_MODE: ${{ inputs.hash-block-mode }} + TRACE_DIR: ${{ inputs.trace-dir }} ++ MOONCAKE_INPUT: ${{ inputs.mooncake-input }} + DEBUG_TRACE: ${{ inputs.debug-trace }} + NO_MAX_TOKENS: ${{ inputs.no-max-tokens }} +``` + +### `multiturn-sweep.yml` + +```diff +--- a/.github/workflows/multiturn-sweep.yml ++++ b/.github/workflows/multiturn-sweep.yml +@@ -105,6 +105,11 @@ on: + trace_dir: + description: 'Override trace directory (e.g. traces, traces_neon). Relative to kv-cache-tester dir.' + required: false + default: '' + type: string ++ mooncake_input: ++ description: 'Override mooncake JSONL source (local dir or hf_--[/subdir]).' ++ required: false ++ default: '' ++ type: string +@@ -219,6 +224,7 @@ jobs: + ignore-eos: ${{ inputs.ignore_eos }} + hash-block-mode: ${{ inputs.hash_block_mode }} + trace-dir: ${{ inputs.trace_dir }} ++ mooncake-input: ${{ inputs.mooncake_input }} + debug-trace: ${{ inputs.debug_trace }} + no-max-tokens: ${{ inputs.no_max_tokens }} +``` + +## How to consume + +- Pull replay inputs from `datasets/isb1/mooncake/`. +- Use the first-wave sweep cells in `.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml`. +- Point `MOONCAKE_INPUT` at either a local subtree or an `hf_--[/subdir]` dataset path. +- Leave `MOONCAKE_INPUT` unset to preserve the legacy `sammshen/lmcache-agentic-traces` fallback. 
From 46d6d0fe074db38a1c7147345e72aebd2572c10f Mon Sep 17 00:00:00 2001 From: William Chen <57119977+OCWC22@users.noreply.github.com> Date: Tue, 21 Apr 2026 03:33:00 -0700 Subject: [PATCH 11/13] aiperf: offline smoke for MooncakeTraceDatasetLoader Validates exporter output against the pinned aiperf loader without a GPU or inference server. Asserts row/session counts, delay values, raw_messages propagation, and cross-bundle session prefix survival. --- tools/smoke_aiperf_mooncake.py | 170 +++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 tools/smoke_aiperf_mooncake.py diff --git a/tools/smoke_aiperf_mooncake.py b/tools/smoke_aiperf_mooncake.py new file mode 100644 index 000000000..bf89ad5a2 --- /dev/null +++ b/tools/smoke_aiperf_mooncake.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +"""Offline smoke test for aiperf MooncakeTraceDatasetLoader.""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +DEFAULT_AIPERF_SRC = os.environ.get("AIPERF_SRC") or ( + "/tmp/cam-pr993-full/experimental/multiturn/vllm_benchmark/aiperf/src" +) + + +class SmokeFailure(RuntimeError): + """Raised when the offline smoke check fails.""" + + +class PromptGeneratorStub: + """Minimal prompt-generator surface required by the aiperf loader.""" + + def __init__(self) -> None: + self.tokenizer = SimpleNamespace(resolved_name="offline-smoke-tokenizer") + self._decoded_cache: dict[tuple[Any, ...], str] = {} + + def generate(self, *, mean: int, stddev: int = 0, hash_ids: list[int] | None = None) -> str: + return f"[generated mean={mean} stddev={stddev} hash_ids={len(hash_ids or [])}]" + + def _build_token_sequence(self, input_length: int, hash_ids: list[int] | None, block_size: int) -> list[int]: + blocks = max(1, input_length // max(block_size, 1)) + return list(range(blocks + len(hash_ids or []))) + + +def parse_args(argv: list[str]) 
-> argparse.Namespace: + parser = argparse.ArgumentParser(description="Offline smoke test for MooncakeTraceDatasetLoader.") + parser.add_argument("--input", required=True, help="Mooncake JSONL file or directory of JSONL files.") + parser.add_argument( + "--aiperf-src", + default=DEFAULT_AIPERF_SRC, + help=f"Path to aiperf src/ (default: AIPERF_SRC env if set, otherwise {DEFAULT_AIPERF_SRC}).", + ) + return parser.parse_args(argv) + + +def iter_jsonl_files(input_spec: str) -> list[Path]: + path = Path(input_spec).resolve() + if path.is_file(): + return [path] + if path.is_dir(): + files = sorted(candidate.resolve() for candidate in path.rglob("*.jsonl")) + if files: + return files + raise SmokeFailure(f"No JSONL files found under {input_spec}") + + +def load_rows(path: Path) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + with path.open(encoding="utf-8") as handle: + for line_no, raw_line in enumerate(handle, start=1): + line = raw_line.strip() + if not line: + continue + try: + row = json.loads(line) + except json.JSONDecodeError as exc: + raise SmokeFailure(f"{path}: line {line_no} is invalid JSON: {exc}") from exc + if not isinstance(row, dict): + raise SmokeFailure(f"{path}: line {line_no} must decode to a JSON object") + rows.append(row) + if not rows: + raise SmokeFailure(f"{path}: no rows found") + return rows + + +def import_aiperf(aiperf_src: Path): + if sys.version_info < (3, 10): + raise SmokeFailure("aiperf import requires Python 3.10+; rerun this script with python3.10 or newer") + if not aiperf_src.is_dir(): + raise SmokeFailure(f"aiperf src path not found: {aiperf_src}") + sys.path.insert(0, str(aiperf_src)) + try: + from aiperf.common.config import EndpointConfig, UserConfig # type: ignore + from aiperf.dataset.loader import base_trace_loader # type: ignore + from aiperf.dataset.loader.mooncake_trace import MooncakeTraceDatasetLoader # type: ignore + except Exception as exc: # pragma: no cover - environment-specific + raise SmokeFailure( 
+ f"failed to import aiperf from {aiperf_src}: {exc}. Use an environment with aiperf dependencies installed." + ) from exc + + base_trace_loader.parallel_decode = lambda token_sequences, *_a, **_k: [ + f"[decoded tokens={len(seq)}]" for seq in token_sequences + ] + return EndpointConfig, UserConfig, MooncakeTraceDatasetLoader + + +def assert_session_ids(raw_rows: list[dict[str, Any]], dataset: dict[str, list[Any]], conversations: list[Any], path: Path) -> None: + prefixed = { + row["session_id"] + for row in raw_rows + if isinstance(row.get("session_id"), str) and "::" in row["session_id"] + } + if not prefixed: + return + dataset_ids = set(dataset.keys()) + conversation_ids = {conversation.session_id for conversation in conversations} + if missing := sorted(prefixed - dataset_ids): + raise SmokeFailure(f"{path}: prefixed session IDs missing from dataset: {missing}") + if missing := sorted(prefixed - conversation_ids): + raise SmokeFailure(f"{path}: prefixed session IDs missing from conversations: {missing}") + + +def assert_turns(dataset: dict[str, list[Any]], conversations: list[Any], path: Path) -> tuple[Any, Any]: + conversation_map = {conversation.session_id: conversation for conversation in conversations} + ordered_delays: list[Any] = [] + for session_id, traces in dataset.items(): + conversation = conversation_map.get(session_id) + if conversation is None: + raise SmokeFailure(f"{path}: missing conversation for session {session_id}") + if len(conversation.turns) != len(traces): + raise SmokeFailure(f"{path}: session {session_id} trace/turn count mismatch") + for trace, turn in zip(traces, conversation.turns, strict=True): + if getattr(trace, "messages", None) is not None and turn.raw_messages != trace.messages: + raise SmokeFailure(f"{path}: raw_messages mismatch for session {session_id}") + if turn.delay != getattr(trace, "delay", None): + raise SmokeFailure(f"{path}: delay mismatch for session {session_id}") + ordered_delays.append(turn.delay) + if not 
ordered_delays: + raise SmokeFailure(f"{path}: no turn delays captured") + return ordered_delays[0], ordered_delays[-1] + + +def smoke_file(path: Path, EndpointConfig, UserConfig, MooncakeTraceDatasetLoader) -> None: + raw_rows = load_rows(path) + loader = MooncakeTraceDatasetLoader( + filename=str(path), + user_config=UserConfig(endpoint=EndpointConfig(model_names=["offline-smoke-model"])), + prompt_generator=PromptGeneratorStub(), + ) + dataset = loader.load_dataset() + total_rows = sum(len(traces) for traces in dataset.values()) + total_sessions = len(dataset) + if total_rows <= 0 or total_sessions <= 0: + raise SmokeFailure(f"{path}: expected positive row and session counts") + conversations = loader.convert_to_conversations(dataset) + if len(conversations) != total_sessions: + raise SmokeFailure(f"{path}: session count mismatch after conversation conversion") + assert_session_ids(raw_rows, dataset, conversations, path) + first_delay, last_delay = assert_turns(dataset, conversations, path) + print(f"OK {path}: rows={total_rows} sessions={total_sessions} first_delay={first_delay} last_delay={last_delay}") + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(argv or sys.argv[1:]) + try: + files = iter_jsonl_files(args.input) + EndpointConfig, UserConfig, MooncakeTraceDatasetLoader = import_aiperf(Path(args.aiperf_src).resolve()) + for file_path in files: + smoke_file(file_path, EndpointConfig, UserConfig, MooncakeTraceDatasetLoader) + except SmokeFailure as exc: + print(f"FAIL: {exc}", file=sys.stderr) + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From 8fe2b807afde6843661be7c2910da2c8e5df491c Mon Sep 17 00:00:00 2001 From: William Chen <57119977+OCWC22@users.noreply.github.com> Date: Tue, 21 Apr 2026 11:24:52 -0700 Subject: [PATCH 12/13] docs(isb1): mark mooncake recipe and sweep yaml as speculative MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reframes fork PR #2 
centerpiece — the canonical ask is the opt-in corpus at datasets/isb1/mooncake/ consumed via existing --custom-dataset-type mooncake_trace flows. The recipe and the ISB-1-mooncake sweep YAML remain for operators who elect to patch their harness, but are explicitly non-blocking and depend on unmerged upstream patches. No code changes. No removal. Label-only to clarify PR framing per deep-investigation-report.md §Answer 1. Co-Authored-By: Claude Opus 4.7 --- .../multiturn-agentic-trace-isb1-mooncake.yaml | 12 ++++++++++++ datasets/isb1/RECIPE_MOONCAKE.md | 2 ++ 2 files changed, 14 insertions(+) diff --git a/.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml b/.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml index c88d1ccb8..1e74cb196 100644 --- a/.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml +++ b/.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml @@ -1,3 +1,15 @@ +# ====================================================================== +# STATUS: SPECULATIVE / NON-BLOCKING +# ---------------------------------------------------------------------- +# This file is NOT the canonical PR framing for fork PR #2. It depends on +# an unmerged upstream patch that wires MOONCAKE_INPUT through +# .github/workflows/benchmark-multiturn-tmpl.yml. Do NOT reference from +# workflows until that path is approved upstream. See +# datasets/isb1/RECIPE_MOONCAKE.md for the speculative patch set, or +# https://github.com/OCWC22/InferenceX/pull/2 for the canonical +# opt-in framing (corpus at datasets/isb1/mooncake/ consumed via existing +# --custom-dataset-type mooncake_trace, zero harness patches required). +# ====================================================================== # ISB1 sweep cells for Cam's aiperf / mooncake_trace replay flow. # Schema mirrors .github/configs/multiturn-agentic-trace.yaml and # .github/configs/multiturn-agentic-trace-isb1.yaml. 
diff --git a/datasets/isb1/RECIPE_MOONCAKE.md b/datasets/isb1/RECIPE_MOONCAKE.md index 1db7d33b3..1db21295d 100644 --- a/datasets/isb1/RECIPE_MOONCAKE.md +++ b/datasets/isb1/RECIPE_MOONCAKE.md @@ -1,5 +1,7 @@ # Operator recipe for `MOONCAKE_INPUT` +> **Status: speculative / non-blocking ops-note. Not a PR ask.** This file documents *one possible* way upstream operators could wire the ISB-1 mooncake corpus into Cam's aiperf scripts. It is NOT the canonical PR framing. The canonical framing is the "opt-in second mooncake corpus" described in [fork PR #2](https://github.com/OCWC22/InferenceX/pull/2) — operators run `--custom-dataset-type mooncake_trace` against `datasets/isb1/mooncake/` with their existing `MOONCAKE_INPUT` plumbing; no upstream patches required. +> > **These diffs target upstream harness files (`benchmarks/single_node/multiturn_fp8_*_lmcache_aiperf.sh`, `.github/workflows/benchmark-multiturn-tmpl.yml`, `.github/workflows/multiturn-sweep.yml`). Do NOT apply locally — they are for operators pulling this data into the upstream harness.** ## MOONCAKE_INPUT semantics From b31f7c19fc68310ed7835ee6274bf9ff612c649f Mon Sep 17 00:00:00 2001 From: William Chen <57119977+OCWC22@users.noreply.github.com> Date: Tue, 21 Apr 2026 21:41:51 -0700 Subject: [PATCH 13/13] feat(isb1): preserve ISL in mooncake flattener + noprefix cells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - preserve non-text ISB1 token_count blocks in the mooncake flattener and cover TABLE / TOOL_OUTPUT / BLOCK fallback behavior in the focused exporter test - regenerate the 17 manifest-listed mooncake bundles and refresh manifest provenance/stats (jsonl bytes 13995749 -> 13988708; requests 1142 -> 1142; sessions 22 -> 22); smoke not run locally — no aiperf install (missing cyclopts in local aiperf env) - add noprefix offload cells plus header docs to the speculative mooncake sweep YAML; preview bundles remain deferred --- 
...multiturn-agentic-trace-isb1-mooncake.yaml | 24 ++++++++++++------- .../isb1_core_chat_8k1k_qwen3_5.jsonl | 4 ++-- .../isb1_core_code_8k1k_qwen3_5.jsonl | 4 ++-- ...isb1_extension_131k_chat_131k1k_dsr1.jsonl | 4 ++-- ...1_extension_131k_chat_131k1k_qwen3_5.jsonl | 4 ++-- ...sb1_extension_32k_chat_32k1k_qwen3_5.jsonl | 4 ++-- ...sb1_extension_32k_code_32k1k_qwen3_5.jsonl | 4 ++-- ...sb1_extension_64k_chat_64k1k_qwen3_5.jsonl | 4 ++-- ...sb1_extension_64k_code_64k1k_qwen3_5.jsonl | 4 ++-- datasets/isb1/mooncake/manifest.json | 23 +++++++++--------- tools/isb1_to_mooncake_trace.py | 23 ++++++++++++++++-- tools/test_isb1_to_mooncake_trace.py | 12 ++++++++++ 12 files changed, 77 insertions(+), 37 deletions(-) diff --git a/.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml b/.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml index 1e74cb196..95704b999 100644 --- a/.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml +++ b/.github/configs/multiturn-agentic-trace-isb1-mooncake.yaml @@ -17,22 +17,30 @@ # 32k chat cells expect MOONCAKE_INPUT=datasets/isb1/mooncake/extension_32k/chat_32k1k*/. # 131k code cells expect MOONCAKE_INPUT=datasets/isb1/mooncake/extension_131k/*_131k1k*/. # Preview 500k / 1m lanes are intentionally omitted in v1. +# +# offload values: +# on — KV offload enabled (VLLM_USE_SIMPLE_KV_OFFLOAD=1) +# off — KV offload disabled (baseline) +# noprefix — offload off AND --no-enable-prefix-caching (clean-cache floor). +# Cam's h100 lane already wires the flag in +# multiturn_fp8_h100_lmcache_aiperf.sh:123-126; these cells just +# surface the third mode so the sweep generator emits it. 
h100-fp8-qwen3-isb1-mooncake-code-8k-lmcache: - tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off"]} - tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]} + tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off", "noprefix"]} + tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off", "noprefix"]} h200-fp8-qwen3-isb1-mooncake-code-8k-lmcache: - tp2: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]} - tp4: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off"]} + tp2: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off", "noprefix"]} + tp4: {users: [2, 4, 8, 16, 32, 64, 128], offload: ["on", "off", "noprefix"]} h200-fp8-qwen3-isb1-mooncake-chat-32k-lmcache: - tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off"]} - tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off"]} + tp2: {users: [1, 2, 4, 8, 16, 32], offload: ["on", "off", "noprefix"]} + tp4: {users: [1, 2, 4, 8, 16, 32, 64], offload: ["on", "off", "noprefix"]} h200-fp8-qwen3-isb1-mooncake-code-131k-lmcache: - tp4: {users: [1, 2, 4, 8], offload: ["on", "off"]} - tp8: {users: [1, 2, 4, 8, 16], offload: ["on", "off"]} + tp4: {users: [1, 2, 4, 8], offload: ["on", "off", "noprefix"]} + tp8: {users: [1, 2, 4, 8, 16], offload: ["on", "off", "noprefix"]} b200-fp4-dsr1-isb1-mooncake-code-8k-lmcache: tp4: {ep: 4, users: [4, 8, 16, 32, 64, 128, 256], offload: ["on", "off"]} diff --git a/datasets/isb1/mooncake/core/chat_8k1k_qwen3.5/isb1_core_chat_8k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/core/chat_8k1k_qwen3.5/isb1_core_chat_8k1k_qwen3_5.jsonl index e835b56bc..ec92ed509 100644 --- a/datasets/isb1/mooncake/core/chat_8k1k_qwen3.5/isb1_core_chat_8k1k_qwen3_5.jsonl +++ b/datasets/isb1/mooncake/core/chat_8k1k_qwen3.5/isb1_core_chat_8k1k_qwen3_5.jsonl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c81f79b04496c866d258a8a27e7588ced8cd1c831a52ee6cac4a5b8a0ec584b -size 28338 +oid sha256:33b80963dc04191749f21b632324ed45fcfcbc3e03528b432a0d4cfb827d6779 
+size 27990 diff --git a/datasets/isb1/mooncake/core/code_8k1k_qwen3.5/isb1_core_code_8k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/core/code_8k1k_qwen3.5/isb1_core_code_8k1k_qwen3_5.jsonl index e90ba7f07..32e2e7a69 100644 --- a/datasets/isb1/mooncake/core/code_8k1k_qwen3.5/isb1_core_code_8k1k_qwen3_5.jsonl +++ b/datasets/isb1/mooncake/core/code_8k1k_qwen3.5/isb1_core_code_8k1k_qwen3_5.jsonl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d880ef0667432805bf86d83a69766b56d0ae17efeea5c33323ed3592c88bdc1c -size 30372 +oid sha256:9d775784ace99e69f1bf24b8eb0704543498cd5ca20df0a04ebfd7d3bea47409 +size 30024 diff --git a/datasets/isb1/mooncake/extension_131k/chat_131k1k_dsr1/isb1_extension_131k_chat_131k1k_dsr1.jsonl b/datasets/isb1/mooncake/extension_131k/chat_131k1k_dsr1/isb1_extension_131k_chat_131k1k_dsr1.jsonl index 256ad4d63..c713fd455 100644 --- a/datasets/isb1/mooncake/extension_131k/chat_131k1k_dsr1/isb1_extension_131k_chat_131k1k_dsr1.jsonl +++ b/datasets/isb1/mooncake/extension_131k/chat_131k1k_dsr1/isb1_extension_131k_chat_131k1k_dsr1.jsonl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f837a0500761e0592464cac5bba7758a53dbac22dcc98da205ae4b0bf22945e -size 73434 +oid sha256:e5cabb592bdb80352b55b7b62f967917def8f40414a12d6957fa8a9c3f758171 +size 72408 diff --git a/datasets/isb1/mooncake/extension_131k/chat_131k1k_qwen3.5/isb1_extension_131k_chat_131k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/extension_131k/chat_131k1k_qwen3.5/isb1_extension_131k_chat_131k1k_qwen3_5.jsonl index 3f0ea9270..1dd298a10 100644 --- a/datasets/isb1/mooncake/extension_131k/chat_131k1k_qwen3.5/isb1_extension_131k_chat_131k1k_qwen3_5.jsonl +++ b/datasets/isb1/mooncake/extension_131k/chat_131k1k_qwen3.5/isb1_extension_131k_chat_131k1k_qwen3_5.jsonl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0c4e46499dd39efbde68145e150710da966d56deef39c432f059d81522e5009 -size 73542 +oid 
sha256:3b7f6a11f426bdc83496254b71ac91ca3b46174f6133b133bfd68a6f34e9c877 +size 72435 diff --git a/datasets/isb1/mooncake/extension_32k/chat_32k1k_qwen3.5/isb1_extension_32k_chat_32k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/extension_32k/chat_32k1k_qwen3.5/isb1_extension_32k_chat_32k1k_qwen3_5.jsonl index 32833670d..4f6833601 100644 --- a/datasets/isb1/mooncake/extension_32k/chat_32k1k_qwen3.5/isb1_extension_32k_chat_32k1k_qwen3_5.jsonl +++ b/datasets/isb1/mooncake/extension_32k/chat_32k1k_qwen3.5/isb1_extension_32k_chat_32k1k_qwen3_5.jsonl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7fe68098a53f9dc45ea1245d286dba60d8eb60ae128544b109e6206408bc629a -size 121224 +oid sha256:c67699d2081c81e37d34f28ee1d505216e222cec9474150a34821f7e6e626f68 +size 120171 diff --git a/datasets/isb1/mooncake/extension_32k/code_32k1k_qwen3.5/isb1_extension_32k_code_32k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/extension_32k/code_32k1k_qwen3.5/isb1_extension_32k_code_32k1k_qwen3_5.jsonl index ad897bef3..8220960fa 100644 --- a/datasets/isb1/mooncake/extension_32k/code_32k1k_qwen3.5/isb1_extension_32k_code_32k1k_qwen3_5.jsonl +++ b/datasets/isb1/mooncake/extension_32k/code_32k1k_qwen3.5/isb1_extension_32k_code_32k1k_qwen3_5.jsonl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01212fc97c2c9d7214df408753e19a19f17d4c52a57232ed1f0d2fc24a75f289 -size 64266 +oid sha256:3839e013999bf0191f1f2f090a063815cb98959db6ca52f4bbccbc2e89886cdc +size 63213 diff --git a/datasets/isb1/mooncake/extension_64k/chat_64k1k_qwen3.5/isb1_extension_64k_chat_64k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/extension_64k/chat_64k1k_qwen3.5/isb1_extension_64k_chat_64k1k_qwen3_5.jsonl index fda8b2c27..d42685e0c 100644 --- a/datasets/isb1/mooncake/extension_64k/chat_64k1k_qwen3.5/isb1_extension_64k_chat_64k1k_qwen3_5.jsonl +++ b/datasets/isb1/mooncake/extension_64k/chat_64k1k_qwen3.5/isb1_extension_64k_chat_64k1k_qwen3_5.jsonl @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:c79eafd3d6b1207f3a2fba8b52abd06efd9376fbbc08c609f09c851eed04973d -size 222834 +oid sha256:05ab661dc94a1301e7531ba00f729a8c1028578ea2f56b7b62f7d1427ed6bb59 +size 221313 diff --git a/datasets/isb1/mooncake/extension_64k/code_64k1k_qwen3.5/isb1_extension_64k_code_64k1k_qwen3_5.jsonl b/datasets/isb1/mooncake/extension_64k/code_64k1k_qwen3.5/isb1_extension_64k_code_64k1k_qwen3_5.jsonl index f462e5227..0cfc5bcab 100644 --- a/datasets/isb1/mooncake/extension_64k/code_64k1k_qwen3.5/isb1_extension_64k_code_64k1k_qwen3_5.jsonl +++ b/datasets/isb1/mooncake/extension_64k/code_64k1k_qwen3.5/isb1_extension_64k_code_64k1k_qwen3_5.jsonl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ff29c23002f479f006ccc6de01a081b083be628ec4a2a025afc1e00aea5761c -size 36906 +oid sha256:e668c4e2428e8166b4a63b3f9ad80211cb48abc7cb2596440ba532f0c83fb45d +size 36321 diff --git a/datasets/isb1/mooncake/manifest.json b/datasets/isb1/mooncake/manifest.json index 906b69db0..093098dec 100644 --- a/datasets/isb1/mooncake/manifest.json +++ b/datasets/isb1/mooncake/manifest.json @@ -1,12 +1,13 @@ { - "schema_version": "1.0.0", - "generated_at": "2026-04-21T10:12:10Z", + "schema_version": "1.1.0", + "flattener_version": "1.1.0", + "generated_at": "2026-04-22T04:39:40Z", "generator": "tools/isb1_to_mooncake_trace.py + one-shot inline walker", "dataset_type": "mooncake_trace", "total_traces": 17, "total_requests": 1142, "total_sessions": 22, - "total_jsonl_bytes": 13995749, + "total_jsonl_bytes": 13988708, "traces": [ { "relative_path": "core/chat_8k1k/isb1_core_chat_8k1k.jsonl", @@ -35,7 +36,7 @@ "session_count": 1, "scale_band": "8k", "workload_family": "chat", - "jsonl_bytes": 28338 + "jsonl_bytes": 27990 }, { "relative_path": "core/code_8k1k/isb1_core_code_8k1k.jsonl", @@ -64,7 +65,7 @@ "session_count": 1, "scale_band": "8k", "workload_family": "code", - "jsonl_bytes": 30372 + "jsonl_bytes": 30024 }, { "relative_path": 
"extension_131k/chat_131k1k/isb1_extension_131k_chat_131k1k.jsonl", @@ -93,7 +94,7 @@ "session_count": 1, "scale_band": "131k", "workload_family": "chat", - "jsonl_bytes": 73434 + "jsonl_bytes": 72408 }, { "relative_path": "extension_131k/chat_131k1k_qwen3.5/isb1_extension_131k_chat_131k1k_qwen3_5.jsonl", @@ -106,7 +107,7 @@ "session_count": 1, "scale_band": "131k", "workload_family": "chat", - "jsonl_bytes": 73542 + "jsonl_bytes": 72435 }, { "relative_path": "extension_131k/code_131k1k/isb1_extension_131k_vllm_code_131k1k.jsonl", @@ -166,7 +167,7 @@ "session_count": 1, "scale_band": "32k", "workload_family": "chat", - "jsonl_bytes": 121224 + "jsonl_bytes": 120171 }, { "relative_path": "extension_32k/code_32k1k/isb1_extension_32k_code_32k1k.jsonl", @@ -195,7 +196,7 @@ "session_count": 2, "scale_band": "32k", "workload_family": "code", - "jsonl_bytes": 64266 + "jsonl_bytes": 63213 }, { "relative_path": "extension_64k/chat_64k1k/isb1_extension_64k_chat_64k1k.jsonl", @@ -224,7 +225,7 @@ "session_count": 1, "scale_band": "64k", "workload_family": "chat", - "jsonl_bytes": 222834 + "jsonl_bytes": 221313 }, { "relative_path": "extension_64k/code_64k1k/isb1_extension_64k_code_64k1k.jsonl", @@ -253,7 +254,7 @@ "session_count": 1, "scale_band": "64k", "workload_family": "code", - "jsonl_bytes": 36906 + "jsonl_bytes": 36321 } ] } diff --git a/tools/isb1_to_mooncake_trace.py b/tools/isb1_to_mooncake_trace.py index ce98a80bb..0997f74df 100644 --- a/tools/isb1_to_mooncake_trace.py +++ b/tools/isb1_to_mooncake_trace.py @@ -186,17 +186,32 @@ def _block_language(block: dict[str, Any]) -> str: return "" +def _block_placeholder(block: dict[str, Any]) -> str | None: + token_count = block.get("token_count") + if isinstance(token_count, bool) or not isinstance(token_count, int): + return None + + block_type = block.get("type") + label = block_type.upper() if isinstance(block_type, str) and block_type else "BLOCK" + return f"[{label} token_count={token_count}]" + + def 
_flatten_blocks(blocks: list[Any]) -> str: parts: list[str] = [] for block in blocks: if not isinstance(block, dict): continue - block_type = str(block.get("type") or "text") + raw_block_type = block.get("type") + block_type = raw_block_type if isinstance(raw_block_type, str) and raw_block_type else None text = block.get("text") text_value = "" if text is None else str(text) - if block_type == "text": + if block_type in {None, "text"}: if text_value: parts.append(text_value) + elif block_type is None: + placeholder = _block_placeholder(block) + if placeholder: + parts.append(placeholder) continue if block_type == "code": language = _block_language(block) @@ -205,6 +220,10 @@ def _flatten_blocks(blocks: list[Any]) -> str: continue if text_value: parts.append(text_value) + continue + placeholder = _block_placeholder(block) + if placeholder: + parts.append(placeholder) return "\n\n".join(parts) diff --git a/tools/test_isb1_to_mooncake_trace.py b/tools/test_isb1_to_mooncake_trace.py index c31589989..b16e03cc3 100644 --- a/tools/test_isb1_to_mooncake_trace.py +++ b/tools/test_isb1_to_mooncake_trace.py @@ -163,6 +163,18 @@ def test_mixed_text_and_code_blocks_in_one_message(self) -> None: blocks = [_text_block("before"), _code_block("SELECT 1", metadata_language="sql")] self.assertEqual(exporter._flatten_blocks(blocks), "before\n\n```sql\nSELECT 1\n```") + def test_non_text_blocks_without_text_use_token_count_placeholders(self) -> None: + blocks = [ + {"type": "table", "token_count": 512}, + {"type": "tool_output", "token_count": 128}, + {"type": None, "token_count": 64}, + {"type": "image"}, + ] + self.assertEqual( + exporter._flatten_blocks(blocks), + "[TABLE token_count=512]\n\n[TOOL_OUTPUT token_count=128]\n\n[BLOCK token_count=64]", + ) + def test_tool_role_message_is_preserved_as_is(self) -> None: with tempfile.TemporaryDirectory() as tmp: root = Path(tmp)