From c8a52efbd00e8ae39000b4012cfda21feca72812 Mon Sep 17 00:00:00 2001
From: Dennis Palatov
Date: Sat, 18 Apr 2026 13:27:57 -0700
Subject: [PATCH] gameplay_capture: carry known_good action as supervised label
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per Dennis's observation: the gameplay records ARE supervised training
triples, but we were only capturing what our baseline PROPOSED, not what
the right move actually was. The winning trace's per-step action is by
definition a good next action — encode it in metadata so downstream
training can use it as the teacher signal.

New metadata fields on every gameplay record:
- known_good_action: int (GameAction value that the winning trace took)
- known_good_data: Dict|None (click coords for action=6, else None)
- known_good_level: int (game level at this step)

What this unlocks for training:
- Router BC: (state → baseline_dispatch) [already worked]
- Action prediction: (state → known_good_action) [NEW]
- Motor-skill BC by demonstration:
  (state × skill_params → known_good_action) [NEW]
- Outcome-weighted shaping: sample_weight ∝ game_outcome.won [NEW]
- Backprop through chained components using the winning action as the
  terminal-loss target [NEW]

'This is what SAGE should do next to evaluate what it proposes next' —
the proposal and the ground truth are now both in every record.

Tests: 2 new (18 total). Verify known_good_action matches trace action
exactly, click-step known_good_data carries coords, levels pass through.

Backward compat: old consumers that don't look at the new fields are
unaffected. RouterRecord schema unchanged (metadata is an open dict).

Co-Authored-By: Claude Opus 4.7 (1M context) --- .../thalamic_router/gameplay_capture.py | 15 +++++++- .../tests/test_gameplay_capture.py | 37 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/sage/cognition/thalamic_router/gameplay_capture.py b/sage/cognition/thalamic_router/gameplay_capture.py index ae8402a15..33c03e904 100644 --- a/sage/cognition/thalamic_router/gameplay_capture.py +++ b/sage/cognition/thalamic_router/gameplay_capture.py @@ -375,7 +375,16 @@ def run(self) -> CaptureResult: self.errors.append(f"step {step.index}: decide failed: {e!r}") continue - # Build the record with provenance metadata + # Build the record with provenance metadata. + # + # `known_good_*` fields are the supervised-training labels. Since + # this trace is a WIN, the action actually taken at this tick is + # by definition a good next action. This lets downstream training + # use the record as a supervised triple: + # state → (baseline-proposed dispatch) [router BC] + # state → known_good_action [action prediction] + # state × skill_params → known_good_action [motor-skill BC] + # Plus outcome-weighted shaping: sample weight ∝ game_outcome.won. 
metadata = { "source": "gameplay", "game": self.trace.game, @@ -385,6 +394,10 @@ def run(self) -> CaptureResult: "step_index": step.index, "level": step.level, "synthetic_kernel_state": True, + # Supervised labels from the winning trace + "known_good_action": step.action, + "known_good_data": step.data, + "known_good_level": step.level, } record = RouterRecord( router_input=router_input, diff --git a/sage/cognition/thalamic_router/tests/test_gameplay_capture.py b/sage/cognition/thalamic_router/tests/test_gameplay_capture.py index 01220162c..ade50148e 100644 --- a/sage/cognition/thalamic_router/tests/test_gameplay_capture.py +++ b/sage/cognition/thalamic_router/tests/test_gameplay_capture.py @@ -186,6 +186,10 @@ def test_capture_end_to_end_emits_records(tmp_path): assert rec.metadata.get("game") == "testgame" assert rec.metadata.get("synthetic_kernel_state") is True assert "game_outcome" in rec.metadata + # Supervised-training labels + assert "known_good_action" in rec.metadata + assert rec.metadata["known_good_action"] in (1, 3, 6) # from _make_trace_3_steps + assert "known_good_level" in rec.metadata def test_capture_writes_to_partition(tmp_path): @@ -271,3 +275,36 @@ def test_capture_all_records_have_valid_router_output(tmp_path): for rec in capture.records: ok, reason = rec.router_output.validate() assert ok, f"invalid output: {reason}" + + +def test_capture_known_good_labels_match_trace_actions(tmp_path): + """Each record's known_good_action must equal the corresponding TraceStep.action.""" + trace = _make_trace_3_steps() + writer = RouterDatasetWriter(base_dir=tmp_path / "dataset", + machine="cbp", compress=False) + capture = GameplayCapture(trace=trace, writer=writer, machine="cbp", + env_factory=_make_env_factory()) + capture.run() + writer.close() + assert len(capture.records) == 3 + assert capture.records[0].metadata["known_good_action"] == 1 # UP + assert capture.records[1].metadata["known_good_action"] == 3 # LEFT + assert 
capture.records[2].metadata["known_good_action"] == 6 # CLICK + # Click step carries click data + assert capture.records[2].metadata["known_good_data"] == {"x": 10, "y": 20} + # Non-click steps have None data + assert capture.records[0].metadata["known_good_data"] is None + + +def test_capture_known_good_levels_match_trace_levels(tmp_path): + """known_good_level passes through the trace's per-step level.""" + trace = _make_trace_3_steps() + writer = RouterDatasetWriter(base_dir=tmp_path / "dataset", + machine="cbp", compress=False) + capture = GameplayCapture(trace=trace, writer=writer, machine="cbp", + env_factory=_make_env_factory()) + capture.run() + writer.close() + assert capture.records[0].metadata["known_good_level"] == 0 + assert capture.records[1].metadata["known_good_level"] == 0 + assert capture.records[2].metadata["known_good_level"] == 1