From 59341e30c801cbfa88152611df2e76887863c48d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:23:43 +0000 Subject: [PATCH 1/9] Initial plan From 2b3362d5ce9d5e7b91c4715c4420e536f92074dd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:30:03 +0000 Subject: [PATCH 2/9] Add validation for reserved keywords in checkpoint encoding/decoding Co-authored-by: TaoChenOSU <12570346+TaoChenOSU@users.noreply.github.com> --- .../_workflows/_checkpoint_encoding.py | 37 ++++++ .../tests/workflow/test_checkpoint_decode.py | 114 ++++++++++++++++++ 2 files changed, 151 insertions(+) diff --git a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py index 8c52c7a521..d5a1a7f970 100644 --- a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py +++ b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py @@ -92,6 +92,16 @@ def _enc(v: Any, stack: set[int], depth: int) -> Any: return _CYCLE_SENTINEL stack.add(oid) try: + # Check for reserved marker keys that could cause deserialization issues + has_model_marker = MODEL_MARKER in v_dict + has_dataclass_marker = DATACLASS_MARKER in v_dict + has_value_key = "value" in v_dict + if (has_model_marker or has_dataclass_marker) and has_value_key: + marker = MODEL_MARKER if has_model_marker else DATACLASS_MARKER + raise ValueError( + f"Cannot encode dict containing reserved checkpoint marker key '{marker}' " + f"with 'value' key. These keys are reserved for polymorphic serialization." + ) json_dict: dict[str, Any] = {} for k_any, val_any in v_dict.items(): # type: ignore[assignment] k_str: str = str(k_any) @@ -146,6 +156,12 @@ def decode_checkpoint_value(value: Any) -> Any: cls = None if cls is not None: + # Verify the class actually supports the model protocol + if not _class_supports_model_protocol(cls): + logger.debug( + f"Class {type_key} does not support model protocol; returning raw value" + ) + return decoded_payload if strategy == "to_dict" and hasattr(cls, "from_dict"): with contextlib.suppress(Exception): return cls.from_dict(decoded_payload) @@ -169,6 +185,12 @@ def decode_checkpoint_value(value: Any) -> Any: if module is None: module = importlib.import_module(module_name) cls_dc: Any = getattr(module, class_name) + # Verify the class is actually a dataclass + if not is_dataclass(cls_dc): + logger.debug( + f"Class {type_key_dc} is not a dataclass; returning raw value" + ) + return decoded_raw constructed = _instantiate_checkpoint_dataclass(cls_dc, decoded_raw) if constructed is not None: return constructed @@ -204,6 +226,21 @@ def _supports_model_protocol(obj: object) -> bool: return (has_to_dict and has_from_dict) or (has_to_json and has_from_json) +def _class_supports_model_protocol(cls: type[Any]) -> bool: + """Check if a class type supports the model serialization protocol. + + This is similar to _supports_model_protocol but works on classes directly, + used for validation during deserialization. + """ + has_to_dict = hasattr(cls, "to_dict") and callable(getattr(cls, "to_dict", None)) + has_from_dict = hasattr(cls, "from_dict") and callable(getattr(cls, "from_dict", None)) + + has_to_json = hasattr(cls, "to_json") and callable(getattr(cls, "to_json", None)) + has_from_json = hasattr(cls, "from_json") and callable(getattr(cls, "from_json", None)) + + return (has_to_dict and has_from_dict) or (has_to_json and has_from_json) + + def _import_qualified_name(qualname: str) -> type[Any] | None: if ":" not in qualname: return None diff --git a/python/packages/core/tests/workflow/test_checkpoint_decode.py b/python/packages/core/tests/workflow/test_checkpoint_decode.py index b126eafacf..64b29e2fac 100644 --- a/python/packages/core/tests/workflow/test_checkpoint_decode.py +++ b/python/packages/core/tests/workflow/test_checkpoint_decode.py @@ -3,7 +3,11 @@ from dataclasses import dataclass # noqa: I001 from typing import Any, cast +import pytest + from agent_framework._workflows._checkpoint_encoding import ( + DATACLASS_MARKER, + MODEL_MARKER, decode_checkpoint_value, encode_checkpoint_value, ) @@ -126,3 +130,113 @@ def test_encode_decode_nested_structures() -> None: assert response.data == "first response" assert isinstance(response.original_request, SampleRequest) assert response.original_request.request_id == "req-1" + + +def test_encode_raises_error_for_reserved_model_marker_with_value() -> None: + """Test that encoding a dict with MODEL_MARKER and 'value' keys raises an error.""" + malicious_dict = { + MODEL_MARKER: "some.module:FakeClass", + "value": {"data": "test"}, + "strategy": "to_dict", + } + + with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): + encode_checkpoint_value(malicious_dict) + + +def test_encode_raises_error_for_reserved_dataclass_marker_with_value() -> None: + """Test that encoding a dict with DATACLASS_MARKER and 'value' keys raises an error.""" + malicious_dict = { + DATACLASS_MARKER: "some.module:FakeClass", + "value": {"field1": "test"}, + } + + with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): + encode_checkpoint_value(malicious_dict) + + +def test_encode_allows_marker_key_without_value_key() -> None: + """Test that encoding a dict with only the marker key (no 'value') is allowed.""" + # This should not raise, as it doesn't match the marker pattern + dict_with_marker_only = { + MODEL_MARKER: "some.module:FakeClass", + "other_key": "test", + } + encoded = encode_checkpoint_value(dict_with_marker_only) + assert MODEL_MARKER in encoded + assert "other_key" in encoded + + +def test_encode_allows_value_key_without_marker_key() -> None: + """Test that encoding a dict with only 'value' key (no marker) is allowed.""" + dict_with_value_only = { + "value": {"data": "test"}, + "other_key": "test", + } + encoded = encode_checkpoint_value(dict_with_value_only) + assert "value" in encoded + assert "other_key" in encoded + + +class NotADataclass: + """A regular class that is not a dataclass.""" + + def __init__(self, value: str) -> None: + self.value = value + + def get_value(self) -> str: + return self.value + + +class NotAModel: + """A regular class that does not support the model protocol.""" + + def __init__(self, value: str) -> None: + self.value = value + + def get_value(self) -> str: + return self.value + + +def test_decode_rejects_non_dataclass_with_dataclass_marker() -> None: + """Test that decode returns raw value when marked class is not a dataclass.""" + # Manually construct a payload that claims NotADataclass is a dataclass + fake_payload = { + DATACLASS_MARKER: f"{NotADataclass.__module__}:{NotADataclass.__name__}", + "value": {"value": "test_value"}, + } + + decoded = decode_checkpoint_value(fake_payload) + + # Should return the raw decoded value, not an instance of NotADataclass + assert isinstance(decoded, dict) + assert decoded["value"] == "test_value" + + +def test_decode_rejects_non_model_with_model_marker() -> None: + """Test that decode returns raw value when marked class doesn't support model protocol.""" + # Manually construct a payload that claims NotAModel supports the model protocol + fake_payload = { + MODEL_MARKER: f"{NotAModel.__module__}:{NotAModel.__name__}", + "strategy": "to_dict", + "value": {"value": "test_value"}, + } + + decoded = decode_checkpoint_value(fake_payload) + + # Should return the raw decoded value, not an instance of NotAModel + assert isinstance(decoded, dict) + assert decoded["value"] == "test_value" + + +def test_encode_raises_for_nested_dict_with_reserved_keys() -> None: + """Test that encoding fails for nested dicts containing reserved marker patterns.""" + nested_data = { + "outer": { + MODEL_MARKER: "some.module:FakeClass", + "value": {"data": "test"}, + } + } + + with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): + encode_checkpoint_value(nested_data) From ea2022d63497374c1aaeb19bd64c6f79d547cde1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:31:43 +0000 Subject: [PATCH 3/9] Refactor to eliminate duplicate code in model protocol detection Co-authored-by: TaoChenOSU <12570346+TaoChenOSU@users.noreply.github.com> --- .../_workflows/_checkpoint_encoding.py | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py index d5a1a7f970..97ef4caab5 100644 --- a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py +++ b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py @@ -210,27 +210,12 @@ def decode_checkpoint_value(value: Any) -> Any: return value -def _supports_model_protocol(obj: object) -> bool: - """Detect objects that expose dictionary serialization hooks.""" - try: - obj_type: type[Any] = type(obj) - except Exception: - return False - - has_to_dict = hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict", None)) # type: ignore[arg-type] - has_from_dict = hasattr(obj_type, "from_dict") and callable(getattr(obj_type, "from_dict", None)) - - has_to_json = hasattr(obj, "to_json") and callable(getattr(obj, "to_json", None)) # type: ignore[arg-type] - has_from_json = hasattr(obj_type, "from_json") and callable(getattr(obj_type, "from_json", None)) - - return (has_to_dict and has_from_dict) or (has_to_json and has_from_json) - - def _class_supports_model_protocol(cls: type[Any]) -> bool: """Check if a class type supports the model serialization protocol. - This is similar to _supports_model_protocol but works on classes directly, - used for validation during deserialization. + Checks for pairs of serialization/deserialization methods: + - to_dict/from_dict + - to_json/from_json """ has_to_dict = hasattr(cls, "to_dict") and callable(getattr(cls, "to_dict", None)) has_from_dict = hasattr(cls, "from_dict") and callable(getattr(cls, "from_dict", None)) @@ -241,6 +226,16 @@ def _class_supports_model_protocol(cls: type[Any]) -> bool: return (has_to_dict and has_from_dict) or (has_to_json and has_from_json) +def _supports_model_protocol(obj: object) -> bool: + """Detect objects that expose dictionary serialization hooks.""" + try: + obj_type: type[Any] = type(obj) + except Exception: + return False + + return _class_supports_model_protocol(obj_type) + + def _import_qualified_name(qualname: str) -> type[Any] | None: if ":" not in qualname: return None From c7605a05469c1e839e0a876bfe6b1e1d90393d97 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:35:41 +0000 Subject: [PATCH 4/9] Fix pyright type narrowing issue for dataclass check Co-authored-by: TaoChenOSU <12570346+TaoChenOSU@users.noreply.github.com> --- .../core/agent_framework/_workflows/_checkpoint_encoding.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py index 97ef4caab5..d50270f152 100644 --- a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py +++ b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py @@ -185,10 +185,10 @@ def decode_checkpoint_value(value: Any) -> Any: if module is None: module = importlib.import_module(module_name) cls_dc: Any = getattr(module, class_name) - # Verify the class is actually a dataclass - if not is_dataclass(cls_dc): + # Verify the class is actually a dataclass type (not an instance) + if not isinstance(cls_dc, type) or not is_dataclass(cls_dc): logger.debug( - f"Class {type_key_dc} is not a dataclass; returning raw value" + f"Class {type_key_dc} is not a dataclass type; returning raw value" ) return decoded_raw constructed = _instantiate_checkpoint_dataclass(cls_dc, decoded_raw) From 5b4489b45ce10a3243ea2f0f6141f09baeeecb75 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 21 Jan 2026 17:14:50 +0000 Subject: [PATCH 5/9] Add comprehensive unit tests for checkpoint encoding Co-authored-by: TaoChenOSU <12570346+TaoChenOSU@users.noreply.github.com> --- .../tests/workflow/test_checkpoint_encode.py | 414 ++++++++++++++++++ 1 file changed, 414 insertions(+) create mode 100644 python/packages/core/tests/workflow/test_checkpoint_encode.py diff --git a/python/packages/core/tests/workflow/test_checkpoint_encode.py b/python/packages/core/tests/workflow/test_checkpoint_encode.py new file mode 100644 index 0000000000..4c18ba4694 --- /dev/null +++ b/python/packages/core/tests/workflow/test_checkpoint_encode.py @@ -0,0 +1,414 @@ +# Copyright (c) Microsoft. All rights reserved. + +from dataclasses import dataclass +from typing import Any + +import pytest + +from agent_framework._workflows._checkpoint_encoding import ( + _CYCLE_SENTINEL, + DATACLASS_MARKER, + MODEL_MARKER, + encode_checkpoint_value, +) + + +@dataclass +class SimpleDataclass: + """A simple dataclass for testing encoding.""" + + name: str + value: int + + +@dataclass +class NestedDataclass: + """A dataclass with nested dataclass field.""" + + outer_name: str + inner: SimpleDataclass + + +class ModelWithToDict: + """A class that implements to_dict/from_dict protocol.""" + + def __init__(self, data: str) -> None: + self.data = data + + def to_dict(self) -> dict[str, Any]: + return {"data": self.data} + + @classmethod + def from_dict(cls, d: dict[str, Any]) -> "ModelWithToDict": + return cls(data=d["data"]) + + +class ModelWithToJson: + """A class that implements to_json/from_json protocol.""" + + def __init__(self, data: str) -> None: + self.data = data + + def to_json(self) -> str: + return f'{{"data": "{self.data}"}}' + + @classmethod + def from_json(cls, json_str: str) -> "ModelWithToJson": + import json + + d = json.loads(json_str) + return cls(data=d["data"]) + + +class UnknownObject: + """A class that doesn't support any serialization protocol.""" + + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return f"UnknownObject({self.value})" + + +# --- Tests for primitive encoding --- + + +def test_encode_string() -> None: + """Test encoding a string value.""" + result = encode_checkpoint_value("hello") + assert result == "hello" + + +def test_encode_integer() -> None: + """Test encoding an integer value.""" + result = encode_checkpoint_value(42) + assert result == 42 + + +def test_encode_float() -> None: + """Test encoding a float value.""" + result = encode_checkpoint_value(3.14) + assert result == 3.14 + + +def test_encode_boolean_true() -> None: + """Test encoding a True boolean value.""" + result = encode_checkpoint_value(True) + assert result is True + + +def test_encode_boolean_false() -> None: + """Test encoding a False boolean value.""" + result = encode_checkpoint_value(False) + assert result is False + + +def test_encode_none() -> None: + """Test encoding a None value.""" + result = encode_checkpoint_value(None) + assert result is None + + +# --- Tests for collection encoding --- + + +def test_encode_empty_dict() -> None: + """Test encoding an empty dictionary.""" + result = encode_checkpoint_value({}) + assert result == {} + + +def test_encode_simple_dict() -> None: + """Test encoding a simple dictionary with primitive values.""" + data = {"name": "test", "count": 5, "active": True} + result = encode_checkpoint_value(data) + assert result == {"name": "test", "count": 5, "active": True} + + +def test_encode_dict_with_non_string_keys() -> None: + """Test encoding a dictionary with non-string keys (converted to strings).""" + data = {1: "one", 2: "two"} + result = encode_checkpoint_value(data) + assert result == {"1": "one", "2": "two"} + + +def test_encode_empty_list() -> None: + """Test encoding an empty list.""" + result = encode_checkpoint_value([]) + assert result == [] + + +def test_encode_simple_list() -> None: + """Test encoding a simple list with primitive values.""" + data = [1, 2, 3, "four"] + result = encode_checkpoint_value(data) + assert result == [1, 2, 3, "four"] + + +def test_encode_tuple() -> None: + """Test encoding a tuple (converted to list).""" + data = (1, 2, 3) + result = encode_checkpoint_value(data) + assert result == [1, 2, 3] + + +def test_encode_set() -> None: + """Test encoding a set (converted to list).""" + data = {1, 2, 3} + result = encode_checkpoint_value(data) + assert isinstance(result, list) + assert sorted(result) == [1, 2, 3] + + +def test_encode_nested_dict() -> None: + """Test encoding a nested dictionary structure.""" + data = { + "outer": { + "inner": { + "value": 42, + } + } + } + result = encode_checkpoint_value(data) + assert result == {"outer": {"inner": {"value": 42}}} + + +def test_encode_list_of_dicts() -> None: + """Test encoding a list containing dictionaries.""" + data = [{"a": 1}, {"b": 2}] + result = encode_checkpoint_value(data) + assert result == [{"a": 1}, {"b": 2}] + + +# --- Tests for dataclass encoding --- + + +def test_encode_simple_dataclass() -> None: + """Test encoding a simple dataclass.""" + obj = SimpleDataclass(name="test", value=42) + result = encode_checkpoint_value(obj) + + assert isinstance(result, dict) + assert DATACLASS_MARKER in result + assert "value" in result + assert result["value"] == {"name": "test", "value": 42} + + +def test_encode_nested_dataclass() -> None: + """Test encoding a dataclass with nested dataclass fields.""" + inner = SimpleDataclass(name="inner", value=10) + outer = NestedDataclass(outer_name="outer", inner=inner) + result = encode_checkpoint_value(outer) + + assert isinstance(result, dict) + assert DATACLASS_MARKER in result + assert "value" in result + + outer_value = result["value"] + assert outer_value["outer_name"] == "outer" + assert DATACLASS_MARKER in outer_value["inner"] + + +def test_encode_list_of_dataclasses() -> None: + """Test encoding a list containing dataclass instances.""" + data = [ + SimpleDataclass(name="first", value=1), + SimpleDataclass(name="second", value=2), + ] + result = encode_checkpoint_value(data) + + assert isinstance(result, list) + assert len(result) == 2 + for item in result: + assert DATACLASS_MARKER in item + + +def test_encode_dict_with_dataclass_values() -> None: + """Test encoding a dictionary with dataclass values.""" + data = { + "item1": SimpleDataclass(name="first", value=1), + "item2": SimpleDataclass(name="second", value=2), + } + result = encode_checkpoint_value(data) + + assert isinstance(result, dict) + assert DATACLASS_MARKER in result["item1"] + assert DATACLASS_MARKER in result["item2"] + + +# --- Tests for model protocol encoding --- + + +def test_encode_model_with_to_dict() -> None: + """Test encoding an object implementing to_dict/from_dict protocol.""" + obj = ModelWithToDict(data="test_data") + result = encode_checkpoint_value(obj) + + assert isinstance(result, dict) + assert MODEL_MARKER in result + assert result["strategy"] == "to_dict" + assert result["value"] == {"data": "test_data"} + + +def test_encode_model_with_to_json() -> None: + """Test encoding an object implementing to_json/from_json protocol.""" + obj = ModelWithToJson(data="test_data") + result = encode_checkpoint_value(obj) + + assert isinstance(result, dict) + assert MODEL_MARKER in result + assert result["strategy"] == "to_json" + assert '"data": "test_data"' in result["value"] + + +# --- Tests for unknown object encoding --- + + +def test_encode_unknown_object_fallback_to_string() -> None: + """Test that unknown objects are encoded as strings.""" + obj = UnknownObject(value="test") + result = encode_checkpoint_value(obj) + + assert isinstance(result, str) + assert "UnknownObject" in result + + +# --- Tests for cycle detection --- + + +def test_encode_dict_with_self_reference() -> None: + """Test that dict self-references are detected and handled.""" + data: dict[str, Any] = {"name": "test"} + data["self"] = data # Create circular reference + + result = encode_checkpoint_value(data) + assert result["name"] == "test" + assert result["self"] == _CYCLE_SENTINEL + + +def test_encode_list_with_self_reference() -> None: + """Test that list self-references are detected and handled.""" + data: list[Any] = [1, 2] + data.append(data) # Create circular reference + + result = encode_checkpoint_value(data) + assert result[0] == 1 + assert result[1] == 2 + assert result[2] == _CYCLE_SENTINEL + + +# --- Tests for reserved keyword validation --- + + +def test_encode_dict_with_model_marker_and_value_raises() -> None: + """Test that encoding a dict with MODEL_MARKER and 'value' raises ValueError.""" + malicious_dict = { + MODEL_MARKER: "some.module:FakeClass", + "value": {"data": "test"}, + } + + with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): + encode_checkpoint_value(malicious_dict) + + +def test_encode_dict_with_dataclass_marker_and_value_raises() -> None: + """Test that encoding a dict with DATACLASS_MARKER and 'value' raises ValueError.""" + malicious_dict = { + DATACLASS_MARKER: "some.module:FakeClass", + "value": {"field": "test"}, + } + + with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): + encode_checkpoint_value(malicious_dict) + + +def test_encode_nested_dict_with_reserved_keys_raises() -> None: + """Test that encoding nested dict with reserved keys raises ValueError.""" + nested_data = { + "outer": { + MODEL_MARKER: "some.module:FakeClass", + "value": {"data": "test"}, + } + } + + with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): + encode_checkpoint_value(nested_data) + + +def test_encode_allows_marker_without_value() -> None: + """Test that a dict with marker key but without 'value' key is allowed.""" + data = { + MODEL_MARKER: "some.module:SomeClass", + "other_key": "allowed", + } + result = encode_checkpoint_value(data) + assert MODEL_MARKER in result + assert result["other_key"] == "allowed" + + +def test_encode_allows_value_without_marker() -> None: + """Test that a dict with 'value' key but without marker is allowed.""" + data = { + "value": {"nested": "data"}, + "other_key": "allowed", + } + result = encode_checkpoint_value(data) + assert "value" in result + assert result["other_key"] == "allowed" + + +# --- Tests for max depth protection --- + + +def test_encode_deep_nesting_triggers_max_depth() -> None: + """Test that very deep nesting triggers max depth protection.""" + # Create a deeply nested structure (over 100 levels) + data: dict[str, Any] = {"level": 0} + current = data + for i in range(105): + current["nested"] = {"level": i + 1} + current = current["nested"] + + result = encode_checkpoint_value(data) + + # Navigate to find the max_depth sentinel + current_result = result + found_max_depth = False + for _ in range(110): + if isinstance(current_result, dict) and "nested" in current_result: + current_result = current_result["nested"] + if current_result == "": + found_max_depth = True + break + else: + break + + assert found_max_depth, "Expected sentinel to be found in deeply nested structure" + + +# --- Tests for mixed complex structures --- + + +def test_encode_complex_mixed_structure() -> None: + """Test encoding a complex structure with mixed types.""" + data = { + "string_value": "hello", + "int_value": 42, + "float_value": 3.14, + "bool_value": True, + "none_value": None, + "list_value": [1, 2, 3], + "nested_dict": {"a": 1, "b": 2}, + "dataclass_value": SimpleDataclass(name="test", value=100), + } + + result = encode_checkpoint_value(data) + + assert result["string_value"] == "hello" + assert result["int_value"] == 42 + assert result["float_value"] == 3.14 + assert result["bool_value"] is True + assert result["none_value"] is None + assert result["list_value"] == [1, 2, 3] + assert result["nested_dict"] == {"a": 1, "b": 2} + assert DATACLASS_MARKER in result["dataclass_value"] From 610dddeff85272a3e0060b6336175796012e2b22 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 21 Jan 2026 21:57:46 +0000 Subject: [PATCH 6/9] Remove serialization-time reserved keyword validation to fix failing tests The serialization-time validation was too aggressive and blocked legitimate use cases where encoded data was being re-encoded. Security is now enforced only at deserialization time by validating that classes marked with DATACLASS_MARKER are actual dataclasses and classes marked with MODEL_MARKER actually support the model protocol. Co-authored-by: TaoChenOSU <12570346+TaoChenOSU@users.noreply.github.com> --- .../_workflows/_checkpoint_encoding.py | 10 ---- .../tests/workflow/test_checkpoint_decode.py | 56 +++++++++---------- .../tests/workflow/test_checkpoint_encode.py | 51 ++++++++++------- 3 files changed, 56 insertions(+), 61 deletions(-) diff --git a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py index d50270f152..0517ae46f8 100644 --- a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py +++ b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py @@ -92,16 +92,6 @@ def _enc(v: Any, stack: set[int], depth: int) -> Any: return _CYCLE_SENTINEL stack.add(oid) try: - # Check for reserved marker keys that could cause deserialization issues - has_model_marker = MODEL_MARKER in v_dict - has_dataclass_marker = DATACLASS_MARKER in v_dict - has_value_key = "value" in v_dict - if (has_model_marker or has_dataclass_marker) and has_value_key: - marker = MODEL_MARKER if has_model_marker else DATACLASS_MARKER - raise ValueError( - f"Cannot encode dict containing reserved checkpoint marker key '{marker}' " - f"with 'value' key. These keys are reserved for polymorphic serialization." - ) json_dict: dict[str, Any] = {} for k_any, val_any in v_dict.items(): # type: ignore[assignment] k_str: str = str(k_any) diff --git a/python/packages/core/tests/workflow/test_checkpoint_decode.py b/python/packages/core/tests/workflow/test_checkpoint_decode.py index 64b29e2fac..431c70cc3c 100644 --- a/python/packages/core/tests/workflow/test_checkpoint_decode.py +++ b/python/packages/core/tests/workflow/test_checkpoint_decode.py @@ -3,7 +3,6 @@ from dataclasses import dataclass # noqa: I001 from typing import Any, cast -import pytest from agent_framework._workflows._checkpoint_encoding import ( DATACLASS_MARKER, @@ -132,32 +131,8 @@ def test_encode_decode_nested_structures() -> None: assert response.original_request.request_id == "req-1" -def test_encode_raises_error_for_reserved_model_marker_with_value() -> None: - """Test that encoding a dict with MODEL_MARKER and 'value' keys raises an error.""" - malicious_dict = { - MODEL_MARKER: "some.module:FakeClass", - "value": {"data": "test"}, - "strategy": "to_dict", - } - - with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): - encode_checkpoint_value(malicious_dict) - - -def test_encode_raises_error_for_reserved_dataclass_marker_with_value() -> None: - """Test that encoding a dict with DATACLASS_MARKER and 'value' keys raises an error.""" - malicious_dict = { - DATACLASS_MARKER: "some.module:FakeClass", - "value": {"field1": "test"}, - } - - with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): - encode_checkpoint_value(malicious_dict) - - def test_encode_allows_marker_key_without_value_key() -> None: """Test that encoding a dict with only the marker key (no 'value') is allowed.""" - # This should not raise, as it doesn't match the marker pattern dict_with_marker_only = { MODEL_MARKER: "some.module:FakeClass", "other_key": "test", @@ -178,6 +153,22 @@ def test_encode_allows_value_key_without_marker_key() -> None: assert "other_key" in encoded +def test_encode_allows_marker_with_value_key() -> None: + """Test that encoding a dict with marker and 'value' keys is allowed. + + This is allowed because legitimate encoded data may contain these keys, + and security is enforced at deserialization time by validating class types. + """ + dict_with_both = { + MODEL_MARKER: "some.module:SomeClass", + "value": {"data": "test"}, + "strategy": "to_dict", + } + encoded = encode_checkpoint_value(dict_with_both) + assert MODEL_MARKER in encoded + assert "value" in encoded + + class NotADataclass: """A regular class that is not a dataclass.""" @@ -229,14 +220,19 @@ def test_decode_rejects_non_model_with_model_marker() -> None: assert decoded["value"] == "test_value" -def test_encode_raises_for_nested_dict_with_reserved_keys() -> None: - """Test that encoding fails for nested dicts containing reserved marker patterns.""" +def test_encode_allows_nested_dict_with_marker_keys() -> None: + """Test that encoding allows nested dicts containing marker patterns. + + Security is enforced at deserialization time, not serialization time, + so legitimate encoded data can contain markers at any nesting level. + """ nested_data = { "outer": { - MODEL_MARKER: "some.module:FakeClass", + MODEL_MARKER: "some.module:SomeClass", "value": {"data": "test"}, } } - with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): - encode_checkpoint_value(nested_data) + encoded = encode_checkpoint_value(nested_data) + assert "outer" in encoded + assert MODEL_MARKER in encoded["outer"] diff --git a/python/packages/core/tests/workflow/test_checkpoint_encode.py b/python/packages/core/tests/workflow/test_checkpoint_encode.py index 4c18ba4694..3f4db1f864 100644 --- a/python/packages/core/tests/workflow/test_checkpoint_encode.py +++ b/python/packages/core/tests/workflow/test_checkpoint_encode.py @@ -3,8 +3,6 @@ from dataclasses import dataclass from typing import Any -import pytest - from agent_framework._workflows._checkpoint_encoding import ( _CYCLE_SENTINEL, DATACLASS_MARKER, @@ -297,42 +295,53 @@ def test_encode_list_with_self_reference() -> None: assert result[2] == _CYCLE_SENTINEL -# --- Tests for reserved keyword validation --- +# --- Tests for reserved keyword handling --- +# Note: Security is enforced at deserialization time by validating class types, +# not at serialization time. This allows legitimate encoded data to be re-encoded. + +def test_encode_allows_dict_with_model_marker_and_value() -> None: + """Test that encoding a dict with MODEL_MARKER and 'value' is allowed. -def test_encode_dict_with_model_marker_and_value_raises() -> None: - """Test that encoding a dict with MODEL_MARKER and 'value' raises ValueError.""" - malicious_dict = { - MODEL_MARKER: "some.module:FakeClass", + Security is enforced at deserialization time, not serialization time. + """ + data = { + MODEL_MARKER: "some.module:SomeClass", "value": {"data": "test"}, } + result = encode_checkpoint_value(data) + assert MODEL_MARKER in result + assert "value" in result - with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): - encode_checkpoint_value(malicious_dict) +def test_encode_allows_dict_with_dataclass_marker_and_value() -> None: + """Test that encoding a dict with DATACLASS_MARKER and 'value' is allowed. -def test_encode_dict_with_dataclass_marker_and_value_raises() -> None: - """Test that encoding a dict with DATACLASS_MARKER and 'value' raises ValueError.""" - malicious_dict = { - DATACLASS_MARKER: "some.module:FakeClass", + Security is enforced at deserialization time, not serialization time. + """ + data = { + DATACLASS_MARKER: "some.module:SomeClass", "value": {"field": "test"}, } + result = encode_checkpoint_value(data) + assert DATACLASS_MARKER in result + assert "value" in result - with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): - encode_checkpoint_value(malicious_dict) +def test_encode_allows_nested_dict_with_marker_keys() -> None: + """Test that encoding nested dict with marker keys is allowed. -def test_encode_nested_dict_with_reserved_keys_raises() -> None: - """Test that encoding nested dict with reserved keys raises ValueError.""" + Security is enforced at deserialization time, not serialization time. + """ nested_data = { "outer": { - MODEL_MARKER: "some.module:FakeClass", + MODEL_MARKER: "some.module:SomeClass", "value": {"data": "test"}, } } - - with pytest.raises(ValueError, match="Cannot encode dict containing reserved checkpoint marker key"): - encode_checkpoint_value(nested_data) + result = encode_checkpoint_value(nested_data) + assert "outer" in result + assert MODEL_MARKER in result["outer"] def test_encode_allows_marker_without_value() -> None: From 64446f98935b7b2f2df07b4bff3011372a5eacd5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 21 Jan 2026 22:24:08 +0000 Subject: [PATCH 7/9] Apply ruff formatting to checkpoint encoding file Co-authored-by: TaoChenOSU <12570346+TaoChenOSU@users.noreply.github.com> --- .../agent_framework/_workflows/_checkpoint_encoding.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py index 0517ae46f8..516a4547a0 100644 --- a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py +++ b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py @@ -148,9 +148,7 @@ def decode_checkpoint_value(value: Any) -> Any: if cls is not None: # Verify the class actually supports the model protocol if not _class_supports_model_protocol(cls): - logger.debug( - f"Class {type_key} does not support model protocol; returning raw value" - ) + logger.debug(f"Class {type_key} does not support model protocol; returning raw value") return decoded_payload if strategy == "to_dict" and hasattr(cls, "from_dict"): with contextlib.suppress(Exception): @@ -177,9 +175,7 @@ def decode_checkpoint_value(value: Any) -> Any: cls_dc: Any = getattr(module, class_name) # Verify the class is actually a dataclass type (not an instance) if not isinstance(cls_dc, type) or not is_dataclass(cls_dc): - logger.debug( - f"Class {type_key_dc} is not a dataclass type; returning raw value" - ) + logger.debug(f"Class {type_key_dc} is not a dataclass type; returning raw value") return decoded_raw constructed = _instantiate_checkpoint_dataclass(cls_dc, decoded_raw) if constructed is not None: From f515b880dc4dd5960202f36319ceed51b9a85bc2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 21 Jan 2026 23:18:37 +0000 Subject: [PATCH 8/9] Changes before error encountered Co-authored-by: TaoChenOSU <12570346+TaoChenOSU@users.noreply.github.com> --- .../_workflows/_checkpoint_encoding.py | 42 +++++++++- .../tests/workflow/test_checkpoint_decode.py | 23 ++++-- .../tests/workflow/test_checkpoint_encode.py | 81 ++++++++++++++----- 3 files changed, 114 insertions(+), 32 deletions(-) diff --git a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py index 516a4547a0..00920d25ed 100644 --- a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py +++ b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py @@ -10,6 +10,7 @@ # Checkpoint serialization helpers MODEL_MARKER = "__af_model__" DATACLASS_MARKER = "__af_dataclass__" +PRESERVED_MARKER = "__af_preserved__" # Guards to prevent runaway recursion while encoding arbitrary user data _MAX_ENCODE_DEPTH = 100 @@ -92,9 +93,22 @@ def _enc(v: Any, stack: set[int], depth: int) -> Any: return _CYCLE_SENTINEL stack.add(oid) try: - json_dict: dict[str, Any] = {} + # Check if this dict looks like a marker pattern (has marker key + "value" key) + # If so, preserve it to prevent confusion during deserialization + has_model_marker = MODEL_MARKER in v_dict + has_dataclass_marker = DATACLASS_MARKER in v_dict + has_value_key = "value" in v_dict + if (has_model_marker or has_dataclass_marker) and has_value_key: + # This is user data that looks like a marker pattern - preserve it + json_dict: dict[str, Any] = {} + for k_any, val_any in v_dict.items(): # type: ignore[assignment] + k_str: str = str(k_any) + json_dict[k_str] = _enc(val_any, stack, depth + 1) + return {PRESERVED_MARKER: True, "value": json_dict} + + json_dict = {} for k_any, val_any in v_dict.items(): # type: ignore[assignment] - k_str: str = str(k_any) + k_str = str(k_any) json_dict[k_str] = _enc(val_any, stack, depth + 1) return json_dict finally: @@ -132,6 +146,13 @@ def decode_checkpoint_value(value: Any) -> Any: """Recursively decode values previously encoded by encode_checkpoint_value.""" if isinstance(value, dict): value_dict = cast(dict[str, Any], value) # encoded form always uses string keys + + # Handle preserved marker first - this was user data that looked like a marker pattern + if PRESERVED_MARKER in value_dict and "value" in value_dict: + preserved_value = value_dict.get("value") + # Decode as a regular dict without marker interpretation + return _decode_as_regular_dict(preserved_value) + # Structured model marker handling if MODEL_MARKER in value_dict and "value" in value_dict: type_key: str | None = value_dict.get(MODEL_MARKER) # type: ignore[assignment] @@ -196,6 +217,23 @@ def decode_checkpoint_value(value: Any) -> Any: return value +def _decode_as_regular_dict(value: Any) -> Any: + """Decode value as a regular dict/list without marker interpretation. + + Used to recover preserved user data that looked like marker patterns. + """ + if isinstance(value, dict): + value_dict = cast(dict[str, Any], value) + decoded: dict[str, Any] = {} + for k_any, v_any in value_dict.items(): + decoded[k_any] = _decode_as_regular_dict(v_any) + return decoded + if isinstance(value, list): + value_list: list[Any] = value # type: ignore[assignment] + return [_decode_as_regular_dict(v_any) for v_any in value_list] + return value + + def _class_supports_model_protocol(cls: type[Any]) -> bool: """Check if a class type supports the model serialization protocol. diff --git a/python/packages/core/tests/workflow/test_checkpoint_decode.py b/python/packages/core/tests/workflow/test_checkpoint_decode.py index 431c70cc3c..224bf74ec8 100644 --- a/python/packages/core/tests/workflow/test_checkpoint_decode.py +++ b/python/packages/core/tests/workflow/test_checkpoint_decode.py @@ -7,6 +7,7 @@ from agent_framework._workflows._checkpoint_encoding import ( DATACLASS_MARKER, MODEL_MARKER, + PRESERVED_MARKER, decode_checkpoint_value, encode_checkpoint_value, ) @@ -154,10 +155,10 @@ def test_encode_allows_value_key_without_marker_key() -> None: def test_encode_allows_marker_with_value_key() -> None: - """Test that encoding a dict with marker and 'value' keys is allowed. + """Test that encoding a dict with marker and 'value' keys wraps it in preservation envelope. - This is allowed because legitimate encoded data may contain these keys, - and security is enforced at deserialization time by validating class types. + User data that looks like a marker pattern is preserved to prevent confusion + during deserialization. """ dict_with_both = { MODEL_MARKER: "some.module:SomeClass", @@ -165,8 +166,12 @@ def test_encode_allows_marker_with_value_key() -> None: "strategy": "to_dict", } encoded = encode_checkpoint_value(dict_with_both) - assert MODEL_MARKER in encoded + # Should be wrapped in preservation envelope + assert PRESERVED_MARKER in encoded + assert encoded[PRESERVED_MARKER] is True assert "value" in encoded + # Original dict is inside the envelope + assert MODEL_MARKER in encoded["value"] class NotADataclass: @@ -221,10 +226,10 @@ def test_decode_rejects_non_model_with_model_marker() -> None: def test_encode_allows_nested_dict_with_marker_keys() -> None: - """Test that encoding allows nested dicts containing marker patterns. + """Test that encoding wraps nested dicts containing marker patterns in preservation envelope. - Security is enforced at deserialization time, not serialization time, - so legitimate encoded data can contain markers at any nesting level. + User data that looks like marker patterns is preserved at any nesting level + to prevent confusion during deserialization. """ nested_data = { "outer": { @@ -235,4 +240,6 @@ def test_encode_allows_nested_dict_with_marker_keys() -> None: encoded = encode_checkpoint_value(nested_data) assert "outer" in encoded - assert MODEL_MARKER in encoded["outer"] + # Nested dict should be wrapped in preservation envelope + assert PRESERVED_MARKER in encoded["outer"] + assert encoded["outer"][PRESERVED_MARKER] is True diff --git a/python/packages/core/tests/workflow/test_checkpoint_encode.py b/python/packages/core/tests/workflow/test_checkpoint_encode.py index 3f4db1f864..7f2d4bfb9e 100644 --- a/python/packages/core/tests/workflow/test_checkpoint_encode.py +++ b/python/packages/core/tests/workflow/test_checkpoint_encode.py @@ -7,6 +7,8 @@ _CYCLE_SENTINEL, DATACLASS_MARKER, MODEL_MARKER, + PRESERVED_MARKER, + decode_checkpoint_value, encode_checkpoint_value, ) @@ -296,43 +298,44 @@ def test_encode_list_with_self_reference() -> None: # --- Tests for reserved keyword handling --- -# Note: Security is enforced at deserialization time by validating class types, -# not at serialization time. This allows legitimate encoded data to be re-encoded. +# User data containing marker keys + "value" is preserved in a special envelope +# during serialization and recovered during deserialization. -def test_encode_allows_dict_with_model_marker_and_value() -> None: - """Test that encoding a dict with MODEL_MARKER and 'value' is allowed. - - Security is enforced at deserialization time, not serialization time. - """ +def test_encode_preserves_dict_with_model_marker_and_value() -> None: + """Test that user dict with MODEL_MARKER and 'value' is preserved in envelope.""" data = { MODEL_MARKER: "some.module:SomeClass", "value": {"data": "test"}, } result = encode_checkpoint_value(data) - assert MODEL_MARKER in result + # Should be wrapped in preservation envelope + assert PRESERVED_MARKER in result + assert result[PRESERVED_MARKER] is True assert "value" in result + # The inner value should contain the original dict + assert MODEL_MARKER in result["value"] + assert result["value"]["value"] == {"data": "test"} -def test_encode_allows_dict_with_dataclass_marker_and_value() -> None: - """Test that encoding a dict with DATACLASS_MARKER and 'value' is allowed. - - Security is enforced at deserialization time, not serialization time. - """ +def test_encode_preserves_dict_with_dataclass_marker_and_value() -> None: + """Test that user dict with DATACLASS_MARKER and 'value' is preserved in envelope.""" data = { DATACLASS_MARKER: "some.module:SomeClass", "value": {"field": "test"}, } result = encode_checkpoint_value(data) - assert DATACLASS_MARKER in result + # Should be wrapped in preservation envelope + assert PRESERVED_MARKER in result + assert result[PRESERVED_MARKER] is True assert "value" in result + # The inner value should contain the original dict + assert DATACLASS_MARKER in result["value"] + assert result["value"]["value"] == {"field": "test"} -def test_encode_allows_nested_dict_with_marker_keys() -> None: - """Test that encoding nested dict with marker keys is allowed. - - Security is enforced at deserialization time, not serialization time. - """ +def test_encode_preserves_nested_dict_with_marker_keys() -> None: + """Test that nested dict with marker keys is preserved in envelope.""" nested_data = { "outer": { MODEL_MARKER: "some.module:SomeClass", @@ -341,27 +344,61 @@ def test_encode_allows_nested_dict_with_marker_keys() -> None: } result = encode_checkpoint_value(nested_data) assert "outer" in result - assert MODEL_MARKER in result["outer"] + # The nested dict should be wrapped in preservation envelope + assert PRESERVED_MARKER in result["outer"] + assert result["outer"][PRESERVED_MARKER] is True + + +def test_decode_recovers_preserved_dict_with_model_marker() -> None: + """Test that preserved dict with MODEL_MARKER is recovered correctly.""" + original_data = { + MODEL_MARKER: "some.module:SomeClass", + "value": {"data": "test"}, + } + encoded = encode_checkpoint_value(original_data) + decoded = decode_checkpoint_value(encoded) + # Should recover the original dict structure + assert MODEL_MARKER in decoded + assert decoded[MODEL_MARKER] == "some.module:SomeClass" + assert decoded["value"] == {"data": "test"} + + +def test_decode_recovers_preserved_dict_with_dataclass_marker() -> None: + """Test that preserved dict with DATACLASS_MARKER is recovered correctly.""" + original_data = { + DATACLASS_MARKER: "some.module:SomeClass", + "value": {"field": "test"}, + } + encoded = encode_checkpoint_value(original_data) + decoded = decode_checkpoint_value(encoded) + # Should recover the original dict structure + assert DATACLASS_MARKER in decoded + assert decoded[DATACLASS_MARKER] == "some.module:SomeClass" + assert decoded["value"] == {"field": "test"} def test_encode_allows_marker_without_value() -> None: - """Test that a dict with marker key but without 'value' key is allowed.""" + """Test that a dict with marker key but without 'value' key is NOT preserved.""" data = { MODEL_MARKER: "some.module:SomeClass", "other_key": "allowed", } result = encode_checkpoint_value(data) + # Should NOT be wrapped (no "value" key present) + assert PRESERVED_MARKER not in result assert MODEL_MARKER in result assert result["other_key"] == "allowed" def test_encode_allows_value_without_marker() -> None: - """Test that a dict with 'value' key but without marker is allowed.""" + """Test that a dict with 'value' key but without marker is NOT preserved.""" data = { "value": {"nested": "data"}, "other_key": "allowed", } result = encode_checkpoint_value(data) + # Should NOT be wrapped (no marker key present) + assert PRESERVED_MARKER not in result assert "value" in result assert result["other_key"] == "allowed" From b28b6f9f279fedc99c30916907a8dff23ce60209 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Wed, 21 Jan 2026 16:54:29 -0800 Subject: [PATCH 9/9] Revert "Changes before error encountered" This reverts commit f515b880dc4dd5960202f36319ceed51b9a85bc2. --- .../_workflows/_checkpoint_encoding.py | 42 +--------- .../tests/workflow/test_checkpoint_decode.py | 23 ++---- .../tests/workflow/test_checkpoint_encode.py | 81 +++++-------------- 3 files changed, 32 insertions(+), 114 deletions(-) diff --git a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py index 00920d25ed..516a4547a0 100644 --- a/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py +++ b/python/packages/core/agent_framework/_workflows/_checkpoint_encoding.py @@ -10,7 +10,6 @@ # Checkpoint serialization helpers MODEL_MARKER = "__af_model__" DATACLASS_MARKER = "__af_dataclass__" -PRESERVED_MARKER = "__af_preserved__" # Guards to prevent runaway recursion while encoding arbitrary user data _MAX_ENCODE_DEPTH = 100 @@ -93,22 +92,9 @@ def _enc(v: Any, stack: set[int], depth: int) -> Any: return _CYCLE_SENTINEL stack.add(oid) try: - # Check if this dict looks like a marker pattern (has marker key + "value" key) - # If so, preserve it to prevent confusion during deserialization - has_model_marker = MODEL_MARKER in v_dict - has_dataclass_marker = DATACLASS_MARKER in v_dict - has_value_key = "value" in v_dict - if (has_model_marker or has_dataclass_marker) and has_value_key: - # This is user data that looks like a marker pattern - preserve it - json_dict: dict[str, Any] = {} - for k_any, val_any in v_dict.items(): # type: ignore[assignment] - k_str: str = str(k_any) - json_dict[k_str] = _enc(val_any, stack, depth + 1) - return {PRESERVED_MARKER: True, "value": json_dict} - - json_dict = {} + json_dict: dict[str, Any] = {} for k_any, val_any in v_dict.items(): # type: ignore[assignment] - k_str = str(k_any) + k_str: str = str(k_any) json_dict[k_str] = _enc(val_any, stack, depth + 1) return json_dict finally: @@ -146,13 +132,6 @@ def decode_checkpoint_value(value: Any) -> Any: """Recursively decode values previously encoded by encode_checkpoint_value.""" if isinstance(value, dict): value_dict = cast(dict[str, Any], value) # encoded form always uses string keys - - # Handle preserved marker first - this was user data that looked like a marker pattern - if PRESERVED_MARKER in value_dict and "value" in value_dict: - preserved_value = value_dict.get("value") - # Decode as a regular dict without marker interpretation - return _decode_as_regular_dict(preserved_value) - # Structured model marker handling if MODEL_MARKER in value_dict and "value" in value_dict: type_key: str | None = value_dict.get(MODEL_MARKER) # type: ignore[assignment] @@ -217,23 +196,6 @@ def decode_checkpoint_value(value: Any) -> Any: return value -def _decode_as_regular_dict(value: Any) -> Any: - """Decode value as a regular dict/list without marker interpretation. - - Used to recover preserved user data that looked like marker patterns. - """ - if isinstance(value, dict): - value_dict = cast(dict[str, Any], value) - decoded: dict[str, Any] = {} - for k_any, v_any in value_dict.items(): - decoded[k_any] = _decode_as_regular_dict(v_any) - return decoded - if isinstance(value, list): - value_list: list[Any] = value # type: ignore[assignment] - return [_decode_as_regular_dict(v_any) for v_any in value_list] - return value - - def _class_supports_model_protocol(cls: type[Any]) -> bool: """Check if a class type supports the model serialization protocol. diff --git a/python/packages/core/tests/workflow/test_checkpoint_decode.py b/python/packages/core/tests/workflow/test_checkpoint_decode.py index 224bf74ec8..431c70cc3c 100644 --- a/python/packages/core/tests/workflow/test_checkpoint_decode.py +++ b/python/packages/core/tests/workflow/test_checkpoint_decode.py @@ -7,7 +7,6 @@ from agent_framework._workflows._checkpoint_encoding import ( DATACLASS_MARKER, MODEL_MARKER, - PRESERVED_MARKER, decode_checkpoint_value, encode_checkpoint_value, ) @@ -155,10 +154,10 @@ def test_encode_allows_value_key_without_marker_key() -> None: def test_encode_allows_marker_with_value_key() -> None: - """Test that encoding a dict with marker and 'value' keys wraps it in preservation envelope. + """Test that encoding a dict with marker and 'value' keys is allowed. - User data that looks like a marker pattern is preserved to prevent confusion - during deserialization. + This is allowed because legitimate encoded data may contain these keys, + and security is enforced at deserialization time by validating class types. """ dict_with_both = { MODEL_MARKER: "some.module:SomeClass", @@ -166,12 +165,8 @@ def test_encode_allows_marker_with_value_key() -> None: "strategy": "to_dict", } encoded = encode_checkpoint_value(dict_with_both) - # Should be wrapped in preservation envelope - assert PRESERVED_MARKER in encoded - assert encoded[PRESERVED_MARKER] is True + assert MODEL_MARKER in encoded assert "value" in encoded - # Original dict is inside the envelope - assert MODEL_MARKER in encoded["value"] class NotADataclass: @@ -226,10 +221,10 @@ def test_decode_rejects_non_model_with_model_marker() -> None: def test_encode_allows_nested_dict_with_marker_keys() -> None: - """Test that encoding wraps nested dicts containing marker patterns in preservation envelope. + """Test that encoding allows nested dicts containing marker patterns. - User data that looks like marker patterns is preserved at any nesting level - to prevent confusion during deserialization. + Security is enforced at deserialization time, not serialization time, + so legitimate encoded data can contain markers at any nesting level. """ nested_data = { "outer": { @@ -240,6 +235,4 @@ def test_encode_allows_nested_dict_with_marker_keys() -> None: encoded = encode_checkpoint_value(nested_data) assert "outer" in encoded - # Nested dict should be wrapped in preservation envelope - assert PRESERVED_MARKER in encoded["outer"] - assert encoded["outer"][PRESERVED_MARKER] is True + assert MODEL_MARKER in encoded["outer"] diff --git a/python/packages/core/tests/workflow/test_checkpoint_encode.py b/python/packages/core/tests/workflow/test_checkpoint_encode.py index 7f2d4bfb9e..3f4db1f864 100644 --- a/python/packages/core/tests/workflow/test_checkpoint_encode.py +++ b/python/packages/core/tests/workflow/test_checkpoint_encode.py @@ -7,8 +7,6 @@ _CYCLE_SENTINEL, DATACLASS_MARKER, MODEL_MARKER, - PRESERVED_MARKER, - decode_checkpoint_value, encode_checkpoint_value, ) @@ -298,44 +296,43 @@ def test_encode_list_with_self_reference() -> None: # --- Tests for reserved keyword handling --- -# User data containing marker keys + "value" is preserved in a special envelope -# during serialization and recovered during deserialization. +# Note: Security is enforced at deserialization time by validating class types, +# not at serialization time. This allows legitimate encoded data to be re-encoded. -def test_encode_preserves_dict_with_model_marker_and_value() -> None: - """Test that user dict with MODEL_MARKER and 'value' is preserved in envelope.""" +def test_encode_allows_dict_with_model_marker_and_value() -> None: + """Test that encoding a dict with MODEL_MARKER and 'value' is allowed. + + Security is enforced at deserialization time, not serialization time. + """ data = { MODEL_MARKER: "some.module:SomeClass", "value": {"data": "test"}, } result = encode_checkpoint_value(data) - # Should be wrapped in preservation envelope - assert PRESERVED_MARKER in result - assert result[PRESERVED_MARKER] is True + assert MODEL_MARKER in result assert "value" in result - # The inner value should contain the original dict - assert MODEL_MARKER in result["value"] - assert result["value"]["value"] == {"data": "test"} -def test_encode_preserves_dict_with_dataclass_marker_and_value() -> None: - """Test that user dict with DATACLASS_MARKER and 'value' is preserved in envelope.""" +def test_encode_allows_dict_with_dataclass_marker_and_value() -> None: + """Test that encoding a dict with DATACLASS_MARKER and 'value' is allowed. + + Security is enforced at deserialization time, not serialization time. + """ data = { DATACLASS_MARKER: "some.module:SomeClass", "value": {"field": "test"}, } result = encode_checkpoint_value(data) - # Should be wrapped in preservation envelope - assert PRESERVED_MARKER in result - assert result[PRESERVED_MARKER] is True + assert DATACLASS_MARKER in result assert "value" in result - # The inner value should contain the original dict - assert DATACLASS_MARKER in result["value"] - assert result["value"]["value"] == {"field": "test"} -def test_encode_preserves_nested_dict_with_marker_keys() -> None: - """Test that nested dict with marker keys is preserved in envelope.""" +def test_encode_allows_nested_dict_with_marker_keys() -> None: + """Test that encoding nested dict with marker keys is allowed. + + Security is enforced at deserialization time, not serialization time. + """ nested_data = { "outer": { MODEL_MARKER: "some.module:SomeClass", @@ -344,61 +341,27 @@ def test_encode_preserves_nested_dict_with_marker_keys() -> None: } result = encode_checkpoint_value(nested_data) assert "outer" in result - # The nested dict should be wrapped in preservation envelope - assert PRESERVED_MARKER in result["outer"] - assert result["outer"][PRESERVED_MARKER] is True - - -def test_decode_recovers_preserved_dict_with_model_marker() -> None: - """Test that preserved dict with MODEL_MARKER is recovered correctly.""" - original_data = { - MODEL_MARKER: "some.module:SomeClass", - "value": {"data": "test"}, - } - encoded = encode_checkpoint_value(original_data) - decoded = decode_checkpoint_value(encoded) - # Should recover the original dict structure - assert MODEL_MARKER in decoded - assert decoded[MODEL_MARKER] == "some.module:SomeClass" - assert decoded["value"] == {"data": "test"} - - -def test_decode_recovers_preserved_dict_with_dataclass_marker() -> None: - """Test that preserved dict with DATACLASS_MARKER is recovered correctly.""" - original_data = { - DATACLASS_MARKER: "some.module:SomeClass", - "value": {"field": "test"}, - } - encoded = encode_checkpoint_value(original_data) - decoded = decode_checkpoint_value(encoded) - # Should recover the original dict structure - assert DATACLASS_MARKER in decoded - assert decoded[DATACLASS_MARKER] == "some.module:SomeClass" - assert decoded["value"] == {"field": "test"} + assert MODEL_MARKER in result["outer"] def test_encode_allows_marker_without_value() -> None: - """Test that a dict with marker key but without 'value' key is NOT preserved.""" + """Test that a dict with marker key but without 'value' key is allowed.""" data = { MODEL_MARKER: "some.module:SomeClass", "other_key": "allowed", } result = encode_checkpoint_value(data) - # Should NOT be wrapped (no "value" key present) - assert PRESERVED_MARKER not in result assert MODEL_MARKER in result assert result["other_key"] == "allowed" def test_encode_allows_value_without_marker() -> None: - """Test that a dict with 'value' key but without marker is NOT preserved.""" + """Test that a dict with 'value' key but without marker is allowed.""" data = { "value": {"nested": "data"}, "other_key": "allowed", } result = encode_checkpoint_value(data) - # Should NOT be wrapped (no marker key present) - assert PRESERVED_MARKER not in result assert "value" in result assert result["other_key"] == "allowed"