From c9646bbeb55e0e38f50c5efc0e345d15dd94fca0 Mon Sep 17 00:00:00 2001
From: uros-ivetic <uros@autoblocks.ai>
Date: Mon, 6 Oct 2025 10:43:10 +0200
Subject: [PATCH 01/10] feature: add evaluator metadata support to V2 API for
 custom data display

---
 autoblocks/_impl/testing/v2/api.py         | 2 ++
 autoblocks/_impl/testing/v2/run_manager.py | 9 ++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/autoblocks/_impl/testing/v2/api.py b/autoblocks/_impl/testing/v2/api.py
index 27ab8350..2797760a 100644
--- a/autoblocks/_impl/testing/v2/api.py
+++ b/autoblocks/_impl/testing/v2/api.py
@@ -156,6 +156,7 @@ async def send_create_result(
     evaluator_id_to_result: dict[str, bool],
     evaluator_id_to_reason: dict[str, str],
     evaluator_id_to_score: dict[str, float],
+    evaluator_id_to_metadata: dict[str, dict],
     run_message: Optional[str] = None,
 ) -> Response:
     return await post_to_api(
@@ -175,5 +176,6 @@ async def send_create_result(
             evaluatorIdToResult=evaluator_id_to_result,
             evaluatorIdToReason=evaluator_id_to_reason,
             evaluatorIdToScore=evaluator_id_to_score,
+            evaluatorIdToMetadata=evaluator_id_to_metadata,
         ),
     )
diff --git a/autoblocks/_impl/testing/v2/run_manager.py b/autoblocks/_impl/testing/v2/run_manager.py
index 04011a0b..1f69b4fa 100644
--- a/autoblocks/_impl/testing/v2/run_manager.py
+++ b/autoblocks/_impl/testing/v2/run_manager.py
@@ -99,10 +99,11 @@ async def _compute_evaluations(
     @staticmethod
     def _evaluations_to_maps(
         evals: List[EvaluationWithId],
-    ) -> tuple[dict[str, bool], dict[str, str], dict[str, float]]:
+    ) -> tuple[dict[str, bool], dict[str, str], dict[str, float], dict[str, dict]]:
         id_to_result: dict[str, bool] = {}
         id_to_reason: dict[str, str] = {}
         id_to_score: dict[str, float] = {}
+        id_to_metadata: dict[str, dict] = {}
 
         for e in evals:
             # Reconstruct Evaluation to compute passed()
@@ -137,7 +138,8 @@ def _evaluations_to_maps(
 
             id_to_reason[e.id] = reason
             id_to_score[e.id] = e.score
-        return id_to_result, id_to_reason, id_to_score
+            id_to_metadata[e.id] = e.metadata or {}
+        return id_to_result, id_to_reason, id_to_score, id_to_metadata
 
     async def async_add_result(
         self,
@@ -162,7 +164,7 @@ async def async_add_result(
             output=output,
             evaluators=evaluators or [],
         )
-        eval_result_map, eval_reason_map, eval_score_map = self._evaluations_to_maps(evals)
+        eval_result_map, eval_reason_map, eval_score_map, eval_metadata_map = self._evaluations_to_maps(evals)
 
         # Serialize input/output with standard json to match expected formatting in tests
         input_raw = json.dumps(serialize_test_case(test_case))
@@ -186,6 +188,7 @@ async def async_add_result(
             evaluator_id_to_result=eval_result_map,
             evaluator_id_to_reason=eval_reason_map,
             evaluator_id_to_score=eval_score_map,
+            evaluator_id_to_metadata=eval_metadata_map,
             run_message=self.run_message,
         )
         data = resp.json()

From b641144fadbd57f322763ff9aa2538074c9c9f99 Mon Sep 17 00:00:00 2001
From: uros-ivetic <uros@autoblocks.ai>
Date: Mon, 6 Oct 2025 10:56:54 +0200
Subject: [PATCH 02/10] fix: Missing type parameters

---
 autoblocks/_impl/testing/v2/api.py         | 2 +-
 autoblocks/_impl/testing/v2/run_manager.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/autoblocks/_impl/testing/v2/api.py b/autoblocks/_impl/testing/v2/api.py
index 2797760a..5c6e7ab0 100644
--- a/autoblocks/_impl/testing/v2/api.py
+++ b/autoblocks/_impl/testing/v2/api.py
@@ -156,7 +156,7 @@ async def send_create_result(
     evaluator_id_to_result: dict[str, bool],
     evaluator_id_to_reason: dict[str, str],
     evaluator_id_to_score: dict[str, float],
-    evaluator_id_to_metadata: dict[str, dict],
+    evaluator_id_to_metadata: dict[str, dict[str, Any]],
     run_message: Optional[str] = None,
 ) -> Response:
     return await post_to_api(
diff --git a/autoblocks/_impl/testing/v2/run_manager.py b/autoblocks/_impl/testing/v2/run_manager.py
index 1f69b4fa..eeb6bc27 100644
--- a/autoblocks/_impl/testing/v2/run_manager.py
+++ b/autoblocks/_impl/testing/v2/run_manager.py
@@ -99,11 +99,11 @@ async def _compute_evaluations(
     @staticmethod
     def _evaluations_to_maps(
         evals: List[EvaluationWithId],
-    ) -> tuple[dict[str, bool], dict[str, str], dict[str, float], dict[str, dict]]:
+    ) -> tuple[dict[str, bool], dict[str, str], dict[str, float], dict[str, dict[str, Any]]]:
         id_to_result: dict[str, bool] = {}
         id_to_reason: dict[str, str] = {}
         id_to_score: dict[str, float] = {}
-        id_to_metadata: dict[str, dict] = {}
+        id_to_metadata: dict[str, dict[str, Any]] = {}
 
         for e in evals:
             # Reconstruct Evaluation to compute passed()

From e7222fdbc1b899b8838ab844dabdbd9c77003090 Mon Sep 17 00:00:00 2001
From: uros-ivetic <uros@autoblocks.ai>
Date: Mon, 6 Oct 2025 11:03:07 +0200
Subject: [PATCH 03/10] update: tests to include v2

---
 tests/autoblocks/test_run_manager_v2.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/autoblocks/test_run_manager_v2.py b/tests/autoblocks/test_run_manager_v2.py
index 1ba9e311..4e61717c 100644
--- a/tests/autoblocks/test_run_manager_v2.py
+++ b/tests/autoblocks/test_run_manager_v2.py
@@ -94,6 +94,7 @@ def test_full_lifecycle_v2(httpx_mock):
             evaluatorIdToResult={"evaluator-external-id": True},
             evaluatorIdToReason={"evaluator-external-id": "ok"},
             evaluatorIdToScore={"evaluator-external-id": 1},
+            evaluatorIdToMetadata={"evaluator-external-id": {"reason": "ok"}},
         ),
         json=dict(executionId="mock-exec-id"),
     )
@@ -180,6 +181,7 @@ def test_add_result_without_evaluators_sends_empty_maps(httpx_mock):
             evaluatorIdToResult={},
             evaluatorIdToReason={},
             evaluatorIdToScore={},
+            evaluatorIdToMetadata={},
         ),
         json=dict(executionId="exec-1"),
     )

From 8f60f1092d8bd6e3c3b6996acfb01fe9f4d0a898 Mon Sep 17 00:00:00 2001
From: uros-ivetic <uros@autoblocks.ai>
Date: Tue, 7 Oct 2025 14:00:24 +0200
Subject: [PATCH 04/10] test

---
 autoblocks/_impl/testing/v2/run_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autoblocks/_impl/testing/v2/run_manager.py b/autoblocks/_impl/testing/v2/run_manager.py
index eeb6bc27..f76ccf75 100644
--- a/autoblocks/_impl/testing/v2/run_manager.py
+++ b/autoblocks/_impl/testing/v2/run_manager.py
@@ -263,4 +263,4 @@ def create_human_review(
                 rubric_id=rubric_id,
             ),
             global_state.event_loop(),
-        ).result()
+        ).result()
\ No newline at end of file

From db787059fd9b3f5f6683b19a6b42329fd3270e24 Mon Sep 17 00:00:00 2001
From: uros-ivetic <uros@autoblocks.ai>
Date: Tue, 7 Oct 2025 14:03:52 +0200
Subject: [PATCH 05/10] pre commit fix newline

---
 autoblocks/_impl/testing/v2/run_manager.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/autoblocks/_impl/testing/v2/run_manager.py b/autoblocks/_impl/testing/v2/run_manager.py
index f76ccf75..544de892 100644
--- a/autoblocks/_impl/testing/v2/run_manager.py
+++ b/autoblocks/_impl/testing/v2/run_manager.py
@@ -263,4 +263,5 @@ def create_human_review(
                 rubric_id=rubric_id,
             ),
             global_state.event_loop(),
-        ).result()
\ No newline at end of file
+        ).result()
+        
\ No newline at end of file

From 5c801573b55d025e9bc76c5140a02b00e8c3240b Mon Sep 17 00:00:00 2001
From: uros-ivetic <uros@autoblocks.ai>
Date: Thu, 9 Oct 2025 11:45:33 +0200
Subject: [PATCH 06/10] fix pre-commit checks

---
 autoblocks/_impl/testing/v2/run_manager.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/autoblocks/_impl/testing/v2/run_manager.py b/autoblocks/_impl/testing/v2/run_manager.py
index 544de892..eeb6bc27 100644
--- a/autoblocks/_impl/testing/v2/run_manager.py
+++ b/autoblocks/_impl/testing/v2/run_manager.py
@@ -264,4 +264,3 @@ def create_human_review(
             ),
             global_state.event_loop(),
         ).result()
-        
\ No newline at end of file

From c42985f293ee9ae166d3595e2b42116bef0571c7 Mon Sep 17 00:00:00 2001
From: uros-ivetic <uros@autoblocks.ai>
Date: Thu, 9 Oct 2025 11:51:27 +0200
Subject: [PATCH 07/10] test

---
 autoblocks/_impl/testing/v2/run_manager.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/autoblocks/_impl/testing/v2/run_manager.py b/autoblocks/_impl/testing/v2/run_manager.py
index eeb6bc27..5f0e7b0f 100644
--- a/autoblocks/_impl/testing/v2/run_manager.py
+++ b/autoblocks/_impl/testing/v2/run_manager.py
@@ -264,3 +264,4 @@ def create_human_review(
             ),
             global_state.event_loop(),
         ).result()
+

From e3be8e2d15c2c1ebb8e847d3bf503e255b5c021f Mon Sep 17 00:00:00 2001
From: uros-ivetic <uros@autoblocks.ai>
Date: Thu, 9 Oct 2025 11:57:10 +0200
Subject: [PATCH 08/10] test

---
 autoblocks/_impl/testing/v2/run_manager.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/autoblocks/_impl/testing/v2/run_manager.py b/autoblocks/_impl/testing/v2/run_manager.py
index 5f0e7b0f..eeb6bc27 100644
--- a/autoblocks/_impl/testing/v2/run_manager.py
+++ b/autoblocks/_impl/testing/v2/run_manager.py
@@ -264,4 +264,3 @@ def create_human_review(
             ),
             global_state.event_loop(),
         ).result()
-

From 4640ffaa8144b3ed7291ed3ef9185c6ff9312b63 Mon Sep 17 00:00:00 2001
From: uros-ivetic <uros@autoblocks.ai>
Date: Thu, 9 Oct 2025 12:00:25 +0200
Subject: [PATCH 09/10] test

---
 autoblocks/_impl/testing/v2/run_manager.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/autoblocks/_impl/testing/v2/run_manager.py b/autoblocks/_impl/testing/v2/run_manager.py
index eeb6bc27..5f0e7b0f 100644
--- a/autoblocks/_impl/testing/v2/run_manager.py
+++ b/autoblocks/_impl/testing/v2/run_manager.py
@@ -264,3 +264,4 @@ def create_human_review(
             ),
             global_state.event_loop(),
         ).result()
+

From 5a16e22a8964e6f6b4ae428a448b922560c01f18 Mon Sep 17 00:00:00 2001
From: uros-ivetic <uros@autoblocks.ai>
Date: Thu, 9 Oct 2025 12:10:51 +0200
Subject: [PATCH 10/10] test

---
 autoblocks/_impl/testing/v2/run_manager.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/autoblocks/_impl/testing/v2/run_manager.py b/autoblocks/_impl/testing/v2/run_manager.py
index 5f0e7b0f..eeb6bc27 100644
--- a/autoblocks/_impl/testing/v2/run_manager.py
+++ b/autoblocks/_impl/testing/v2/run_manager.py
@@ -264,4 +264,3 @@ def create_human_review(
             ),
             global_state.event_loop(),
         ).result()
-