From 797a14d1cd015a2ee0b47641cd1674b06bf1069b Mon Sep 17 00:00:00 2001 From: Rakhman Asmatullayev Date: Fri, 13 Feb 2026 14:39:15 +0000 Subject: [PATCH 1/4] start evaluation span in child process upon task execution --- benchmarks/utils/evaluation.py | 51 +++++++++++++++++++------- benchmarks/utils/laminar.py | 61 +++++++++++++++++++------------ pyproject.toml | 2 +- uv.lock | 65 ++++++++++++++++++---------------- 4 files changed, 112 insertions(+), 67 deletions(-) diff --git a/benchmarks/utils/evaluation.py b/benchmarks/utils/evaluation.py index 32177e38..bda531fb 100644 --- a/benchmarks/utils/evaluation.py +++ b/benchmarks/utils/evaluation.py @@ -13,7 +13,7 @@ from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Callable, List, Optional, Tuple +from typing import Any, Callable, List, Optional, Tuple from uuid import UUID from lmnr import Laminar @@ -369,19 +369,20 @@ def attempt_on_result(instance: EvalInstance, out: EvalOutput) -> None: pending_instances: dict[Future, PendingInstance] = {} try: for index, inst in enumerate(instances_to_process): - datapoint_id, lmnr_span_ctx = ( - LaminarService.get().create_evaluation_datapoint( - self.metadata.lmnr.eval_id, - inst.id, - self.metadata.model_dump(mode="json"), - index, - session_id=self._laminar_session_id, - trace_metadata=self._laminar_trace_meta, - ) + datapoint_id = LaminarService.get().create_evaluation_datapoint( + self.metadata.lmnr.eval_id, + inst.id, + self.metadata.model_dump(mode="json"), + index, ) fut = pool.submit( - self._process_one_mp, inst, lmnr_span_ctx, attempt + self._process_one_mp, + inst, + attempt, + lmnr_session_id=self._laminar_session_id, + lmnr_trace_metadata=self._laminar_trace_meta, + lmnr_datapoint_id=datapoint_id, ) futures.append(fut) pending_instances[fut] = PendingInstance( @@ -557,7 +558,12 @@ def _calculate_resource_factor(self, runtime_failure_count: int) -> int: # --- Worker-side method (executed in child processes) --------------------------- def _process_one_mp( - self, instance: EvalInstance, eval_span_ctx: str | None, critic_attempt: int + self, + instance: EvalInstance, + critic_attempt: int, + lmnr_session_id: str | None = None, + lmnr_trace_metadata: dict[str, Any] | None = None, + lmnr_datapoint_id: UUID | None = None, ) -> Tuple[EvalInstance, EvalOutput]: """Execute one instance in a child process with retry logic. @@ -578,6 +584,27 @@ def _process_one_mp( with redirect_stdout_stderr(log_file): logger.info("[child] start id=%s", instance.id) + # Create root "Evaluation" span in the child so the timeline + # reflects actual execution start, then update the datapoint + # with the span's trace_id. + eval_span = Laminar.start_active_span( + "Evaluation", + span_type="EVALUATION", # type: ignore + session_id=lmnr_session_id, + metadata=lmnr_trace_metadata, + ) + eval_span_ctx = Laminar.serialize_span_context(eval_span) + eval_span.end() + + if lmnr_datapoint_id is not None and self.metadata.lmnr is not None: + trace_id = UUID(int=eval_span.get_span_context().trace_id) + LaminarService.get().initialize() + LaminarService.get().update_datapoint_trace_id( + eval_id=self.metadata.lmnr.eval_id, + datapoint_id=lmnr_datapoint_id, + trace_id=trace_id, + ) + retry_count = 0 runtime_failure_count = 0 last_error = None diff --git a/benchmarks/utils/laminar.py b/benchmarks/utils/laminar.py index 92141819..2ea41eb7 100644 --- a/benchmarks/utils/laminar.py +++ b/benchmarks/utils/laminar.py @@ -103,51 +103,66 @@ def create_evaluation_datapoint( data: Any, metadata: dict[str, Any], index: int, - session_id: str | None = None, - trace_metadata: dict[str, Any] | None = None, - ) -> tuple[UUID | None, str | None]: + ) -> UUID | None: """ Create a Laminar datapoint. - Creates a new span for the evaluation and returns the span context. - Session ID and trace metadata are set on the span if provided. + + The datapoint is registered immediately (for UI visibility and progress + tracking) without a trace ID. The child process will later start the + root "Evaluation" span and update the datapoint with the real trace ID, + so the timeline accurately reflects when work begins. + + Returns the datapoint_id. """ if eval_id is None: - return None, None + return None client = self._get_client() if client is None: - return None, None + return None try: - eval_span = Laminar.start_active_span( - "Evaluation", - span_type="EVALUATION", # type: ignore - ) - # Set session ID and metadata on the active span - if session_id: - Laminar.set_trace_session_id(session_id) - if trace_metadata: - Laminar.set_trace_metadata(trace_metadata) - - lmnr_span_ctx = Laminar.serialize_span_context(eval_span) - eval_span.end() - return client.evals.create_datapoint( eval_id=eval_id, data=data, target=1, metadata=metadata, index=index, - trace_id=UUID(int=eval_span.get_span_context().trace_id), - ), lmnr_span_ctx + ) except Exception as exc: logger.debug( "Failed to create Laminar datapoint for eval %s: %s", eval_id, exc, ) - return None, None + return None + + def update_datapoint_trace_id( + self, + eval_id: UUID | None, + datapoint_id: UUID | None, + trace_id: UUID, + ) -> None: + """Link a datapoint to a trace after the span has been created.""" + + client = self._get_client() + if client is None or not eval_id or not datapoint_id: + return + + try: + client.evals.update_datapoint( + eval_id=eval_id, + datapoint_id=datapoint_id, + scores={}, + trace_id=trace_id, + ) + except Exception as exc: # pragma: no cover - defensive logging + logger.debug( + "Failed to update trace_id for datapoint %s: %s", + datapoint_id, + exc, + ) def _update_evaluation_datapoint( self, diff --git a/pyproject.toml b/pyproject.toml index 843655a6..5aefd615 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ dependencies = [ "python-json-logger>=3.3.0", "tenacity>=9.1.2", "websockets>=12", - "lmnr>=0.7.24", + "lmnr>=0.7.41", "multi-swe-bench>=1.1.1", "swt-bench @ git+https://github.com/logic-star-ai/swt-bench.git@5fdcd446ff05e248ecfffc19d560a210699f71f8", ] diff --git a/uv.lock b/uv.lock index 54abe03d..f81b01ff 100644 --- a/uv.lock +++ b/uv.lock @@ -1200,6 +1200,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, { url = "https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185, upload-time = "2025-08-07T13:45:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926, upload-time = "2025-08-07T13:53:15.251Z" }, { url = "https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839, upload-time = "2025-08-07T13:18:30.281Z" }, { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, @@ -1210,6 +1211,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, { url = "https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, + { url = "https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, @@ -1220,6 +1222,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, { url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, { url = "https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, { url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" }, { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" }, @@ -1709,7 +1712,7 @@ wheels = [ [[package]] name = "lmnr" -version = "0.7.25" +version = "0.7.41" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "grpcio", version = "1.67.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, @@ -1730,9 +1733,9 @@ dependencies = [ { name = "tenacity" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/10/bd/a65219ca6f09199e35a14a55acb503e3ac896db15018d342076bd24401e1/lmnr-0.7.25.tar.gz", hash = "sha256:a3a0ba9a305243bbe97f2fcb8afc7d39d201dc11107b4633c257b64b838b2979", size = 203876, upload-time = "2025-12-18T17:31:24.507Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/5b/e77e978b37037e7884ebbd9d663b3f4e219316b0e0cb1ff21a8579eb1b42/lmnr-0.7.41.tar.gz", hash = "sha256:24127f92726242fcf3b5c392e9c35ff68146c781810eaeef17262fe53d982301", size = 249830, upload-time = "2026-02-13T14:32:57.759Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/45/35/1f22e3fea98416d58dddbdbc63e18ddcfb2b8f850b8ec065652a90d99666/lmnr-0.7.25-py3-none-any.whl", hash = "sha256:c0539d5f8c8e59a2d5d0ab04e498a82351d51fde8cf04ef8b312424e0be537ac", size = 266040, upload-time = "2025-12-18T17:31:22.986Z" }, + { url = "https://files.pythonhosted.org/packages/80/aa/ce659a4988b7546824c97d2f580f08d3dc6d75a3ff764d5bbb5e4d967197/lmnr-0.7.41-py3-none-any.whl", hash = "sha256:f6cbb9ee2af0c691e3adc6d0962eb9932829588c99ba012346698c449dd15791", size = 327364, upload-time = "2026-02-13T14:32:55.675Z" }, ] [[package]] @@ -2365,7 +2368,7 @@ requires-dist = [ { name = "huggingface-hub" }, { name = "jinja2" }, { name = "litellm", specifier = ">=1.77.7.dev9" }, - { name = "lmnr", specifier = ">=0.7.24" }, + { name = "lmnr", specifier = ">=0.7.41" }, { name = "modal", specifier = ">=1.1.4" }, { name = "multi-swe-bench", specifier = ">=1.1.1" }, { name = "openhands-agent-server", editable = "vendor/software-agent-sdk/openhands-agent-server" }, @@ -2490,32 +2493,32 @@ requires-dist = [ [[package]] name = "opentelemetry-api" -version = "1.38.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "importlib-metadata" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/08/d8/0f354c375628e048bd0570645b310797299754730079853095bf000fba69/opentelemetry_api-1.38.0.tar.gz", hash = "sha256:f4c193b5e8acb0912b06ac5b16321908dd0843d75049c091487322284a3eea12", size = 65242, upload-time = "2025-10-16T08:35:50.25Z" } +sdist = { url = "https://files.pythonhosted.org/packages/97/b9/3161be15bb8e3ad01be8be5a968a9237c3027c5be504362ff800fca3e442/opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c", size = 65767, upload-time = "2025-12-11T13:32:39.182Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/a2/d86e01c28300bd41bab8f18afd613676e2bd63515417b77636fc1add426f/opentelemetry_api-1.38.0-py3-none-any.whl", hash = "sha256:2891b0197f47124454ab9f0cf58f3be33faca394457ac3e09daba13ff50aa582", size = 65947, upload-time = "2025-10-16T08:35:30.23Z" }, + { url = "https://files.pythonhosted.org/packages/cf/df/d3f1ddf4bb4cb50ed9b1139cc7b1c54c34a1e7ce8fd1b9a37c0d1551a6bd/opentelemetry_api-1.39.1-py3-none-any.whl", hash = "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", size = 66356, upload-time = "2025-12-11T13:32:17.304Z" }, ] [[package]] name = "opentelemetry-exporter-otlp-proto-common" -version = "1.38.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-proto" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/19/83/dd4660f2956ff88ed071e9e0e36e830df14b8c5dc06722dbde1841accbe8/opentelemetry_exporter_otlp_proto_common-1.38.0.tar.gz", hash = "sha256:e333278afab4695aa8114eeb7bf4e44e65c6607d54968271a249c180b2cb605c", size = 20431, upload-time = "2025-10-16T08:35:53.285Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e9/9d/22d241b66f7bbde88a3bfa6847a351d2c46b84de23e71222c6aae25c7050/opentelemetry_exporter_otlp_proto_common-1.39.1.tar.gz", hash = "sha256:763370d4737a59741c89a67b50f9e39271639ee4afc999dadfe768541c027464", size = 20409, upload-time = "2025-12-11T13:32:40.885Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/9e/55a41c9601191e8cd8eb626b54ee6827b9c9d4a46d736f32abc80d8039fc/opentelemetry_exporter_otlp_proto_common-1.38.0-py3-none-any.whl", hash = "sha256:03cb76ab213300fe4f4c62b7d8f17d97fcfd21b89f0b5ce38ea156327ddda74a", size = 18359, upload-time = "2025-10-16T08:35:34.099Z" }, + { url = "https://files.pythonhosted.org/packages/8c/02/ffc3e143d89a27ac21fd557365b98bd0653b98de8a101151d5805b5d4c33/opentelemetry_exporter_otlp_proto_common-1.39.1-py3-none-any.whl", hash = "sha256:08f8a5862d64cc3435105686d0216c1365dc5701f86844a8cd56597d0c764fde", size = 18366, upload-time = "2025-12-11T13:32:20.2Z" }, ] [[package]] name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.38.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "googleapis-common-protos" }, @@ -2527,14 +2530,14 @@ dependencies = [ { name = "opentelemetry-sdk" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a2/c0/43222f5b97dc10812bc4f0abc5dc7cd0a2525a91b5151d26c9e2e958f52e/opentelemetry_exporter_otlp_proto_grpc-1.38.0.tar.gz", hash = "sha256:2473935e9eac71f401de6101d37d6f3f0f1831db92b953c7dcc912536158ebd6", size = 24676, upload-time = "2025-10-16T08:35:53.83Z" } +sdist = { url = "https://files.pythonhosted.org/packages/53/48/b329fed2c610c2c32c9366d9dc597202c9d1e58e631c137ba15248d8850f/opentelemetry_exporter_otlp_proto_grpc-1.39.1.tar.gz", hash = "sha256:772eb1c9287485d625e4dbe9c879898e5253fea111d9181140f51291b5fec3ad", size = 24650, upload-time = "2025-12-11T13:32:41.429Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/28/f0/bd831afbdba74ca2ce3982142a2fad707f8c487e8a3b6fef01f1d5945d1b/opentelemetry_exporter_otlp_proto_grpc-1.38.0-py3-none-any.whl", hash = "sha256:7c49fd9b4bd0dbe9ba13d91f764c2d20b0025649a6e4ac35792fb8d84d764bc7", size = 19695, upload-time = "2025-10-16T08:35:35.053Z" }, + { url = "https://files.pythonhosted.org/packages/81/a3/cc9b66575bd6597b98b886a2067eea2693408d2d5f39dad9ab7fc264f5f3/opentelemetry_exporter_otlp_proto_grpc-1.39.1-py3-none-any.whl", hash = "sha256:fa1c136a05c7e9b4c09f739469cbdb927ea20b34088ab1d959a849b5cc589c18", size = 19766, upload-time = "2025-12-11T13:32:21.027Z" }, ] [[package]] name = "opentelemetry-exporter-otlp-proto-http" -version = "1.38.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "googleapis-common-protos" }, @@ -2545,14 +2548,14 @@ dependencies = [ { name = "requests" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/81/0a/debcdfb029fbd1ccd1563f7c287b89a6f7bef3b2902ade56797bfd020854/opentelemetry_exporter_otlp_proto_http-1.38.0.tar.gz", hash = "sha256:f16bd44baf15cbe07633c5112ffc68229d0edbeac7b37610be0b2def4e21e90b", size = 17282, upload-time = "2025-10-16T08:35:54.422Z" } +sdist = { url = "https://files.pythonhosted.org/packages/80/04/2a08fa9c0214ae38880df01e8bfae12b067ec0793446578575e5080d6545/opentelemetry_exporter_otlp_proto_http-1.39.1.tar.gz", hash = "sha256:31bdab9745c709ce90a49a0624c2bd445d31a28ba34275951a6a362d16a0b9cb", size = 17288, upload-time = "2025-12-11T13:32:42.029Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/77/154004c99fb9f291f74aa0822a2f5bbf565a72d8126b3a1b63ed8e5f83c7/opentelemetry_exporter_otlp_proto_http-1.38.0-py3-none-any.whl", hash = "sha256:84b937305edfc563f08ec69b9cb2298be8188371217e867c1854d77198d0825b", size = 19579, upload-time = "2025-10-16T08:35:36.269Z" }, + { url = "https://files.pythonhosted.org/packages/95/f1/b27d3e2e003cd9a3592c43d099d2ed8d0a947c15281bf8463a256db0b46c/opentelemetry_exporter_otlp_proto_http-1.39.1-py3-none-any.whl", hash = "sha256:d9f5207183dd752a412c4cd564ca8875ececba13be6e9c6c370ffb752fd59985", size = 19641, upload-time = "2025-12-11T13:32:22.248Z" }, ] [[package]] name = "opentelemetry-instrumentation" -version = "0.59b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, @@ -2560,62 +2563,62 @@ dependencies = [ { name = "packaging" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/04/ed/9c65cd209407fd807fa05be03ee30f159bdac8d59e7ea16a8fe5a1601222/opentelemetry_instrumentation-0.59b0.tar.gz", hash = "sha256:6010f0faaacdaf7c4dff8aac84e226d23437b331dcda7e70367f6d73a7db1adc", size = 31544, upload-time = "2025-10-16T08:39:31.959Z" } +sdist = { url = "https://files.pythonhosted.org/packages/41/0f/7e6b713ac117c1f5e4e3300748af699b9902a2e5e34c9cf443dde25a01fa/opentelemetry_instrumentation-0.60b1.tar.gz", hash = "sha256:57ddc7974c6eb35865af0426d1a17132b88b2ed8586897fee187fd5b8944bd6a", size = 31706, upload-time = "2025-12-11T13:36:42.515Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/f5/7a40ff3f62bfe715dad2f633d7f1174ba1a7dd74254c15b2558b3401262a/opentelemetry_instrumentation-0.59b0-py3-none-any.whl", hash = "sha256:44082cc8fe56b0186e87ee8f7c17c327c4c2ce93bdbe86496e600985d74368ee", size = 33020, upload-time = "2025-10-16T08:38:31.463Z" }, + { url = "https://files.pythonhosted.org/packages/77/d2/6788e83c5c86a2690101681aeef27eeb2a6bf22df52d3f263a22cee20915/opentelemetry_instrumentation-0.60b1-py3-none-any.whl", hash = "sha256:04480db952b48fb1ed0073f822f0ee26012b7be7c3eac1a3793122737c78632d", size = 33096, upload-time = "2025-12-11T13:35:33.067Z" }, ] [[package]] name = "opentelemetry-instrumentation-threading" -version = "0.59b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "opentelemetry-instrumentation" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/82/7a/84e97d8992808197006e607ae410c2219bdbbc23d1289ba0c244d3220741/opentelemetry_instrumentation_threading-0.59b0.tar.gz", hash = "sha256:ce5658730b697dcbc0e0d6d13643a69fd8aeb1b32fa8db3bade8ce114c7975f3", size = 8770, upload-time = "2025-10-16T08:40:03.587Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9b/0a/e36123ec4c0910a3936b92982545a53e9bca5b26a28df06883751a783f84/opentelemetry_instrumentation_threading-0.60b1.tar.gz", hash = "sha256:20b18a68abe5801fa9474336b7c27487d4af3e00b66f6a8734e4fdd75c8b0b43", size = 8768, upload-time = "2025-12-11T13:37:16.29Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/50/32d29076aaa1c91983cdd3ca8c6bb4d344830cd7d87a7c0fdc2d98c58509/opentelemetry_instrumentation_threading-0.59b0-py3-none-any.whl", hash = "sha256:76da2fc01fe1dccebff6581080cff9e42ac7b27cc61eb563f3c4435c727e8eca", size = 9313, upload-time = "2025-10-16T08:39:15.876Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a3/448738b927bcc1843ace7d4ed55dd54441a71363075eeeee89c5944dd740/opentelemetry_instrumentation_threading-0.60b1-py3-none-any.whl", hash = "sha256:92a52a60fee5e32bc6aa8f5acd749b15691ad0bc4457a310f5736b76a6d9d1de", size = 9312, upload-time = "2025-12-11T13:36:28.434Z" }, ] [[package]] name = "opentelemetry-proto" -version = "1.38.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "protobuf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/51/14/f0c4f0f6371b9cb7f9fa9ee8918bfd59ac7040c7791f1e6da32a1839780d/opentelemetry_proto-1.38.0.tar.gz", hash = "sha256:88b161e89d9d372ce723da289b7da74c3a8354a8e5359992be813942969ed468", size = 46152, upload-time = "2025-10-16T08:36:01.612Z" } +sdist = { url = "https://files.pythonhosted.org/packages/49/1d/f25d76d8260c156c40c97c9ed4511ec0f9ce353f8108ca6e7561f82a06b2/opentelemetry_proto-1.39.1.tar.gz", hash = "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8", size = 46152, upload-time = "2025-12-11T13:32:48.681Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b6/6a/82b68b14efca5150b2632f3692d627afa76b77378c4999f2648979409528/opentelemetry_proto-1.38.0-py3-none-any.whl", hash = "sha256:b6ebe54d3217c42e45462e2a1ae28c3e2bf2ec5a5645236a490f55f45f1a0a18", size = 72535, upload-time = "2025-10-16T08:35:45.749Z" }, + { url = "https://files.pythonhosted.org/packages/51/95/b40c96a7b5203005a0b03d8ce8cd212ff23f1793d5ba289c87a097571b18/opentelemetry_proto-1.39.1-py3-none-any.whl", hash = "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", size = 72535, upload-time = "2025-12-11T13:32:33.866Z" }, ] [[package]] name = "opentelemetry-sdk" -version = "1.38.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "opentelemetry-semantic-conventions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/85/cb/f0eee1445161faf4c9af3ba7b848cc22a50a3d3e2515051ad8628c35ff80/opentelemetry_sdk-1.38.0.tar.gz", hash = "sha256:93df5d4d871ed09cb4272305be4d996236eedb232253e3ab864c8620f051cebe", size = 171942, upload-time = "2025-10-16T08:36:02.257Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/fb/c76080c9ba07e1e8235d24cdcc4d125ef7aa3edf23eb4e497c2e50889adc/opentelemetry_sdk-1.39.1.tar.gz", hash = "sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6", size = 171460, upload-time = "2025-12-11T13:32:49.369Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2f/2e/e93777a95d7d9c40d270a371392b6d6f1ff170c2a3cb32d6176741b5b723/opentelemetry_sdk-1.38.0-py3-none-any.whl", hash = "sha256:1c66af6564ecc1553d72d811a01df063ff097cdc82ce188da9951f93b8d10f6b", size = 132349, upload-time = "2025-10-16T08:35:46.995Z" }, + { url = "https://files.pythonhosted.org/packages/7c/98/e91cf858f203d86f4eccdf763dcf01cf03f1dae80c3750f7e635bfa206b6/opentelemetry_sdk-1.39.1-py3-none-any.whl", hash = "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c", size = 132565, upload-time = "2025-12-11T13:32:35.069Z" }, ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.59b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/40/bc/8b9ad3802cd8ac6583a4eb7de7e5d7db004e89cb7efe7008f9c8a537ee75/opentelemetry_semantic_conventions-0.59b0.tar.gz", hash = "sha256:7a6db3f30d70202d5bf9fa4b69bc866ca6a30437287de6c510fb594878aed6b0", size = 129861, upload-time = "2025-10-16T08:36:03.346Z" } +sdist = { url = "https://files.pythonhosted.org/packages/91/df/553f93ed38bf22f4b999d9be9c185adb558982214f33eae539d3b5cd0858/opentelemetry_semantic_conventions-0.60b1.tar.gz", hash = "sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953", size = 137935, upload-time = "2025-12-11T13:32:50.487Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/24/7d/c88d7b15ba8fe5c6b8f93be50fc11795e9fc05386c44afaf6b76fe191f9b/opentelemetry_semantic_conventions-0.59b0-py3-none-any.whl", hash = "sha256:35d3b8833ef97d614136e253c1da9342b4c3c083bbaf29ce31d572a1c3825eed", size = 207954, upload-time = "2025-10-16T08:35:48.054Z" }, + { url = "https://files.pythonhosted.org/packages/7a/5e/5958555e09635d09b75de3c4f8b9cae7335ca545d77392ffe7331534c402/opentelemetry_semantic_conventions-0.60b1-py3-none-any.whl", hash = "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb", size = 219982, upload-time = "2025-12-11T13:32:36.955Z" }, ] [[package]] From 74934ec4560e702894c0a84cdef53ae9fef7cbba Mon Sep 17 00:00:00 2001 From: Rakhman Asmatullayev Date: Fri, 13 Feb 2026 15:19:18 +0000 Subject: [PATCH 2/4] chore --- benchmarks/utils/evaluation.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/benchmarks/utils/evaluation.py b/benchmarks/utils/evaluation.py index bda531fb..002a4f6d 100644 --- a/benchmarks/utils/evaluation.py +++ b/benchmarks/utils/evaluation.py @@ -593,8 +593,7 @@ def _process_one_mp( session_id=lmnr_session_id, metadata=lmnr_trace_metadata, ) - eval_span_ctx = Laminar.serialize_span_context(eval_span) - eval_span.end() + eval_span_ctx = Laminar.get_laminar_span_context(eval_span) if lmnr_datapoint_id is not None and self.metadata.lmnr is not None: trace_id = UUID(int=eval_span.get_span_context().trace_id) @@ -616,14 +615,12 @@ def _process_one_mp( # Start Laminar execution span and inject context into os.environ so workspace can pick it up # Escape the serialized context to safely pass as a cli argument - lmnr_span = Laminar.start_active_span( + exec_span = Laminar.start_active_span( "Execution", span_type="EXECUTOR", # type: ignore - parent_span_context=Laminar.deserialize_span_context(eval_span_ctx) - if eval_span_ctx - else None, + parent_span_context=eval_span_ctx, ) - exec_span_ctx = json.dumps(Laminar.serialize_span_context(lmnr_span)) + exec_span_ctx = json.dumps(Laminar.serialize_span_context(exec_span)) os.environ["LMNR_SPAN_CONTEXT"] = exec_span_ctx or "" try: @@ -676,7 +673,7 @@ def _process_one_mp( except Exception as e: last_error = e retry_count += 1 - lmnr_span.record_exception(e) + exec_span.record_exception(e) # Log structured runtime allocation/init failures so we can trace instance -> runtime/pod runtime_id = ( @@ -746,7 +743,8 @@ def _process_one_mp( f"[child] Failed to cleanup workspace for {instance.id}: " f"{str(cleanup_error)[:50]}" ) - lmnr_span.end() + exec_span.end() + eval_span.end() # This should never be reached, but added for type safety error_output = self._create_error_output( From b5acc807978d8467bedfe20119fcf4176cb98ab6 Mon Sep 17 00:00:00 2001 From: Rakhman Asmatullayev Date: Fri, 13 Feb 2026 15:39:36 +0000 Subject: [PATCH 3/4] always end span --- benchmarks/utils/evaluation.py | 251 +++++++++++++++++---------------- 1 file changed, 130 insertions(+), 121 deletions(-) diff --git a/benchmarks/utils/evaluation.py b/benchmarks/utils/evaluation.py index 002a4f6d..9433b38e 100644 --- a/benchmarks/utils/evaluation.py +++ b/benchmarks/utils/evaluation.py @@ -610,141 +610,150 @@ def _process_one_mp( max_retries = self.metadata.max_retries runtime_runs: list[RemoteRuntimeAllocation] = [] - while retry_count <= max_retries: - workspace = None - - # Start Laminar execution span and inject context into os.environ so workspace can pick it up - # Escape the serialized context to safely pass as a cli argument - exec_span = Laminar.start_active_span( - "Execution", - span_type="EXECUTOR", # type: ignore - parent_span_context=eval_span_ctx, - ) - exec_span_ctx = json.dumps(Laminar.serialize_span_context(exec_span)) - os.environ["LMNR_SPAN_CONTEXT"] = exec_span_ctx or "" - - try: - # Calculate resource factor based on runtime failures - resource_factor = self._calculate_resource_factor( - runtime_failure_count + try: + while retry_count <= max_retries: + workspace = None + + # Start Laminar execution span and inject context into os.environ so workspace can pick it up + # Escape the serialized context to safely pass as a cli argument + exec_span = Laminar.start_active_span( + "Execution", + span_type="EXECUTOR", # type: ignore + parent_span_context=eval_span_ctx, ) - if runtime_failure_count > 0: - logger.warning( - f"[child] Instance {instance.id}: " - f"attempt {retry_count + 1}/{max_retries + 1}, " - f"runtime_failure_count={runtime_failure_count}, " - f"resource_factor={resource_factor}" - ) - - workspace = self.prepare_workspace( - instance, - resource_factor=resource_factor, - forward_env=LMNR_ENV_VARS, + exec_span_ctx = json.dumps( + Laminar.serialize_span_context(exec_span) ) + os.environ["LMNR_SPAN_CONTEXT"] = exec_span_ctx or "" - # Record runtime/pod mapping only for remote runtimes - if isinstance(workspace, APIRemoteWorkspace): - retry_number = retry_count + 1 # 1-indexed for readability - runtime_run = RemoteRuntimeAllocation( - runtime_id=getattr(workspace, "_runtime_id", None), - session_id=getattr(workspace, "session_id", None), - runtime_url=getattr(workspace, "_runtime_url", None), - resource_factor=resource_factor, - critic_attempt=critic_attempt, - retry=retry_number, - started_at=datetime.now(timezone.utc), - ) - runtime_runs.append(runtime_run) - logger.info( - "[child] runtime allocated instance=%s attempt=%d retry=%d workspace=%s runtime_id=%s session_id=%s resource_factor=%s", - instance.id, - critic_attempt, - retry_number, - workspace.__class__.__name__, - runtime_run.runtime_id, - runtime_run.session_id, - runtime_run.resource_factor, + try: + # Calculate resource factor based on runtime failures + resource_factor = self._calculate_resource_factor( + runtime_failure_count ) - out = self.evaluate_instance(instance, workspace) - if runtime_runs: - out.runtime_runs = runtime_runs - logger.info("[child] done id=%s", instance.id) - return instance, out - except Exception as e: - last_error = e - retry_count += 1 - exec_span.record_exception(e) + if runtime_failure_count > 0: + logger.warning( + f"[child] Instance {instance.id}: " + f"attempt {retry_count + 1}/{max_retries + 1}, " + f"runtime_failure_count={runtime_failure_count}, " + f"resource_factor={resource_factor}" + ) - # Log structured runtime allocation/init failures so we can trace instance -> runtime/pod - runtime_id = ( - getattr(workspace, "_runtime_id", None) if workspace else None - ) - session_id = ( - getattr(workspace, "session_id", None) if workspace else None - ) - if isinstance(workspace, APIRemoteWorkspace) or ( - "Runtime not yet ready" in str(e) - ): - logger.warning( - "[child] runtime init failure instance=%s attempt=%d retry=%d runtime_id=%s session_id=%s error=%s", - instance.id, - critic_attempt, - retry_count, - runtime_id, - session_id, - str(e), + workspace = self.prepare_workspace( + instance, + resource_factor=resource_factor, + forward_env=LMNR_ENV_VARS, ) - # TODO(#277): add an exception classifier to decide when to bump resources - runtime_failure_count += 1 - logger.warning( - f"[child] Instance {instance.id}: runtime_failure_count=" - f"{runtime_failure_count}" - ) - - if retry_count <= max_retries: - logger.warning( - f"[child] Instance {instance.id} failed " - f"(attempt {retry_count}/{max_retries}): " - f"{str(e)}" - ) - else: - logger.error( - f"[child] Instance {instance.id} failed after " - f"{max_retries} retries. Last error: {str(e)}", - exc_info=True, + # Record runtime/pod mapping only for remote runtimes + if isinstance(workspace, APIRemoteWorkspace): + retry_number = retry_count + 1 # 1-indexed for readability + runtime_run = RemoteRuntimeAllocation( + runtime_id=getattr(workspace, "_runtime_id", None), + session_id=getattr(workspace, "session_id", None), + runtime_url=getattr(workspace, "_runtime_url", None), + resource_factor=resource_factor, + critic_attempt=critic_attempt, + retry=retry_number, + started_at=datetime.now(timezone.utc), + ) + runtime_runs.append(runtime_run) + logger.info( + "[child] runtime allocated instance=%s attempt=%d retry=%d workspace=%s runtime_id=%s session_id=%s resource_factor=%s", + instance.id, + critic_attempt, + retry_number, + workspace.__class__.__name__, + runtime_run.runtime_id, + runtime_run.session_id, + runtime_run.resource_factor, + ) + out = self.evaluate_instance(instance, workspace) + if runtime_runs: + out.runtime_runs = runtime_runs + logger.info("[child] done id=%s", instance.id) + return instance, out + except Exception as e: + last_error = e + retry_count += 1 + exec_span.record_exception(e) + + # Log structured runtime allocation/init failures so we can trace instance -> runtime/pod + runtime_id = ( + getattr(workspace, "_runtime_id", None) + if workspace + else None ) - # Create error output for final failure - error_output = self._create_error_output( - instance, last_error, max_retries + session_id = ( + getattr(workspace, "session_id", None) + if workspace + else None ) - if runtime_runs: - error_output.runtime_runs = runtime_runs - return instance, error_output - finally: - # Ensure workspace cleanup happens regardless of success or failure - if workspace is not None: - try: - self._capture_conversation_archive(workspace, instance) - except Exception as archive_error: + if isinstance(workspace, APIRemoteWorkspace) or ( + "Runtime not yet ready" in str(e) + ): logger.warning( - "[child] Failed to capture conversation archive for %s: %s", + "[child] runtime init failure instance=%s attempt=%d retry=%d runtime_id=%s session_id=%s error=%s", instance.id, - archive_error, - ) - try: - # Use the context manager protocol for cleanup - workspace.__exit__(None, None, None) - logger.debug( - "[child] cleaned up workspace for id=%s", instance.id + critic_attempt, + retry_count, + runtime_id, + session_id, + str(e), ) - except Exception as cleanup_error: + + # TODO(#277): add an exception classifier to decide when to bump resources + runtime_failure_count += 1 + logger.warning( + f"[child] Instance {instance.id}: runtime_failure_count=" + f"{runtime_failure_count}" + ) + + if retry_count <= max_retries: logger.warning( - f"[child] Failed to cleanup workspace for {instance.id}: " - f"{str(cleanup_error)[:50]}" + f"[child] Instance {instance.id} failed " + f"(attempt {retry_count}/{max_retries}): " + f"{str(e)}" + ) + else: + logger.error( + f"[child] Instance {instance.id} failed after " + f"{max_retries} retries. Last error: {str(e)}", + exc_info=True, ) - exec_span.end() - eval_span.end() + # Create error output for final failure + error_output = self._create_error_output( + instance, last_error, max_retries + ) + if runtime_runs: + error_output.runtime_runs = runtime_runs + return instance, error_output + finally: + # Ensure workspace cleanup happens regardless of success or failure + if workspace is not None: + try: + self._capture_conversation_archive(workspace, instance) + except Exception as archive_error: + logger.warning( + "[child] Failed to capture conversation archive for %s: %s", + instance.id, + archive_error, + ) + try: + # Use the context manager protocol for cleanup + workspace.__exit__(None, None, None) + logger.debug( + "[child] cleaned up workspace for id=%s", + instance.id, + ) + except Exception as cleanup_error: + logger.warning( + f"[child] Failed to cleanup workspace for {instance.id}: " + f"{str(cleanup_error)[:50]}" + ) + exec_span.end() + finally: + eval_span.end() # This should never be reached, but added for type safety error_output = self._create_error_output( From 72037af1883f2ea15de9eecfe5f6c76bd2a1ee28 Mon Sep 17 00:00:00 2001 From: Rakhman Asmatullayev Date: Fri, 13 Feb 2026 15:45:31 +0000 Subject: [PATCH 4/4] fix tests --- tests/test_workspace_cleanup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_workspace_cleanup.py b/tests/test_workspace_cleanup.py index 1af4e764..24632dce 100644 --- a/tests/test_workspace_cleanup.py +++ b/tests/test_workspace_cleanup.py @@ -60,7 +60,7 @@ def evaluate_instance(self, instance, workspace): evaluator = TestEvaluation(metadata=metadata, num_workers=1) result_instance, result_output = evaluator._process_one_mp( - test_instance, None, critic_attempt=1 + test_instance, critic_attempt=1 ) mock_workspace.__exit__.assert_called_once_with(None, None, None) @@ -111,7 +111,7 @@ def evaluate_instance(self, instance, workspace): evaluator = TestEvaluation(metadata=metadata, num_workers=1) result_instance, result_output = evaluator._process_one_mp( - test_instance, None, critic_attempt=1 + test_instance, critic_attempt=1 ) mock_workspace.__exit__.assert_called_once_with(None, None, None) @@ -171,7 +171,7 @@ def evaluate_instance(self, instance, workspace): evaluator = TestEvaluation(metadata=metadata, num_workers=1) result_instance, result_output = evaluator._process_one_mp( - test_instance, None, critic_attempt=1 + test_instance, critic_attempt=1 ) mock_workspace.__exit__.assert_called_once_with(None, None, None) @@ -241,7 +241,7 @@ def evaluate_instance(self, instance, workspace): evaluator = TestEvaluation(metadata=metadata, num_workers=1) result_instance, result_output = evaluator._process_one_mp( - test_instance, None, critic_attempt=1 + test_instance, critic_attempt=1 ) assert len(workspaces_created) == 3