From 4934a986fbdab83a60efea4f2fd0e98c310c0189 Mon Sep 17 00:00:00 2001 From: dharapandya85 Date: Wed, 24 Dec 2025 18:58:33 +0000 Subject: [PATCH 1/5] fix(example): restore basic evaluation example --- src/agentunit/examples/basic_evaluation.py | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 src/agentunit/examples/basic_evaluation.py diff --git a/src/agentunit/examples/basic_evaluation.py b/src/agentunit/examples/basic_evaluation.py new file mode 100644 index 0000000..c4eee6b --- /dev/null +++ b/src/agentunit/examples/basic_evaluation.py @@ -0,0 +1,34 @@ +from agentunit import Evaluation, Task + + +class FakeAdapter: + def __init__(self, response: str): + self.response = response + + def run(self, prompt: str) -> str: + return self.response + + +def main(): + # define simple task + # each task specifies a prompt and expected output + task = Task(name="echo-task", prompt="Say hello", expected="hello") + # create a fake model adapter that always outputs "hello" + fake_model = FakeAdapter(response="hello") + + # build the evaluation + evaluation = Evaluation(task=[task], model=fake_model) + # results can be inspected/printed + results = evaluation.run() + + # print a readable summary + print("=== Evaluation Summary ===") + for result in results: + print(f"Task: {result.task.name}") + print(f"Prompt: {result.task.prompt}") + print(f"Model Output: {result.output}") + print(f"Expected: {result.task.expected}") + print(f"Passed: {result.passed}") + print("-" * 30) + if __name__ == "__main__": + main() From 3a427d06c1097b62311496ae552455890d1e795c Mon Sep 17 00:00:00 2001 From: dharapandya85 Date: Thu, 25 Dec 2025 12:04:51 +0000 Subject: [PATCH 2/5] fix(example): align basic evaluation example with current AgentUnit API --- src/agentunit/examples/basic_evaluation.py | 49 ++++++++++++---------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/agentunit/examples/basic_evaluation.py b/src/agentunit/examples/basic_evaluation.py 
index c4eee6b..37639d7 100644 --- a/src/agentunit/examples/basic_evaluation.py +++ b/src/agentunit/examples/basic_evaluation.py @@ -1,34 +1,39 @@ -from agentunit import Evaluation, Task +from agentunit import DatasetCase, Runner, Scenario class FakeAdapter: def __init__(self, response: str): self.response = response - def run(self, prompt: str) -> str: + def run(self, case: DatasetCase) -> str: return self.response -def main(): - # define simple task - # each task specifies a prompt and expected output - task = Task(name="echo-task", prompt="Say hello", expected="hello") - # create a fake model adapter that always outputs "hello" - fake_model = FakeAdapter(response="hello") +def main() -> None: + # define simple dataset + cases = [ + DatasetCase( + id="echo-task", + query="Say hello", + expected_output="hello", + ) + ] + # create a scenario using the fake adapter + scenario = Scenario( + name="Basic Evaluation Example", + adapter=FakeAdapter(response="hello"), + dataset=cases, + ) - # build the evaluation - evaluation = Evaluation(task=[task], model=fake_model) - # results can be inspected/printed - results = evaluation.run() + # run evaluation + runner = Runner() + result = runner.run(scenario) - # print a readable summary + # print summary print("=== Evaluation Summary ===") - for result in results: - print(f"Task: {result.task.name}") - print(f"Prompt: {result.task.prompt}") - print(f"Model Output: {result.output}") - print(f"Expected: {result.task.expected}") - print(f"Passed: {result.passed}") - print("-" * 30) - if __name__ == "__main__": - main() + print(f"Scenario: {scenario.name}") + print(f"Success rate: {result.success_rate:.0%}") + + +if __name__ == "__main__": + main() From 158dfdf6f574af738b42b8e65bf4834ae54c3612 Mon Sep 17 00:00:00 2001 From: dharapandya85 Date: Fri, 2 Jan 2026 19:10:07 +0000 Subject: [PATCH 3/5] fix(example): align basic example with Runner and BaseAdapter APIs --- src/agentunit/examples/basic_evaluation.py | 34 
+++++++++++++--------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/agentunit/examples/basic_evaluation.py b/src/agentunit/examples/basic_evaluation.py index 37639d7..19d6c6f 100644 --- a/src/agentunit/examples/basic_evaluation.py +++ b/src/agentunit/examples/basic_evaluation.py @@ -1,38 +1,46 @@ -from agentunit import DatasetCase, Runner, Scenario +from agentunit.adapters.base import BaseAdapter +from agentunit.core.outcome import Outcome +from agentunit.core.runner import Runner +from agentunit.core.scenario import Scenario +from agentunit.dataset import DatasetCase, DatasetSource - -class FakeAdapter: +class FakeAdapter(BaseAdapter): def __init__(self, response: str): self.response = response - def run(self, case: DatasetCase) -> str: - return self.response - + def prepare(self): + pass + def execute(self, case, trace_log): + return Outcome(success=True, output=self.response, error=None) + def cleanup(self): + pass def main() -> None: # define simple dataset cases = [ DatasetCase( - id="echo-task", - query="Say hello", - expected_output="hello", + input="hello", + expected="hello", ) ] + dataset = DatasetSource.from_list(cases) + # create a scenario using the fake adapter scenario = Scenario( name="Basic Evaluation Example", adapter=FakeAdapter(response="hello"), - dataset=cases, + dataset=dataset, ) # run evaluation - runner = Runner() - result = runner.run(scenario) + runner = Runner([scenario]) + suite_result = runner.run() + scenario_result = suite_result.scenarios[0] # print summary print("=== Evaluation Summary ===") print(f"Scenario: {scenario.name}") - print(f"Success rate: {result.success_rate:.0%}") + print(f"Success rate: {scenario_result.success_rate:.0%}") if __name__ == "__main__": From 9e67660e5181aa9ac9a4399a224bb1d47406aa39 Mon Sep 17 00:00:00 2001 From: dharapandya85 Date: Sat, 3 Jan 2026 12:30:25 +0000 Subject: [PATCH 4/5] fix(example): align basic evaluation with current APIs --- 
src/agentunit/examples/basic_evaluation.py | 24 +++++++++++++--------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/agentunit/examples/basic_evaluation.py b/src/agentunit/examples/basic_evaluation.py index 19d6c6f..041400b 100644 --- a/src/agentunit/examples/basic_evaluation.py +++ b/src/agentunit/examples/basic_evaluation.py @@ -1,8 +1,9 @@ -from agentunit.adapters.base import BaseAdapter -from agentunit.core.outcome import Outcome +from agentunit.adapters.base import BaseAdapter, AdapterOutcome +from agentunit.core.tracelog import TraceLog from agentunit.core.runner import Runner from agentunit.core.scenario import Scenario -from agentunit.dataset import DatasetCase, DatasetSource +from agentunit.datasets.base import DatasetCase, DatasetSource + class FakeAdapter(BaseAdapter): def __init__(self, response: str): @@ -10,33 +11,36 @@ def __init__(self, response: str): def prepare(self): pass - def execute(self, case, trace_log): - return Outcome(success=True, output=self.response, error=None) + + def execute(self, case, trace_log: TraceLog) -> AdapterOutcome: + return AdapterOutcome(success=True, output=self.response, error=None) + def cleanup(self): pass + def main() -> None: # define simple dataset cases = [ DatasetCase( + id="case_1", input="hello", - expected="hello", + expected_output="hello", ) ] - dataset = DatasetSource.from_list(cases) # create a scenario using the fake adapter scenario = Scenario( name="Basic Evaluation Example", adapter=FakeAdapter(response="hello"), - dataset=dataset, + dataset=DatasetSource.from_list(cases), ) # run evaluation runner = Runner([scenario]) - suite_result = runner.run() + result = runner.run() - scenario_result = suite_result.scenarios[0] + scenario_result = result.scenarios[0] # print summary print("=== Evaluation Summary ===") print(f"Scenario: {scenario.name}") From 1d719e9f62f52c31a08a2b0cdf86ec435f608b0d Mon Sep 17 00:00:00 2001 From: dharapandya85 Date: Mon, 5 Jan 2026 18:48:16 +0000 Subject: 
[PATCH 5/5] fix tracelog import and pass failed tests --- src/agentunit/examples/basic_evaluation.py | 38 +++++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/agentunit/examples/basic_evaluation.py b/src/agentunit/examples/basic_evaluation.py index 041400b..02c93c4 100644 --- a/src/agentunit/examples/basic_evaluation.py +++ b/src/agentunit/examples/basic_evaluation.py @@ -1,22 +1,50 @@ -from agentunit.adapters.base import BaseAdapter, AdapterOutcome -from agentunit.core.tracelog import TraceLog +from agentunit.adapters.base import AdapterOutcome, BaseAdapter from agentunit.core.runner import Runner from agentunit.core.scenario import Scenario +from agentunit.core.trace import TraceLog from agentunit.datasets.base import DatasetCase, DatasetSource class FakeAdapter(BaseAdapter): + """ + A minimal adapter implementation used for testing and examples. + + FakeAdapter simulates an adapter without performing real computation; it returns a predefined response. + This is useful for demonstrating how adapters integrate with AgentUnit. + """ + def __init__(self, response: str): + """ + Initialize the adapter with a static response. + + Args: + response (str): The output string returned on execution. + """ self.response = response def prepare(self): - pass + """ + Prepare adapter before execution. + + FakeAdapter does not require setup. + """ def execute(self, case, trace_log: TraceLog) -> AdapterOutcome: + """ + Execute the adapter for a given evaluation case. + + Args: + case (object): Input case of evaluation. + trace_log (TraceLog): Trace log for recording execution details. + """ return AdapterOutcome(success=True, output=self.response, error=None) def cleanup(self): - pass + """ + Clean up adapter resources. + + No cleanup required for FakeAdapter. + """ def main() -> None: @@ -28,7 +56,7 @@ def main() -> None: expected_output="hello", ) ] - + # create a scenario using the fake adapter scenario = Scenario( name="Basic Evaluation Example",