Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions src/agentunit/examples/basic_evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from agentunit.adapters.base import AdapterOutcome, BaseAdapter
from agentunit.core.runner import Runner
from agentunit.core.scenario import Scenario
from agentunit.core.trace import TraceLog
from agentunit.datasets.base import DatasetCase, DatasetSource


class FakeAdapter(BaseAdapter):
    """
    Stand-in adapter that always answers with a canned string.

    No real computation is performed: every call to ``execute`` reports
    success and hands back the response supplied at construction time.
    Useful in examples and tests that show how an adapter plugs into
    AgentUnit.
    """

    def __init__(self, response: str):
        """
        Store the canned response.

        Args:
            response (str): The string returned as the output of every execution.
        """
        self.response = response

    def prepare(self):
        """
        Prepare the adapter before execution.

        FakeAdapter does not require any setup, so this is a no-op.
        """
        return None

    def execute(self, case, trace_log: TraceLog) -> AdapterOutcome:
        """
        Produce the outcome for a single evaluation case.

        Args:
            case (object): The evaluation case; it is not inspected here.
            trace_log (TraceLog): Trace log for the run; nothing is written to it here.

        Returns:
            AdapterOutcome: Built with ``success=True``, the stored response
            as ``output`` and ``error=None``.
        """
        canned_output = self.response
        outcome = AdapterOutcome(success=True, output=canned_output, error=None)
        return outcome

    def cleanup(self):
        """
        Release adapter resources after execution.

        FakeAdapter holds no resources, so this is a no-op.
        """
        return None


def main() -> None:
# define simple dataset
cases = [
DatasetCase(
id="case_1",
input="hello",
expected_output="hello",
)
Comment on lines +53 to +57
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Fix DatasetCase field names to match the API.

The DatasetCase constructor arguments don't match the actual field names defined in the class. From the relevant code snippets, DatasetCase has:

  • id: str (required)
  • query: str (required, not input)
  • expected_output: str | None (optional, not expected)

This will cause a TypeError at runtime, violating the acceptance criterion that the script must run without errors.

🔎 Proposed fix for DatasetCase construction
     cases = [
         DatasetCase(
-            input="hello",
-            expected="hello",
+            id="case_1",
+            query="hello",
+            expected_output="hello",
         )
     ]
🤖 Prompt for AI Agents
In src/agentunit/examples/basic_evaluation.py around lines 21 to 24, the
DatasetCase constructor is using incorrect field names; replace the kwargs to
match the class API by adding a required id (e.g., "case_1"), renaming input to
query, and renaming expected to expected_output (or omitting expected_output if
you want None). Ensure the resulting call is DatasetCase(id="...", query="hello",
expected_output="hello") or DatasetCase(id="...", query="hello") if no expected
output is needed.

]

# Build a scenario that pairs the fake adapter with the in-memory dataset.
# The adapter's canned response ("hello") is the same string used for the
# case's expected output above.
scenario = Scenario(
name="Basic Evaluation Example",
adapter=FakeAdapter(response="hello"),
dataset=DatasetSource.from_list(cases),
)

# Run the evaluation; the runner is given a list of scenarios.
runner = Runner([scenario])
result = runner.run()

# Only one scenario was passed to the runner, so read the first entry.
scenario_result = result.scenarios[0]
# Print a short summary; ":.0%" renders the success rate as a whole-number percentage.
print("=== Evaluation Summary ===")
print(f"Scenario: {scenario.name}")
print(f"Success rate: {scenario_result.success_rate:.0%}")


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()