From b0523296c8fb4ca634b5a1dfb9bc4534c845687e Mon Sep 17 00:00:00 2001 From: Jagriti-student Date: Sun, 7 Dec 2025 14:43:39 +0530 Subject: [PATCH 1/2] Add basic evaluation example script --- examples/basic_evaluation.py | 45 ++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 examples/basic_evaluation.py diff --git a/examples/basic_evaluation.py b/examples/basic_evaluation.py new file mode 100644 index 0000000..e5c06d7 --- /dev/null +++ b/examples/basic_evaluation.py @@ -0,0 +1,45 @@ +""" +Basic Evaluation Example for AgentUnit +-------------------------------------- + +This script demonstrates how to run a minimal evaluation using +AgentUnit with a FakeAdapter. It is designed for beginners and does +not require any extra dependencies. +""" + +from agentunit.core.evaluator import Evaluator +from agentunit.core.adapters import BaseAdapter + + +class FakeAdapter(BaseAdapter): + """ + A simple mock adapter used only for demonstration. + It returns a predictable output so evaluation is easy to understand. + """ + + def generate(self, prompt: str) -> str: + # Always returns the same answer for simplicity + return "Hello, this is a fake response!" + + +def main(): + # Step 1 — Prepare the adapter + adapter = FakeAdapter() + + # Step 2 — Create the evaluator + evaluator = Evaluator(adapter=adapter) + + # Step 3 — Prepare an example prompt + prompt = "Say hello!" 
+ + # Step 4 — Run the evaluation + result = evaluator.evaluate(prompt) + + # Step 5 — Print the output + print("Prompt:", prompt) + print("Model Output:", result.output) + print("Evaluation Score:", result.score) + + +if __name__ == "__main__": + main() From dd2f4feeed6b9ddf968cd25fccc7cbdea345e0d7 Mon Sep 17 00:00:00 2001 From: Jagriti-student Date: Mon, 8 Dec 2025 15:37:23 +0530 Subject: [PATCH 2/2] Reformat one-line docstrings to multi-line style across core modules --- src/agentunit/core/__init__.py | 4 +++- src/agentunit/core/exceptions.py | 16 ++++++++++++---- src/agentunit/core/replay.py | 8 ++++++-- src/agentunit/core/runner.py | 4 +++- src/agentunit/core/scenario.py | 16 ++++++++++++---- src/agentunit/core/trace.py | 12 +++++++++--- 6 files changed, 45 insertions(+), 15 deletions(-) diff --git a/src/agentunit/core/__init__.py b/src/agentunit/core/__init__.py index 952bcb5..757d228 100644 --- a/src/agentunit/core/__init__.py +++ b/src/agentunit/core/__init__.py @@ -1,4 +1,6 @@ -"""Core components for AgentUnit.""" +""" +Core components for AgentUnit. +""" from agentunit.datasets.base import DatasetCase, DatasetSource from agentunit.reporting.results import ScenarioResult diff --git a/src/agentunit/core/exceptions.py b/src/agentunit/core/exceptions.py index 03326f2..2a1d159 100644 --- a/src/agentunit/core/exceptions.py +++ b/src/agentunit/core/exceptions.py @@ -1,15 +1,23 @@ -"""Custom exceptions for AgentUnit.""" +""" +Custom exceptions for AgentUnit. +""" from __future__ import annotations class AgentUnitError(Exception): - """Base class for AgentUnit exceptions.""" + """ + Base class for AgentUnit exceptions. + """ class AdapterNotAvailableError(AgentUnitError): - """Raised when an adapter cannot be initialized due to missing dependencies.""" + """ + Raised when an adapter cannot be initialized due to missing dependencies. 
+ """ class ScenarioExecutionError(AgentUnitError): - """Raised when a scenario fails during execution.""" + """ + Raised when a scenario fails during execution. + """ diff --git a/src/agentunit/core/replay.py b/src/agentunit/core/replay.py index af78380..f0e2b5f 100644 --- a/src/agentunit/core/replay.py +++ b/src/agentunit/core/replay.py @@ -1,4 +1,6 @@ -"""Replay utilities leveraging stored traces.""" +""" +Replay utilities leveraging stored traces. +""" from __future__ import annotations @@ -8,7 +10,9 @@ def load_traces(traces_dir: str | Path) -> list[TraceLog]: - """Load stored traces from disk for deterministic replay or analysis.""" + """ + Load stored traces from disk for deterministic replay or analysis. + """ path = Path(traces_dir) logs: list[TraceLog] = [] diff --git a/src/agentunit/core/runner.py b/src/agentunit/core/runner.py index bcbd66c..143921b 100644 --- a/src/agentunit/core/runner.py +++ b/src/agentunit/core/runner.py @@ -1,4 +1,6 @@ -"""Scenario runner orchestration.""" +""" +Scenario runner orchestration. +""" from __future__ import annotations diff --git a/src/agentunit/core/scenario.py b/src/agentunit/core/scenario.py index f0b0d86..70bba2a 100644 --- a/src/agentunit/core/scenario.py +++ b/src/agentunit/core/scenario.py @@ -1,4 +1,6 @@ -"""Scenario definition API exposed to end users.""" +""" +Scenario definition API exposed to end users. +""" from __future__ import annotations @@ -19,7 +21,9 @@ @dataclass(slots=True) class Scenario: - """Defines a reproducible agent evaluation scenario.""" + """ + Defines a reproducible agent evaluation scenario. + """ name: str adapter: BaseAdapter @@ -75,7 +79,9 @@ def from_crewai( name: str | None = None, **options: object, ) -> Scenario: - """Create scenario from CrewAI crew.""" + """ + Create scenario from CrewAI crew. 
+ """ from agentunit.adapters.crewai import CrewAIAdapter adapter = CrewAIAdapter.from_crew(crew, **options) @@ -91,7 +97,9 @@ def from_autogen( name: str | None = None, **options: object, ) -> Scenario: - """Create scenario from AutoGen orchestrator.""" + """ + Create scenario from AutoGen orchestrator. + """ from agentunit.adapters.autogen import AutoGenAdapter adapter = AutoGenAdapter(orchestrator=orchestrator, **options) diff --git a/src/agentunit/core/trace.py b/src/agentunit/core/trace.py index 52f1afc..59d0978 100644 --- a/src/agentunit/core/trace.py +++ b/src/agentunit/core/trace.py @@ -1,4 +1,6 @@ -"""Tracing utilities shared between adapters and the runner.""" +""" +Tracing utilities shared between adapters and the runner. +""" from __future__ import annotations @@ -11,7 +13,9 @@ @dataclass(slots=True) class TraceEvent: - """Represents a single prompt, tool call, or response in an agent run.""" + """ + Represents a single prompt, tool call, or response in an agent run. + """ type: str payload: dict[str, Any] @@ -20,7 +24,9 @@ class TraceEvent: @dataclass(slots=True) class TraceLog: - """A collection of chronological events for a scenario iteration.""" + """ + A collection of chronological events for a scenario iteration. + """ events: list[TraceEvent] = field(default_factory=list)