diff --git a/docs/pytest-plugin.md b/docs/pytest-plugin.md
new file mode 100644
index 0000000..4805f95
--- /dev/null
+++ b/docs/pytest-plugin.md
@@ -0,0 +1,255 @@
# AgentUnit Pytest Plugin

The AgentUnit pytest plugin runs AgentUnit evaluation scenarios as pytest tests, integrating with pytest's test discovery, execution, and reporting.

## Installation

The pytest plugin is automatically available when you install AgentUnit:

```bash
pip install agentunit
```

## Usage

### Basic Usage

1. Create scenario files in the `tests/eval/` directory
2. Run pytest to discover and execute scenarios:

```bash
pytest tests/eval/
```

### Scenario Discovery

The plugin automatically discovers scenarios from files in `tests/eval/`:

- **Python files** (`.py`): Looks for `Scenario` objects and functions whose names start with `scenario_`
- **Config files** (`.yaml`, `.yml`, `.json`): Loads scenarios through the nocode module

### Python Scenario Files

Create Python files with scenario objects or factory functions:

```python
# tests/eval/my_scenarios.py
from agentunit import Scenario
from agentunit.adapters.base import BaseAdapter, AdapterOutcome
from agentunit.datasets.base import DatasetCase, DatasetSource

class SimpleAdapter(BaseAdapter):
    """Simple adapter for function-based agents."""

    name = "simple"

    def __init__(self, agent_func):
        self.agent_func = agent_func

    def prepare(self):
        pass

    def execute(self, case, trace):
        try:
            result = self.agent_func({"query": case.query})
            output = result.get("result", "")
            success = output == case.expected_output
            return AdapterOutcome(success=success, output=output)
        except Exception as e:
            return AdapterOutcome(success=False, output=None, error=str(e))

class MyDataset(DatasetSource):
    def __init__(self):
        super().__init__(name="my-dataset", loader=self._generate_cases)

    def _generate_cases(self):
        return [
            DatasetCase(
                id="test1",
                query="Hello",
                expected_output="Hi there!",
            )
        ]

def my_agent(payload):
    return {"result": "Hi there!"}

# This scenario will be auto-discovered
greeting_scenario = Scenario(
    name="greeting-test",
    adapter=SimpleAdapter(my_agent),
    dataset=MyDataset(),
)

# Factory functions starting with 'scenario_' are also discovered
def scenario_advanced_test():
    return Scenario(
        name="advanced-test",
        adapter=SimpleAdapter(my_agent),
        dataset=MyDataset(),
    )
```

### Pytest Integration Features

#### Markers

The plugin adds pytest markers for filtering:

```bash
# Run only AgentUnit scenarios
pytest -m agentunit

# Run a specific scenario by matching its name with -k
pytest tests/eval/ -k "greeting-test"

# Combine with other markers
pytest -m "agentunit and not slow"
```

Note that `-m` matches marker names only; to select a single scenario, filter on the test name with `-k` as shown above.

#### Test Results

- **Passed scenarios**: All test cases in the scenario passed
- **Failed scenarios**: One or more test cases failed (detailed failure info is shown)
- **Error scenarios**: The scenario could not be loaded or executed

#### Scenario Configuration

Scenario factory functions are invoked without arguments during collection, so pytest fixtures are not injected into them. Share configuration through module-level constants or default parameters instead:

```python
from agentunit import Scenario

DEFAULT_CONFIG = {"timeout": 30, "retries": 2}

def scenario_with_config(config=None):
    # Factories are called with no arguments, so fall back to defaults
    config = config or DEFAULT_CONFIG
    return Scenario(
        name="configured-test",
        adapter=SimpleAdapter(my_agent),
        dataset=MyDataset(),
        timeout=config["timeout"],
        retries=config["retries"],
    )
```
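
### Config-Based Scenarios

Scenarios can also be declared in YAML or JSON and loaded through the nocode module. The exact schema is defined by `agentunit.nocode.ScenarioBuilder`; the file below is a hypothetical sketch of what such a config might look like, not a confirmed schema:

```yaml
# tests/eval/config_scenario.yaml
# Illustrative only: the field names here are assumptions, check the
# nocode module documentation for the actual schema.
name: config-based-test
adapter: simple
cases:
  - id: greeting
    query: "Hello"
    expected_output: "Hi there!"
```

If the nocode module is not installed, config files in `tests/eval/` are silently skipped rather than reported as errors.

### Configuration

Add pytest configuration in 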
`pyproject.toml`:

```toml
[tool.pytest.ini_options]
markers = [
    "agentunit: marks test as an AgentUnit scenario evaluation",
    "scenario(name): marks test with specific scenario name",
]
testpaths = ["tests"]
```

Note that `tests/eval/` already sits under `tests`, so it does not need its own `testpaths` entry.

### Example Directory Structure

```
project/
├── tests/
│   ├── eval/                    # AgentUnit scenarios
│   │   ├── __init__.py
│   │   ├── basic_scenarios.py   # Python scenarios
│   │   ├── advanced_scenarios.py
│   │   └── config_scenario.yaml # Config-based scenarios
│   └── test_regular.py          # Regular pytest tests
├── src/
│   └── myproject/
└── pyproject.toml
```

### Running Scenarios

```bash
# Run all tests (including AgentUnit scenarios)
pytest

# Run only AgentUnit scenarios
pytest tests/eval/

# Run with verbose output
pytest tests/eval/ -v

# Run specific scenario file
pytest tests/eval/basic_scenarios.py

# Filter by markers
pytest -m agentunit

# Run with coverage
pytest tests/eval/ --cov=myproject
```

### Advanced Usage

#### Test Names

Scenarios appear in pytest output with descriptive names:

```
tests/eval/basic_scenarios.py::agentunit::greeting-test PASSED
tests/eval/basic_scenarios.py::agentunit::math-test FAILED
```

#### Parallel Execution

Use pytest-xdist for parallel scenario execution:

```bash
pip install pytest-xdist
pytest tests/eval/ -n auto
```

#### Integration with CI/CD

The plugin works seamlessly with CI/CD systems:

```yaml
# .github/workflows/test.yml
- name: Run AgentUnit scenarios
  run: pytest tests/eval/ --junitxml=scenario-results.xml
```

### Error Handling

The plugin handles various error conditions gracefully:

- **Load errors**: If a scenario file can't be loaded, it appears as a single failing test
- **Runtime errors**: Scenario execution errors are reported as test failures
- **Missing dependencies**: Optional dependencies are handled with appropriate skips

### Best Practices

1. **Organize scenarios** by functionality in separate files
2. **Use descriptive names** for scenarios and test cases
3. **Add markers** for easy filtering and organization
4. **Include both positive and negative test cases**
5. **Share configuration** through module-level defaults rather than fixtures
6. **Document scenario purpose** with docstrings (see the sketch below)
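
A documented factory keeps pytest output self-explanatory. Here is a minimal sketch reusing the `SimpleAdapter`, `MyDataset`, and `my_agent` helpers from the examples above:

```python
from agentunit import Scenario

def scenario_documented_greeting():
    """Verify that the agent greets users politely.

    Covers the greeting category only; math and other query types
    belong in their own scenarios.
    """
    return Scenario(
        name="documented-greeting-test",
        adapter=SimpleAdapter(my_agent),
        dataset=MyDataset(),
    )
```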
### Troubleshooting

#### Scenarios Not Discovered

- Ensure files are in the `tests/eval/` directory
- Check that scenario objects are properly defined
- Verify that import statements work correctly

#### Import Errors

- Make sure all dependencies are installed
- Check that the Python path includes your project
- Verify that the scenario file syntax is correct

#### Test Failures

- Check the scenario's agent implementation
- Verify that dataset cases have correct expected outputs
- Review error messages in pytest output
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index b996220..4d29b1c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,6 +47,10 @@ sphinx = "^7.3.7"

[tool.poetry.scripts]
agentunit = "agentunit.cli:entrypoint"
+agentunit-init-eval = "agentunit.pytest.cli:init_eval"
+
+[tool.poetry.plugins."pytest11"]
+agentunit = "agentunit.pytest.plugin"

[tool.poetry.urls]
"Issue Tracker" = "https://github.com/aviralgarg05/agentunit/issues"
@@ -60,6 +64,8 @@ build-backend = "poetry.core.masonry.api"
markers = [
    "integration: marks tests as integration tests (deselect with '-m \"not integration\"')",
    "langgraph: marks tests as requiring LangGraph (skipped if not installed)",
+    "agentunit: marks test as an AgentUnit scenario evaluation",
+    "scenario(name): marks test with specific scenario name",
]
testpaths = ["tests"]
python_files = ["test_*.py"]
diff --git a/src/agentunit/pytest/__init__.py b/src/agentunit/pytest/__init__.py
new file mode 100644
index 0000000..9f64ccc
--- /dev/null
+++ b/src/agentunit/pytest/__init__.py
@@ -0,0 +1,6 @@
"""Pytest plugin for AgentUnit scenario discovery and execution."""

from .plugin import pytest_collect_file, pytest_configure


__all__ = ["pytest_collect_file", "pytest_configure"]
diff --git a/src/agentunit/pytest/cli.py b/src/agentunit/pytest/cli.py
new file mode 100644
index 0000000..9d9309e
--- /dev/null
+++ b/src/agentunit/pytest/cli.py
@@ -0,0 +1,203 @@
"""CLI commands for pytest plugin setup."""

from __future__ import annotations

from pathlib import Path

import click


@click.command()
@click.option(
    "--directory",
    "-d",
    default="tests/eval",
    help="Directory to create for evaluation scenarios",
)
@click.option(
    "--example",
    "-e",
    is_flag=True,
    help="Create example scenario files",
)
def init_eval(directory: str, example: bool) -> None:
    """Initialize directory structure for AgentUnit pytest plugin."""
    eval_dir = Path(directory)

    # Create directory structure
    eval_dir.mkdir(parents=True, exist_ok=True)

    # Create __init__.py
    init_file = eval_dir / "__init__.py"
    if not init_file.exists():
        init_file.write_text("# AgentUnit evaluation scenarios\n")
        click.echo(f"Created {init_file}")

    if example:
        # Create example scenario file
        example_file = eval_dir / "example_scenarios.py"
        if not example_file.exists():
            example_content = '''"""Example AgentUnit scenarios for pytest plugin."""

from agentunit import Scenario
from agentunit.adapters.base import BaseAdapter, AdapterOutcome
from agentunit.datasets.base import DatasetCase, DatasetSource


class SimpleAdapter(BaseAdapter):
    """Simple adapter for function-based agents."""

    name = "simple"

    def __init__(self, agent_func):
        self.agent_func = agent_func

    def prepare(self):
        pass

    def execute(self, case, trace):
        try:
            result = self.agent_func({"query": case.query})
            output = result.get("result", "")
            success = output == case.expected_output
            return 
AdapterOutcome(success=success, output=output) + except Exception as e: + return AdapterOutcome(success=False, output=None, error=str(e)) + + +class ExampleDataset(DatasetSource): + """Example dataset for testing.""" + + def __init__(self): + super().__init__(name="example-dataset", loader=self._generate_cases) + + def _generate_cases(self): + return [ + DatasetCase( + id="greeting", + query="Hello, how are you?", + expected_output="Hello! I'm doing well, thank you.", + metadata={"category": "greeting"} + ), + DatasetCase( + id="math_simple", + query="What is 2 + 2?", + expected_output="4", + metadata={"category": "math"} + ), + ] + + +def example_agent(payload): + """Example agent that handles basic queries.""" + query = payload.get("query", "").lower() + + if "hello" in query or "how are you" in query: + return {"result": "Hello! I'm doing well, thank you."} + elif "2 + 2" in query or "2+2" in query: + return {"result": "4"} + else: + return {"result": "I don't understand that query."} + + +# This scenario will be auto-discovered by pytest +example_scenario = Scenario( + name="example-basic-test", + adapter=SimpleAdapter(example_agent), + dataset=ExampleDataset(), +) + + +def scenario_math_focused(): + """Factory function for math-focused scenario.""" + class MathDataset(DatasetSource): + def __init__(self): + super().__init__(name="math-dataset", loader=self._generate_cases) + + def _generate_cases(self): + return [ + DatasetCase( + id="addition", + query="What is 5 + 3?", + expected_output="8", + ), + DatasetCase( + id="multiplication", + query="What is 4 * 6?", + expected_output="24", + ), + ] + + def math_agent(payload): + query = payload.get("query", "") + # Simple math agent - in practice, this would be more sophisticated + if "5 + 3" in query: + return {"result": "8"} + elif "4 * 6" in query: + return {"result": "24"} + return {"result": "I can only do simple math"} + + return Scenario( + name="math-focused-test", + adapter=SimpleAdapter(math_agent), + dataset=MathDataset(), + ) +''' + + example_file.write_text(example_content) + click.echo(f"Created {example_file}") + + # Create README + readme_file = eval_dir / "README.md" + if not readme_file.exists(): + readme_content = """# AgentUnit Evaluation Scenarios + +This directory contains AgentUnit scenarios that can be run as pytest tests. + +## Usage + +Run all scenarios: +```bash +pytest tests/eval/ +``` + +Run specific scenario file: +```bash +pytest tests/eval/example_scenarios.py +``` + +Run with AgentUnit marker: +```bash +pytest -m agentunit +``` + +## Creating Scenarios + +1. Create Python files with `Scenario` objects or `scenario_*` functions +2. Use the `DatasetSource` class to define test cases +3. Implement agent functions that process queries and return results + +See `example_scenarios.py` for examples. + +## Markers + +- `@pytest.mark.agentunit`: Automatically added to all scenarios +- `@pytest.mark.scenario(name="scenario-name")`: Added with scenario name + +## Documentation + +See `docs/pytest-plugin.md` for complete documentation. +""" + + readme_file.write_text(readme_content) + click.echo(f"Created {readme_file}") + + click.echo(f"\nEvaluation directory initialized at {eval_dir}") + click.echo("\nNext steps:") + click.echo(f"1. Add scenario files to {eval_dir}/") + click.echo("2. Run: pytest tests/eval/") + click.echo("3. 
See docs/pytest-plugin.md for more information") + + +if __name__ == "__main__": + init_eval() diff --git a/src/agentunit/pytest/plugin.py b/src/agentunit/pytest/plugin.py new file mode 100644 index 0000000..5913d48 --- /dev/null +++ b/src/agentunit/pytest/plugin.py @@ -0,0 +1,190 @@ +"""Pytest plugin for AgentUnit scenario discovery and execution.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pytest + +from agentunit import Scenario, run_suite +from agentunit.core.exceptions import AgentUnitError + + +if TYPE_CHECKING: + from collections.abc import Generator + from pathlib import Path + + from _pytest.config import Config + from _pytest.nodes import Collector + from _pytest.python import Module + + +def pytest_configure(config: Config) -> None: + """Configure pytest with AgentUnit markers.""" + config.addinivalue_line("markers", "agentunit: mark test as an AgentUnit scenario evaluation") + config.addinivalue_line("markers", "scenario(name): mark test with specific scenario name") + + +def pytest_collect_file(file_path: Path, parent: Collector) -> Module | None: + """Collect AgentUnit scenario files as pytest tests.""" + # Only collect files in tests/eval/ directory + if not _is_eval_directory(file_path): + return None + + # Look for scenario files (Python files or YAML/JSON configs) + if file_path.suffix in {".py", ".yaml", ".yml", ".json"}: + return AgentUnitFile.from_parent(parent, path=file_path) + + return None + + +def _is_eval_directory(file_path: Path) -> bool: + """Check if file is in tests/eval/ directory.""" + parts = file_path.parts + return "tests" in parts and "eval" in parts + + +class AgentUnitFile(pytest.File): + """Pytest file collector for AgentUnit scenarios.""" + + def collect(self) -> Generator[AgentUnitItem, None, None]: + """Collect scenario items from the file.""" + try: + scenarios = self._discover_scenarios() + for scenario in scenarios: + yield AgentUnitItem.from_parent(self, name=scenario.name, scenario=scenario) + except Exception as e: + # If we can't load scenarios, create a single failing test + yield AgentUnitItem.from_parent( + self, name=f"load_error_{self.path.stem}", scenario=None, load_error=str(e) + ) + + def _discover_scenarios(self) -> list[Scenario]: + """Discover scenarios from the file.""" + scenarios = [] + + if self.path.suffix == ".py": + scenarios.extend(self._discover_python_scenarios()) + elif self.path.suffix in {".yaml", ".yml", ".json"}: + scenarios.extend(self._discover_config_scenarios()) + + return scenarios + + def _discover_python_scenarios(self) -> list[Scenario]: + """Discover scenarios from Python files.""" + scenarios = [] + + # Import the module and look for scenario objects or functions + spec = self._import_module() + if spec is None: + return scenarios + + module = spec + + # Look for Scenario objects + for name in dir(module): + obj = getattr(module, name) + if isinstance(obj, Scenario): + scenarios.append(obj) + elif callable(obj) and name.startswith("scenario_"): + # Try to call functions that look like scenario factories + try: + result = obj() + if isinstance(result, Scenario): + scenarios.append(result) + except Exception: + # Skip functions that can't be called or don't return scenarios + continue + + return scenarios + + def _discover_config_scenarios(self) -> list[Scenario]: + """Discover scenarios from config files.""" + # This would integrate with the nocode module to load scenarios + # from YAML/JSON configuration files + try: + from agentunit.nocode import 
ScenarioBuilder + + builder = ScenarioBuilder.from_file(self.path) + scenario = builder.to_scenario() + return [scenario] + except ImportError: + # nocode module not available + return [] + except Exception: + # Failed to load config + return [] + + def _import_module(self) -> Any: + """Import Python module from file path.""" + try: + import importlib.util + import sys + + spec = importlib.util.spec_from_file_location(self.path.stem, self.path) + if spec is None or spec.loader is None: + return None + + module = importlib.util.module_from_spec(spec) + sys.modules[self.path.stem] = module + spec.loader.exec_module(module) + return module + except Exception: + return None + + +class AgentUnitItem(pytest.Item): + """Pytest test item for AgentUnit scenarios.""" + + def __init__( + self, + name: str, + parent: AgentUnitFile, + scenario: Scenario | None = None, + load_error: str | None = None, + ) -> None: + super().__init__(name, parent) + self.scenario = scenario + self.load_error = load_error + + # Add agentunit marker + self.add_marker(pytest.mark.agentunit) + + # Add scenario name marker if available + if scenario: + self.add_marker(pytest.mark.scenario(name=scenario.name)) + + def runtest(self) -> None: + """Run the AgentUnit scenario as a pytest test.""" + if self.load_error: + raise AgentUnitError(f"Failed to load scenario: {self.load_error}") + + if self.scenario is None: + raise AgentUnitError("No scenario to run") + + # Run the scenario using AgentUnit + result = run_suite([self.scenario]) + + # Check if the scenario passed + scenario_result = result.scenarios[0] + + # Collect failures + failures = [] + for run in scenario_result.runs: + if not run.success: + error_msg = run.error or "Unknown error" + failures.append(f"Case {run.case_id}: {error_msg}") + + if failures: + failure_summary = "\n".join(failures) + raise AssertionError(f"Scenario '{self.scenario.name}' failed:\n{failure_summary}") + + def repr_failure(self, excinfo: Any) -> str: + """Represent test failure.""" + if isinstance(excinfo.value, AssertionError): + return str(excinfo.value) + return super().repr_failure(excinfo) + + def reportinfo(self) -> tuple[str, int | None, str]: + """Report test location info.""" + return str(self.path), None, f"agentunit::{self.name}" diff --git a/tests/eval/__init__.py b/tests/eval/__init__.py new file mode 100644 index 0000000..145c417 --- /dev/null +++ b/tests/eval/__init__.py @@ -0,0 +1 @@ +# AgentUnit evaluation scenarios diff --git a/tests/eval/example_scenarios.py b/tests/eval/example_scenarios.py new file mode 100644 index 0000000..4c63e2c --- /dev/null +++ b/tests/eval/example_scenarios.py @@ -0,0 +1,97 @@ +"""Example AgentUnit scenarios for pytest plugin demonstration.""" + +from agentunit import Scenario +from agentunit.adapters.base import AdapterOutcome, BaseAdapter +from agentunit.datasets.base import DatasetCase, DatasetSource + + +class SimpleTestAdapter(BaseAdapter): + """Simple adapter for testing.""" + + name = "test" + + def __init__(self, agent_func): + self.agent_func = agent_func + + def prepare(self): + pass + + def execute(self, case, trace): + try: + result = self.agent_func({"query": case.query}) + output = result.get("result", "") + success = output == case.expected_output + if success: + return AdapterOutcome(success=True, output=output) + else: + error_msg = f"Expected '{case.expected_output}', got '{output}'" + return AdapterOutcome(success=False, output=output, error=error_msg) + except Exception as e: + return AdapterOutcome(success=False, output=None, 
error=str(e))


class SimpleTestDataset(DatasetSource):
    """Simple dataset for testing the pytest plugin."""

    def __init__(self):
        super().__init__(name="simple-test", loader=self._generate_cases)

    def _generate_cases(self):
        return [
            DatasetCase(
                id="greeting",
                query="Hello, how are you?",
                expected_output="Hello! I'm doing well, thank you for asking.",
                metadata={"type": "greeting"},
            ),
            DatasetCase(
                id="math", query="What is 2 + 2?", expected_output="4", metadata={"type": "math"}
            ),
        ]


def simple_echo_agent(payload):
    """Simple agent that can handle greetings and basic math."""
    query = payload.get("query", "").lower()

    # Handle greeting
    if "hello" in query and "how are you" in query:
        return {"result": "Hello! I'm doing well, thank you for asking."}

    # Handle math
    if "what is 2 + 2" in query or "2 + 2" in query:
        return {"result": "4"}

    # Default response
    return {"result": f"Echo: {payload.get('query', '')}"}


# Scenario objects that will be auto-discovered
basic_scenario = Scenario(
    name="basic-echo-test",
    adapter=SimpleTestAdapter(simple_echo_agent),
    dataset=SimpleTestDataset(),
)


def scenario_math_test():
    """Scenario factory function (starts with 'scenario_')."""

    def math_agent(payload):
        query = payload.get("query", "").lower()

        # Handle greeting
        if "hello" in query and "how are you" in query:
            return {"result": "Hello! I'm doing well, thank you for asking."}

        # Handle math
        if "2 + 2" in query or "2+2" in query or "what is 2 + 2" in query:
            return {"result": "4"}

        return {"result": "I don't know"}

    return Scenario(
        name="math-test",
        adapter=SimpleTestAdapter(math_agent),
        dataset=SimpleTestDataset(),
    )
diff --git a/tests/eval/failing_scenario.py b/tests/eval/failing_scenario.py
new file mode 100644
index 0000000..e76c2ac
--- /dev/null
+++ b/tests/eval/failing_scenario.py
@@ -0,0 +1,67 @@
"""Example of a failing scenario for pytest plugin testing."""

from agentunit import Scenario
from agentunit.adapters.base import AdapterOutcome, BaseAdapter
from agentunit.datasets.base import DatasetCase, DatasetSource


class SimpleAdapter(BaseAdapter):
    """Simple adapter for function-based agents."""

    name = "simple"

    def __init__(self, agent_func):
        self.agent_func = agent_func

    def prepare(self):
        pass

    def execute(self, case, trace):
        try:
            result = self.agent_func({"query": case.query})
            output = result.get("result", "")
            success = output == case.expected_output
            if success:
                return AdapterOutcome(success=True, output=output)
            else:
                error_msg = f"Expected '{case.expected_output}', got '{output}'"
                return AdapterOutcome(success=False, output=output, error=error_msg)
        except Exception as e:
            return AdapterOutcome(success=False, output=None, error=str(e))


class FailingDataset(DatasetSource):
    """Dataset that will cause failures."""

    def __init__(self):
        super().__init__(name="failing-test", loader=self._generate_cases)

    def _generate_cases(self):
        return [
            DatasetCase(
                id="impossible",
                query="What is the meaning of life?",
                expected_output="42",
                metadata={"type": "philosophy"},
            ),
        ]


def always_wrong_agent(payload):
    """Agent that never produces the expected answer, so the scenario fails."""
    # Always miss the expected output ("42") regardless of the query
    return {"result": "I don't know"}


# This scenario will fail when 
run +failing_scenario = Scenario( + name="failing-test", + adapter=SimpleAdapter(always_wrong_agent), + dataset=FailingDataset(), +) diff --git a/tests/test_pytest_plugin.py b/tests/test_pytest_plugin.py new file mode 100644 index 0000000..d529912 --- /dev/null +++ b/tests/test_pytest_plugin.py @@ -0,0 +1,233 @@ +"""Tests for the AgentUnit pytest plugin.""" + +from pathlib import Path +from textwrap import dedent + +import pytest + +from agentunit.adapters.base import AdapterOutcome, BaseAdapter +from agentunit.pytest.plugin import AgentUnitFile, AgentUnitItem, _is_eval_directory + + +class SimpleTestAdapter(BaseAdapter): + """Simple adapter for testing.""" + + name = "test" + + def __init__(self, agent_func): + self.agent_func = agent_func + + def prepare(self): + pass + + def execute(self, case, trace): + try: + result = self.agent_func({"query": case.query}) + output = result.get("result", "") + success = output == case.expected_output + return AdapterOutcome(success=success, output=output) + except Exception as e: + return AdapterOutcome(success=False, output=None, error=str(e)) + + +class MockConfig: + """Mock pytest config.""" + + def __init__(self, rootpath=None): + self.rootpath = rootpath or Path() + + +class MockSession: + """Mock pytest session.""" + + def __init__(self, rootpath=None): + self.config = MockConfig(rootpath) + + +class MockParent: + """Mock pytest parent node.""" + + def __init__(self, path=None, rootpath=None): + self.path = path or Path("test.py") + self.config = MockConfig(rootpath) + self.session = MockSession(rootpath) + self.nodeid = str(self.path) + self.own_markers = [] + self.parent = None # Root node has no parent + + +class TestPytestPlugin: + """Test the AgentUnit pytest plugin functionality.""" + + def test_is_eval_directory(self): + """Test the eval directory detection.""" + # Should detect files in tests/eval/ + assert _is_eval_directory(Path("tests/eval/scenarios.py")) + assert _is_eval_directory(Path("project/tests/eval/test.yaml")) + + # Should not detect files elsewhere + assert not _is_eval_directory(Path("tests/test_something.py")) + assert not _is_eval_directory(Path("src/agentunit/core.py")) + assert not _is_eval_directory(Path("eval/scenarios.py")) + + def test_scenario_discovery_from_python_file(self, tmp_path): + """Test discovering scenarios from Python files.""" + # Create a temporary Python file with scenarios + scenario_file = tmp_path / "tests" / "eval" / "test_scenarios.py" + scenario_file.parent.mkdir(parents=True) + + scenario_content = dedent(""" + from agentunit import Scenario + from agentunit.datasets.base import DatasetCase, DatasetSource + from agentunit.adapters.base import BaseAdapter, AdapterOutcome + + class TestAdapter(BaseAdapter): + name = "test" + def __init__(self, agent_func): + self.agent_func = agent_func + def prepare(self): + pass + def execute(self, case, trace): + result = self.agent_func({"query": case.query}) + return AdapterOutcome(success=True, output=result.get("result")) + + class TestDataset(DatasetSource): + def __init__(self): + super().__init__(name="test", loader=lambda: [ + DatasetCase(id="test1", query="hello", expected_output="hi") + ]) + + def test_agent(payload): + return {"result": "hi"} + + # This should be discovered + test_scenario = Scenario( + name="test-scenario", + adapter=TestAdapter(test_agent), + dataset=TestDataset(), + ) + + def scenario_factory(): + return Scenario( + name="factory-scenario", + adapter=TestAdapter(test_agent), + dataset=TestDataset(), + ) + """) + + 
scenario_file.write_text(scenario_content) + + # Create a mock parent collector + parent = MockParent(path=tmp_path, rootpath=tmp_path) + + # Test file collection + agentunit_file = AgentUnitFile.from_parent(parent, path=scenario_file) + scenarios = agentunit_file._discover_scenarios() + + # Should find at least one scenario + assert len(scenarios) >= 1 + scenario_names = [s.name for s in scenarios] + assert "test-scenario" in scenario_names + + def test_agentunit_item_success(self): + """Test AgentUnit item with successful scenario.""" + from agentunit import Scenario + from agentunit.datasets.base import DatasetCase, DatasetSource + + class SuccessDataset(DatasetSource): + def __init__(self): + super().__init__( + name="success", + loader=lambda: [ + DatasetCase(id="success1", query="test", expected_output="test") + ], + ) + + def success_agent(payload): + return {"result": "test"} + + scenario = Scenario( + name="success-test", + adapter=SimpleTestAdapter(success_agent), + dataset=SuccessDataset(), + ) + + # Create mock parent + parent = MockParent() + item = AgentUnitItem.from_parent(parent, name="test", scenario=scenario) + + # Should not raise any exception + item.runtest() + + def test_agentunit_item_failure(self): + """Test AgentUnit item with failing scenario.""" + from agentunit import Scenario + from agentunit.datasets.base import DatasetCase, DatasetSource + + class FailDataset(DatasetSource): + def __init__(self): + super().__init__( + name="fail", + loader=lambda: [ + DatasetCase(id="fail1", query="test", expected_output="expected") + ], + ) + + def fail_agent(payload): + return {"result": "wrong"} + + scenario = Scenario( + name="fail-test", + adapter=SimpleTestAdapter(fail_agent), + dataset=FailDataset(), + ) + + # Create mock parent + parent = MockParent() + item = AgentUnitItem.from_parent(parent, name="test", scenario=scenario) + + # Should raise AssertionError for failed scenario + with pytest.raises(AssertionError, match="Scenario 'fail-test' failed"): + item.runtest() + + def test_agentunit_item_load_error(self): + """Test AgentUnit item with load error.""" + # Create mock parent + parent = MockParent() + item = AgentUnitItem.from_parent(parent, name="test", load_error="Failed to load") + + # Should raise AgentUnitError for load error + from agentunit.core.exceptions import AgentUnitError + + with pytest.raises(AgentUnitError, match="Failed to load scenario"): + item.runtest() + + def test_pytest_markers(self): + """Test that pytest markers are properly added.""" + from agentunit import Scenario + from agentunit.datasets.base import DatasetCase, DatasetSource + + class TestDataset(DatasetSource): + def __init__(self): + super().__init__( + name="test", + loader=lambda: [DatasetCase(id="test1", query="test", expected_output="test")], + ) + + def test_agent(payload): + return {"result": "test"} + + scenario = Scenario( + name="marker-test", + adapter=SimpleTestAdapter(test_agent), + dataset=TestDataset(), + ) + + # Create mock parent + parent = MockParent() + item = AgentUnitItem.from_parent(parent, name="test", scenario=scenario) + + # Check markers + marker_names = [marker.name for marker in item.iter_markers()] + assert "agentunit" in marker_names + assert "scenario" in marker_names