diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..179c46d --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,389 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +--- + +## Repository Purpose + +Kubernetes-native RAGAS-based agent evaluation system that executes test datasets via A2A protocol and publishes metrics via OTLP. Part of the Agentic Layer platform for automated agent testing and quality assurance. + +--- + +## Common Commands + +### Development Workflow + +```shell +# Install dependencies +uv sync + +# Run all quality checks (tests, mypy, bandit, ruff) +uv run poe check + +# Run unit tests only +uv run poe test + +# Run end-to-end tests (requires Tilt environment running) +uv run poe test_e2e + +# Code formatting and linting +uv run poe format # Format with Ruff +uv run poe lint # Lint and auto-fix with Ruff +uv run poe ruff # Both format and lint + +# Type checking and security +uv run poe mypy # Static type checking +uv run poe bandit # Security vulnerability scanning +``` + +### Local Development Environment + +```shell +# Start full Kubernetes environment (operators, agents, observability) +tilt up + +# Stop environment +tilt down + +# Required environment variables for local testing +export OPENAI_API_BASE="http://localhost:11001" # AI Gateway endpoint +export GOOGLE_API_KEY="your-api-key" # Required for Gemini models +``` + +### Running the 4-Phase Pipeline Locally + +```shell +# Phase 1: Download and convert dataset to RAGAS format +uv run python3 scripts/setup.py "http://localhost:11020/dataset.csv" + +# Phase 2: Execute queries through agent via A2A protocol +uv run python3 scripts/run.py "http://localhost:11010" + +# Phase 3: Evaluate responses using RAGAS metrics +uv run python3 scripts/evaluate.py gemini-2.5-flash-lite "faithfulness answer_relevancy" + +# Phase 4: Publish metrics to OTLP endpoint +uv run python3 scripts/publish.py "workflow-name" +``` + +### 
Testkube Execution + +```shell +# Run complete evaluation workflow in Kubernetes +kubectl testkube run testworkflow ragas-evaluation-workflow \ + --config datasetUrl="http://data-server.data-server:8000/dataset.csv" \ + --config agentUrl="http://weather-agent.sample-agents:8000" \ + --config metrics="nv_accuracy context_recall" \ + --config workflowName="Test-Run" \ + -n testkube + +# Watch workflow execution +kubectl testkube watch testworkflow ragas-evaluation-workflow -n testkube + +# Get workflow logs +kubectl testkube logs testworkflow ragas-evaluation-workflow -n testkube +``` + +### Docker Build + +```shell +# Build Docker image locally +make build + +# Run container locally +make run +``` + +--- + +## Architecture Overview + +### 4-Phase Evaluation Pipeline + +**Core Concept**: Sequential pipeline where each phase reads input from previous phase's output via shared `/app/data` volume. + +**Phase 1: Setup** (`scripts/setup.py`) +- **Input**: Dataset URL (CSV, JSON, or Parquet) +- **Output**: `data/datasets/ragas_dataset.jsonl` (RAGAS format) +- **Purpose**: Downloads external dataset, converts to RAGAS schema with `user_input`, `retrieved_contexts`, `reference` fields + +**Phase 2: Run** (`scripts/run.py`) +- **Input**: `data/datasets/ragas_dataset.jsonl` + Agent URL +- **Output**: `data/experiments/ragas_experiment.jsonl` (adds `response` field) +- **Purpose**: Sends each `user_input` to agent via A2A protocol using `a2a-sdk`, records agent responses + +**Phase 3: Evaluate** (`scripts/evaluate.py`) +- **Input**: `data/experiments/ragas_experiment.jsonl` + LLM model + metrics list +- **Output**: `data/results/evaluation_scores.json` +- **Purpose**: Calculates RAGAS metrics using LLM-as-a-judge via AI Gateway, tracks tokens and costs + +**Phase 4: Publish** (`scripts/publish.py`) +- **Input**: `data/results/evaluation_scores.json` + workflow name +- **Output**: Metrics published to OTLP endpoint +- **Purpose**: Sends evaluation results to observability 
backend (LGTM/Grafana) via OpenTelemetry + +### Data Flow + +``` +External Dataset (CSV/JSON/Parquet) + ↓ [setup.py] +data/datasets/ragas_dataset.jsonl + ↓ [run.py + A2A Client] +data/experiments/ragas_experiment.jsonl + ↓ [evaluate.py + RAGAS + AI Gateway] +data/results/evaluation_scores.json + ↓ [publish.py + OTLP] +Observability Backend (Grafana) +``` + +### Kubernetes Integration (Testkube) + +**Orchestration Pattern**: Each phase is a reusable `TestWorkflowTemplate` CRD that executes the same Docker image with different script arguments. + +**Shared State**: All phases mount the same `emptyDir` volume at `/app/data`, enabling stateless containers with persistent data flow between steps. + +**Template Files**: +- `deploy/base/templates/setup-template.yaml` - Phase 1 +- `deploy/base/templates/run-template.yaml` - Phase 2 +- `deploy/base/templates/evaluate-template.yaml` - Phase 3 +- `deploy/base/templates/publish-template.yaml` - Phase 4 +- `deploy/local/ragas-evaluation-workflow.yaml` - Combines all templates into complete workflow + +**Key Workflow Parameters**: +- `datasetUrl` - HTTP URL to test dataset +- `agentUrl` - A2A endpoint of agent to evaluate +- `model` - LLM model for RAGAS evaluation (e.g., `gemini-2.5-flash-lite`) +- `metrics` - Space-separated RAGAS metrics (e.g., `faithfulness context_recall`) +- `workflowName` - Label for published metrics +- `otlpEndpoint` - OpenTelemetry collector URL (default: `http://lgtm.monitoring:4318`) +- `image` - Docker image to use (default: `ghcr.io/agentic-layer/testbench/testworkflows:latest`) + +--- + +## Key Technology Integrations + +### RAGAS Framework +- **Purpose**: LLM-as-a-judge evaluation framework for RAG systems +- **Evaluation Approach**: Uses LLM to assess quality metrics beyond simple exact-match comparison +- **Available Metrics**: `faithfulness`, `answer_relevancy`, `context_precision`, `context_recall`, `nv_accuracy` +- **Cost Tracking**: Automatically tracks token usage and calculates evaluation 
costs +- **LLM Access**: Routes through AI Gateway (LiteLLM) configured via `OPENAI_API_BASE` environment variable + +### A2A Protocol (Agent-to-Agent) +- **Purpose**: Platform-agnostic JSON-RPC protocol for agent communication +- **Client Library**: `a2a-sdk` Python package +- **Usage in Testbench**: `run.py` uses the `a2a-sdk` client to send each `user_input` prompt to the agent's A2A endpoint +- **Response Handling**: Agent responses are stored in the `response` field of the experiment JSONL, together with the `trace_id` of the OpenTelemetry span that wrapped the request + +### OpenTelemetry (OTLP) +- **Purpose**: Standard protocol for publishing observability data +- **Transport**: HTTP/protobuf to OTLP collector endpoint (port 4318) +- **Metrics Published**: Overall scores, individual results, token counts, costs +- **Labeling**: Each metric labeled with `workflowName` for filtering in Grafana + +### Tilt (Local Development) +- **Purpose**: Local Kubernetes development environment +- **What Gets Deployed**: + - Core operators: `agent-runtime` (v0.16.0), `ai-gateway-litellm` (v0.3.2), `agent-gateway-krakend` (v0.4.1) + - Test infrastructure: `testkube` (v2.4.2), sample `weather-agent`, `data-server` + - Observability: LGTM stack (Grafana, Loki, Tempo, Mimir) + - TestWorkflow templates and evaluation workflow +- **Port Forwards**: `11001` (AI Gateway), `11010` (Weather Agent), `11000` (Grafana), `11020` (Data Server) + +--- + +## Code Organization + +### Core Scripts (scripts/) +All scripts follow the same pattern: parse arguments → read input file(s) → process → write output file + +- **`setup.py`**: Dataset download and conversion logic + - Supports CSV (with quoted array parsing), JSON, Parquet formats + - Validates required fields: `user_input`, `retrieved_contexts`, `reference` + - Creates parent directories if missing + +- **`run.py`**: Agent query execution + - Uses the `a2a-sdk` client for async HTTP requests + - Batch processes dataset entries + - Adds `response` and `trace_id` fields to each entry + +- **`evaluate.py`**: RAGAS metric calculation + - Configures LangChain 
OpenAI wrapper to use AI Gateway + - Instantiates RAGAS `SingleTurnSample` and `EvaluationDataset` + - Runs selected metrics, computes overall scores + - Extracts token usage and cost from callback handler + +- **`publish.py`**: OTLP metric publishing + - Converts evaluation scores to OpenTelemetry metrics + - Sends via HTTP to OTLP collector + - Uses workflow name as metric label + +### Test Organization + +**Unit Tests (`tests/`)**: +- One test file per script: `test_setup.py`, `test_run.py`, `test_evaluate.py`, `test_publish.py` +- Uses pytest with async support (`pytest-asyncio`) +- Mocks external dependencies: HTTP requests (`httpx.AsyncClient`), A2A client, RAGAS framework +- Uses `tmp_path` fixture for file I/O testing +- Test data samples in `tests/test_data/` + +**E2E Test (`tests_e2e/test_e2e.py`)**: +- Runs complete 4-phase pipeline in sequence +- Configurable via environment variables: `E2E_DATASET_URL`, `E2E_AGENT_URL`, `E2E_MODEL`, etc. +- Validates output files exist after each phase +- Requires Tilt environment running for dependencies + +### Deployment Manifests + +**Testkube Templates (`deploy/base/templates/`)**: +- Each template is a `TestWorkflowTemplate` CRD +- Defines container spec, volume mounts, command arguments +- Parameterized with `config.*` variables (e.g., `{{ config.datasetUrl }}`) + +**Local Development (`deploy/local/`)**: +- `ragas-evaluation-workflow.yaml` - Complete workflow definition +- `weather-agent.yaml` - Sample Agent CRD for testing +- `lgtm.yaml` - Grafana LGTM observability stack +- `data-server/` - ConfigMap with test datasets + Service for HTTP access + +--- + +## Development Guidelines + +### Testing Requirements +- **Never delete failing tests** - Either update tests to match correct implementation or fix code to pass tests +- **Unit tests must mock external dependencies** - No real HTTP calls, A2A clients, or LLM requests +- **E2E test validates file existence** - Doesn't validate content correctness (use unit 
tests for that) + +### Code Quality Standards +- **Line Length**: 120 characters max (Ruff) +- **Type Hints**: Required for all function signatures (mypy enforced) +- **Import Sorting**: Enabled via Ruff (I001 rule) +- **Security Scanning**: Bandit checks for vulnerabilities +- **Naming Conventions**: PEP 8 compliant (Ruff N rule) + +### Pre-commit Hooks +- Run automatically before commits via `.pre-commit-config.yaml` +- Enforces: Ruff formatting/linting, mypy, bandit +- Manual run: `pre-commit run --all-files` + +### Adding New RAGAS Metrics +1. Add metric import to `scripts/evaluate.py` +2. Update metric validation in argument parsing +3. Add to available metrics list in README +4. Add test cases in `tests/test_evaluate.py` with mocked metric + +### Modifying Data Flow +If changing intermediate file formats or locations: +1. Update corresponding script I/O logic +2. Update all dependent scripts (downstream phases) +3. Update TestWorkflowTemplate volume mount paths if needed +4. Update unit test mocks +5. 
Update E2E test file path validations + +--- + +## Common Debugging Scenarios + +### Local Pipeline Failures + +**Issue**: `setup.py` fails to download dataset +- **Check**: Dataset URL accessible from local machine +- **Check**: File format is CSV, JSON, or Parquet +- **Check**: Dataset contains required fields: `user_input`, `retrieved_contexts`, `reference` + +**Issue**: `run.py` fails to query agent +- **Check**: Agent URL is correct and agent is running (verify with `curl`) +- **Check**: Agent exposes A2A protocol endpoint +- **Check**: Network connectivity between testbench and agent + +**Issue**: `evaluate.py` fails with LLM errors +- **Check**: `OPENAI_API_BASE` points to AI Gateway (e.g., `http://localhost:11001`) +- **Check**: `GOOGLE_API_KEY` environment variable set +- **Check**: AI Gateway has access to specified model (check AI Gateway logs) + +**Issue**: `publish.py` fails to send metrics +- **Check**: OTLP endpoint is reachable +- **Check**: OTLP collector is running and accepting HTTP on port 4318 +- **Check**: Workflow name is valid (no special characters) + +### Testkube Workflow Failures + +**Issue**: Workflow stuck in "Queued" state +- **Check**: Testkube controller is running: `kubectl get pods -n testkube` +- **Check**: Sufficient cluster resources for workflow pods + +**Issue**: Workflow fails at specific step +- **Check step logs**: `kubectl testkube logs testworkflow ragas-evaluation-workflow -n testkube` +- **Check volume mounts**: Verify previous step wrote output file correctly +- **Check parameter values**: Ensure URLs and names are correct in workflow config + +**Issue**: Template not found errors +- **Check templates exist**: `kubectl get testworkflowtemplates -n testkube` +- **Reinstall templates**: `kubectl apply -f deploy/base/templates/ -n testkube` + +### Tilt Environment Issues + +**Issue**: Tilt fails to start operators +- **Check Kubernetes cluster**: `kubectl cluster-info` +- **Check tilt-extensions version**: Must be v0.6.0 
or later in Tiltfile +- **Check .env file**: Must contain `GOOGLE_API_KEY` + +**Issue**: Port forward conflicts +- **Check ports available**: 11000, 11001, 11010, 11020 +- **Kill conflicting processes**: `lsof -ti:11001 | xargs kill` + +**Issue**: Agent not responding on port 11010 +- **Check agent status**: `kubectl get pods -n sample-agents` +- **Check agent logs**: `kubectl logs -n sample-agents deployment/weather-agent` + +--- + +## Cross-Repository Dependencies + +### Platform Operators (Required at Runtime) +- **agent-runtime-operator** (v0.16.0): Provides `Agent`, `ToolServer`, `AgenticWorkforce` CRDs +- **ai-gateway-litellm-operator** (v0.3.2): Provides `AiGateway` CRD for LLM access during evaluation +- **agent-gateway-krakend-operator** (v0.4.1): Provides `AgentGateway` CRD for routing (optional, only if using gateway) +- **tilt-extensions** (v0.6.0): Custom Tilt helpers for local operator installation + +### Version Sync Points +When operators update CRD schemas: +1. Verify YAML manifests in `deploy/local/` still valid +2. Update TestWorkflowTemplate CRDs if volume paths or parameters changed +3. Update Tiltfile with new operator versions +4. Test E2E pipeline with new operator versions + +### Agent Integration +Testbench can evaluate any agent that: +1. Exposes A2A protocol endpoint +2. Is deployed via `Agent` CRD or accessible HTTP endpoint +3. 
Returns text responses to text prompts + +Examples: `agent-samples/weather-agent`, showcase agents (`showcase-cross-selling`, `showcase-news`) + +--- + +## Important Constraints + +### RAGAS Metric Limitations +- Most metrics require `retrieved_contexts` field in dataset +- LLM-based metrics consume tokens and incur costs +- Evaluation speed depends on AI Gateway throughput and model latency +- Some metrics (e.g., `context_recall`) require `reference` ground truth + +### A2A Protocol Requirements +- Agents must implement A2A JSON-RPC specification +- Only supports text-based question-answering (no multi-modal, no streaming in evaluation) +- Response timeout configured in `a2a-sdk` client (default: 30s) + +### Kubernetes Resource Requirements +- TestWorkflow pods need a shared volume (the `emptyDir` mounted at `/app/data`) to pass data between phases +- Each phase runs sequentially (no parallel execution of phases) +- Workflow pods are cleaned up after completion (data in the shared volume is retained only temporarily) + +### Data Privacy +- Datasets may contain sensitive information - ensure OTLP endpoints are secured +- Evaluation results include full prompts and responses - consider data retention policies +- AI Gateway logs may contain dataset content - review log retention settings diff --git a/README.md b/README.md index 692e088..46a7b24 100644 --- a/README.md +++ b/README.md @@ -161,16 +161,17 @@ uv sync # Required for evaluation - routes requests through our AI Gateway export OPENAI_API_BASE="http://localhost:11001" +export OPENAI_API_KEY="dummy-key-for-litellm" ``` #### Run the complete evaluation pipeline in 4 steps: ```shell # 1. Download and prepare dataset -uv run python3 scripts/setup.py "https://localhost:11020/dataset.csv" +uv run python3 scripts/setup.py "http://localhost:11020/dataset.csv" # 2. Execute queries through your agent -uv run python3 scripts/run.py "http://localhost:8000" +uv run python3 scripts/run.py "http://localhost:11010" # 3. 
Evaluate responses with RAGAS metrics uv run python3 scripts/evaluate.py gemini-2.5-flash-lite faithfulness answer_relevancy diff --git a/Tiltfile b/Tiltfile index 0b1b686..c852cb2 100644 --- a/Tiltfile +++ b/Tiltfile @@ -7,7 +7,7 @@ update_settings(max_parallel_updates=10) load('ext://dotenv', 'dotenv') dotenv() -v1alpha1.extension_repo(name='agentic-layer', url='https://github.com/agentic-layer/tilt-extensions', ref='v0.4.0') +v1alpha1.extension_repo(name='agentic-layer', url='https://github.com/agentic-layer/tilt-extensions', ref='v0.6.0') v1alpha1.extension(name='cert-manager', repo_name='agentic-layer', repo_path='cert-manager') load('ext://cert-manager', 'cert_manager_install') @@ -15,22 +15,23 @@ cert_manager_install() v1alpha1.extension(name='agent-runtime', repo_name='agentic-layer', repo_path='agent-runtime') load('ext://agent-runtime', 'agent_runtime_install') -agent_runtime_install(version='0.13.0') +agent_runtime_install(version='0.16.0') v1alpha1.extension(name='ai-gateway-litellm', repo_name='agentic-layer', repo_path='ai-gateway-litellm') load('ext://ai-gateway-litellm', 'ai_gateway_litellm_install') -ai_gateway_litellm_install(version='0.2.0') +ai_gateway_litellm_install(version='0.3.2') v1alpha1.extension(name='agent-gateway-krakend', repo_name='agentic-layer', repo_path='agent-gateway-krakend') load('ext://agent-gateway-krakend', 'agent_gateway_krakend_install') -agent_gateway_krakend_install(version='0.2.0') +agent_gateway_krakend_install(version='0.4.1') load('ext://helm_resource', 'helm_resource') helm_resource( 'testkube', 'oci://docker.io/kubeshop/testkube', namespace='testkube', - flags=['--version=2.4.2', '--create-namespace', '--values=deploy/local/testkube/values.yaml', '--wait', '--wait-for-jobs'], + flags=['--version=2.4.2', '--create-namespace', '--values=deploy/local/testkube/values.yaml', '--wait', + '--wait-for-jobs', '--timeout=10m'], ) # Apply Kubernetes manifests diff --git a/pyproject.toml b/pyproject.toml index 
4ffb991..cbcab2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "opentelemetry-api>=1.20.0", "opentelemetry-sdk>=1.20.0", "opentelemetry-exporter-otlp-proto-http>=1.20.0", + "opentelemetry-instrumentation-httpx>=0.45b0", ] [dependency-groups] diff --git a/scripts/evaluate.py b/scripts/evaluate.py index cca7c79..9aff38a 100644 --- a/scripts/evaluate.py +++ b/scripts/evaluate.py @@ -91,25 +91,42 @@ def format_evaluation_scores( ragas_result: EvaluationResult, cost_per_input_token: float, cost_per_output_token: float, + experiment_file: str, ) -> EvaluationScores: """ Format the RAGAS evaluation results. Args: ragas_result: The result object from RAGAS evaluate() - experiment: Ragas Experiment containing the original results - metrics: List of metric names used + cost_per_input_token: Cost per input token + cost_per_output_token: Cost per output token + experiment_file: Path to experiment JSONL file (to extract trace_ids) Returns: Formatted dictionary matching the required structure """ + # Load trace_ids from experiment file (RAGAS drops custom fields during processing) + trace_ids = [] + with open(experiment_file, "r") as f: + for line in f: + data = json.loads(line) + trace_ids.append(data.get("trace_id")) + # Calculate overall scores (mean of each metric) overall_scores = ragas_result._repr_dict # Build individual results individual_results = ragas_result.to_pandas().to_dict(orient="records") + # Merge trace_ids back into individual_results (preserve by row order) + for i, result in enumerate(individual_results): + if i < len(trace_ids): + result["trace_id"] = trace_ids[i] + else: + logger.warning(f"No trace_id found for result {i}") + result["trace_id"] = None + # Extract token usage and calculate cost using TokenUsageParser # Check if token usage data was collected (some metrics don't use LLMs or use separate LLM instances) if ragas_result.cost_cb and hasattr(ragas_result.cost_cb, "usage_data") and 
ragas_result.cost_cb.usage_data: @@ -191,6 +208,7 @@ def main( ragas_result, cost_per_input_token=cost_per_input_token, cost_per_output_token=cost_per_output_token, + experiment_file="data/experiments/ragas_experiment.jsonl", ) # Ensure output directory exists diff --git a/scripts/otel_setup.py b/scripts/otel_setup.py new file mode 100644 index 0000000..0efa612 --- /dev/null +++ b/scripts/otel_setup.py @@ -0,0 +1,28 @@ +"""OpenTelemetry setup for testbench.""" + +import logging + +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor + + +def setup_otel() -> None: + """Set up OpenTelemetry tracing for testbench.""" + + # Set log level for urllib to WARNING to reduce noise + logging.getLogger("urllib3").setLevel(logging.WARNING) + + # Traces + # Create resource with service name for proper trace identification + resource = Resource.create({"service.name": "testbench.run"}) + tracer_provider = trace_sdk.TracerProvider(resource=resource) + tracer_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter())) + # Sets the global default tracer provider + trace.set_tracer_provider(tracer_provider) + + # Instrument HTTPX clients (this also transfers the trace context automatically) + HTTPXClientInstrumentor().instrument() diff --git a/scripts/publish.py b/scripts/publish.py index b1902f7..599b66b 100644 --- a/scripts/publish.py +++ b/scripts/publish.py @@ -57,7 +57,7 @@ def create_and_push_metrics(overall_scores: dict[str, float], workflow_name: str logger.info(f"Pushing metrics to OTLP endpoint at {otlp_endpoint}...") for metric_name, score in overall_scores.items(): - # Create a Gauge (using UpDownCounter as closest equivalent) + # Create a Gauge gauge = 
meter.create_gauge( name=f"ragas_evaluation_{metric_name}", description=f"Overall {metric_name} score from RAGAS evaluation", diff --git a/scripts/run.py b/scripts/run.py index 3b79de1..b93f592 100644 --- a/scripts/run.py +++ b/scripts/run.py @@ -14,9 +14,13 @@ Role, TextPart, ) +from opentelemetry import trace +from opentelemetry.trace import Status, StatusCode from pydantic import BaseModel from ragas import Dataset, experiment +from otel_setup import setup_otel + # Set up module-level logger logging.basicConfig(level=logging.INFO) logger: Logger = logging.getLogger(__name__) @@ -39,64 +43,95 @@ async def initialize_client(agent_url: str) -> Client: @experiment() -async def run_agent_experiment(row, agent_url: str) -> dict[str, str | list]: +async def run_agent_experiment(row, agent_url: str, workflow_name: str) -> dict[str, str | list]: """ Experiment function that processes each row from the dataset. Args: row: A dictionary containing 'user_input', 'retrieved_contexts', and 'reference' fields agent_url: The URL of the agent to query + workflow_name: Name of the test workflow for span labeling Returns: - Dictionary with original row data plus 'response' + Dictionary with original row data plus 'response' and 'trace_id' """ - try: - async with httpx.AsyncClient(): - client = await initialize_client(agent_url) - - # Get the input from the row - input_text = row.get("user_input") - - message = Message( - role=Role.user, - parts=[Part(TextPart(text=input_text))], - message_id=uuid4().hex, - ) - - logger.info(f"Processing: {input_text}") - - async for response in client.send_message(message): - # Client returns tuples, extract the task/message - if isinstance(response, tuple): - task, _ = response - if task: - artifacts: list = task.model_dump(mode="json", include={"artifacts"}).get("artifacts", []) - - # Extract the model response - if artifacts and artifacts[0].get("parts"): - output_text = artifacts[0]["parts"][0].get("text", "") - else: - logger.warning("No text 
found in artifacts") - else: - logger.warning(f"Unexpected response: {response}") - - except Exception as e: - logger.error(f'Error processing input "{row.get("user_input")}": {str(e)}') - output_text = f"ERROR: {str(e)}" - - # Return the original row data plus the results - result: dict[str, str | list] = { - **row, - "response": output_text, - } - - return result - - -async def main(agent_url: str) -> None: + # Get tracer for creating spans + tracer = trace.get_tracer("testbench.run") + + # Create span for this test case + # Span name includes user_input preview for debugging + user_input_preview = row.get("user_input", "")[:50] + span_name = f"query_agent: {user_input_preview}" + + with tracer.start_as_current_span(span_name) as span: + # Extract Trace ID from current span context + span_context = span.get_span_context() + trace_id = format(span_context.trace_id, "032x") # 32-char hex string + + # Add span attributes for filtering/debugging in Tempo UI + span.set_attribute("test.user_input", row.get("user_input", "")) + span.set_attribute("test.reference", row.get("reference", "")) + span.set_attribute("agent.url", agent_url) + span.set_attribute("workflow.name", workflow_name) + + try: + async with httpx.AsyncClient(): + client = await initialize_client(agent_url) + + # Get the input from the row + input_text = row.get("user_input") + + message = Message( + role=Role.user, + parts=[Part(TextPart(text=input_text))], + message_id=uuid4().hex, + ) + + logger.info(f"Processing: {input_text}") + + async for response in client.send_message(message): + # Client returns tuples, extract the task/message + if isinstance(response, tuple): + task, _ = response + if task: + artifacts: list = task.model_dump(mode="json", include={"artifacts"}).get("artifacts", []) + + # Extract the model response + if artifacts and artifacts[0].get("parts"): + output_text = artifacts[0]["parts"][0].get("text", "") + else: + logger.warning("No text found in artifacts") + else: + 
logger.warning(f"Unexpected response: {response}") + + # Mark span as successful + span.set_status(Status(StatusCode.OK)) + + except Exception as e: + logger.error(f'Error processing input "{row.get("user_input")}": {str(e)}') + output_text = f"ERROR: {str(e)}" + + # Record exception in span for debugging + span.record_exception(e) + span.set_status(Status(StatusCode.ERROR, description=str(e))) + + # Return the original row data plus results AND trace_id + result: dict[str, str | list] = { + **row, + "response": output_text, + "trace_id": trace_id, + } + + return result + + +async def main(agent_url: str, workflow_name: str) -> None: """Main function to load Ragas Dataset and run Experiment.""" + # Initialize OpenTelemetry tracing + setup_otel() + # Load existing Ragas dataset logger.info("Loading Ragas dataset from data/datasets/ragas_dataset.jsonl") dataset: Dataset[BaseModel] = Dataset.load(name="ragas_dataset", backend="local/jsonl", root_dir="./data") @@ -104,19 +139,25 @@ async def main(agent_url: str) -> None: # Run the experiment logger.info("Starting experiment...") - await run_agent_experiment.arun(dataset, name="ragas_experiment", agent_url=agent_url) + await run_agent_experiment.arun(dataset, name="ragas_experiment", agent_url=agent_url, workflow_name=workflow_name) logger.info("Experiment completed successfully") logger.info("Results saved to data/experiments/ragas_experiment.jsonl") if __name__ == "__main__": - # Parse parameter the script was called with (URL) + # Parse parameters the script was called with parser = argparse.ArgumentParser( description="Runs all queries from the Ragas dataset through the agent at the provided URL" ) parser.add_argument("url", help="URL to agent") + parser.add_argument( + "workflow_name", + nargs="?", + default="local-test", + help="Name of the test workflow (e.g., 'weather-assistant-test'). 
Default: 'local-test'", + ) args = parser.parse_args() - # Call main using the parsed URL - asyncio.run(main(args.url)) + # Call main with parsed arguments + asyncio.run(main(args.url, args.workflow_name)) diff --git a/scripts/setup.py b/scripts/setup.py index 0a6c512..351f0bb 100644 --- a/scripts/setup.py +++ b/scripts/setup.py @@ -58,7 +58,8 @@ def get_converter(url: str) -> Callable[[BytesIO], DataFrame]: def custom_convert_csv(input_file: BytesIO) -> DataFrame: """ - Converts a CSV input file to a Pandas DataFrame and, if it exists, turns 'retrieved_contexts' into a list (RAGAS requires 'retrieved_contexts' as a list of strings) + Converts a CSV input file to a Pandas DataFrame and, if it exists, turns 'retrieved_contexts' into a list + (RAGAS requires 'retrieved_contexts' as a list of strings) Args: input_file: The CSV input_file diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py index 738d670..e9ce438 100644 --- a/tests/test_evaluate.py +++ b/tests/test_evaluate.py @@ -47,6 +47,7 @@ def experiment_data(temp_dir): "retrieved_contexts": ["Context about weather"], "reference": "Expected answer", "response": "The weather is sunny.", + "trace_id": "a1b2c3d4e5f6789012345678901234ab", } ] @@ -58,9 +59,20 @@ def experiment_data(temp_dir): # TestFormatEvaluationScores tests -def test_overall_scores_calculation(): +def test_overall_scores_calculation(tmp_path): """Test that overall scores are calculated correctly""" + # Create temporary experiment file with trace_ids + experiment_file = tmp_path / "experiment.jsonl" + test_data = [ + {"trace_id": "trace1"}, + {"trace_id": "trace2"}, + {"trace_id": "trace3"}, + ] + with open(experiment_file, "w") as f: + for item in test_data: + f.write(json.dumps(item) + "\n") + # Create mock result class MockTokenUsage: input_tokens = 100 @@ -81,16 +93,26 @@ def total_cost(self): mock_result = MockResult() - formatted = format_evaluation_scores(mock_result, 5.0 / 1e6, 15.0 / 1e6) + formatted = 
format_evaluation_scores(mock_result, 5.0 / 1e6, 15.0 / 1e6, str(experiment_file)) # Verify overall scores are correct assert abs(formatted.overall_scores["faithfulness"] - 0.8) < 0.01 assert abs(formatted.overall_scores["answer_relevancy"] - 0.75) < 0.01 -def test_individual_results_present(): +def test_individual_results_present(tmp_path): """Test that individual results are included""" + # Create temporary experiment file with trace_ids + experiment_file = tmp_path / "experiment.jsonl" + test_data = [ + {"trace_id": "trace1"}, + {"trace_id": "trace2"}, + ] + with open(experiment_file, "w") as f: + for item in test_data: + f.write(json.dumps(item) + "\n") + # Create mock result class MockTokenUsage: input_tokens = 100 @@ -111,15 +133,22 @@ def total_cost(self): mock_result = MockResult() - formatted = format_evaluation_scores(mock_result, 5.0 / 1e6, 15.0 / 1e6) + formatted = format_evaluation_scores(mock_result, 5.0 / 1e6, 15.0 / 1e6, str(experiment_file)) # Verify individual results assert len(formatted.individual_results) == 2 -def test_token_usage_placeholders(): +def test_token_usage_placeholders(tmp_path): """Test that token usage placeholders are returned when cost_cb is None""" + # Create temporary experiment file with trace_id + experiment_file = tmp_path / "experiment.jsonl" + test_data = [{"trace_id": "trace1"}] + with open(experiment_file, "w") as f: + for item in test_data: + f.write(json.dumps(item) + "\n") + # Create mock result class MockTokenUsage: input_tokens = 150 @@ -140,7 +169,7 @@ def total_cost(self, **kwargs): mock_result = MockResult() - formatted = format_evaluation_scores(mock_result, 5.0 / 1e6, 15.0 / 1e6) + formatted = format_evaluation_scores(mock_result, 5.0 / 1e6, 15.0 / 1e6, str(experiment_file)) # Verify placeholder token usage is returned (0 when cost_cb is None) assert formatted.total_tokens["input_tokens"] == 0 @@ -148,6 +177,54 @@ def total_cost(self, **kwargs): assert formatted.total_cost == 0.0 +def 
test_trace_id_preservation(tmp_path): + """Test that trace_ids from experiment file are preserved in individual_results""" + + # Create temporary experiment file with trace_ids + experiment_file = tmp_path / "experiment.jsonl" + test_data = [ + {"trace_id": "a1b2c3d4e5f6789012345678901234ab"}, + {"trace_id": "b2c3d4e5f6789012345678901234abc2"}, + {"trace_id": "c3d4e5f6789012345678901234abc34"}, + ] + with open(experiment_file, "w") as f: + for item in test_data: + f.write(json.dumps(item) + "\n") + + # Create mock result + class MockResult: + _repr_dict = {"faithfulness": 0.85} + cost_cb = None + + def to_pandas(self): + return pd.DataFrame( + { + "user_input": ["Q1", "Q2", "Q3"], + "faithfulness": [0.9, 0.8, 0.85], + } + ) + + def total_tokens(self): + class MockTokenUsage: + input_tokens = 100 + output_tokens = 50 + + return MockTokenUsage() + + def total_cost(self, **kwargs): + return 0.001 + + mock_result = MockResult() + + formatted = format_evaluation_scores(mock_result, 5.0 / 1e6, 15.0 / 1e6, str(experiment_file)) + + # Verify trace_ids are preserved in individual results + assert len(formatted.individual_results) == 3 + assert formatted.individual_results[0]["trace_id"] == "a1b2c3d4e5f6789012345678901234ab" + assert formatted.individual_results[1]["trace_id"] == "b2c3d4e5f6789012345678901234abc2" + assert formatted.individual_results[2]["trace_id"] == "c3d4e5f6789012345678901234abc34" + + # TestMain tests def test_main_no_metrics(experiment_data): """Test main function with no metrics provided""" diff --git a/tests/test_publish.py b/tests/test_publish.py index bc215f6..9eab789 100644 --- a/tests/test_publish.py +++ b/tests/test_publish.py @@ -17,6 +17,36 @@ from publish import create_and_push_metrics, get_overall_scores, publish_metrics +# Mock classes for OpenTelemetry meter provider (used by HTTPXClientInstrumentor) +# Use underscore prefix to avoid naming conflicts with test-specific mock classes +class _OtelMockMeter: + """Mock meter for 
instrumentation""" + + def create_counter(self, name, **kwargs): + return _OtelMockCounter() + + def create_histogram(self, name, **kwargs): + return _OtelMockHistogram() + + def create_gauge(self, name, **kwargs): + return _OtelMockGauge() + + +class _OtelMockCounter: + def add(self, amount, attributes=None): + pass + + +class _OtelMockHistogram: + def record(self, amount, attributes=None): + pass + + +class _OtelMockGauge: + def set(self, value, attributes=None): + pass + + # Fixtures @pytest.fixture def temp_dir(): @@ -117,6 +147,10 @@ def force_flush(self): def shutdown(self): pass + def get_meter(self, name, version=None, schema_url=None, attributes=None): + """Return a mock meter that HTTPXClientInstrumentor can use""" + return _OtelMockMeter() + def mock_provider_init(**kwargs): return MockProvider() @@ -175,6 +209,10 @@ def force_flush(self): def shutdown(self): pass + def get_meter(self, name, version=None, schema_url=None, attributes=None): + """Return a mock meter that HTTPXClientInstrumentor can use""" + return _OtelMockMeter() + def mock_provider_init(**kwargs): return MockProvider() @@ -404,6 +442,10 @@ def force_flush(self): def shutdown(self): pass + def get_meter(self, name, version=None, schema_url=None, attributes=None): + """Return a mock meter that HTTPXClientInstrumentor can use""" + return _OtelMockMeter() + def mock_provider_init(**kwargs): return MockProvider() diff --git a/tests/test_run.py b/tests/test_run.py index fce093b..bfbbe60 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -110,7 +110,11 @@ def mock_httpx_client(): } # Call the function - result = await run_agent_experiment.func(test_row, agent_url="http://test-agent:8000") + result = await run_agent_experiment.func( + test_row, + agent_url="http://test-agent:8000", + workflow_name="test-workflow" + ) # Verify result structure assert "user_input" in result @@ -151,7 +155,11 @@ def mock_httpx_client(): } # Call the function - result = await 
run_agent_experiment.func(test_row, agent_url="http://test-agent:8000") + result = await run_agent_experiment.func( + test_row, + agent_url="http://test-agent:8000", + workflow_name="test-workflow" + ) # Verify error is captured in response assert "response" in result @@ -202,12 +210,15 @@ async def mock_arun_tracked(*args, **kwargs): monkeypatch.setattr("run.run_agent_experiment.arun", mock_arun_tracked) # Run main - await main("http://test-agent:8000") + await main("http://test-agent:8000", "test-workflow") # Verify Dataset.load was called assert len(calls_to_load) == 1 # Verify experiment was run assert len(calls_to_arun) == 1 + + # Verify workflow_name was passed through to arun + assert calls_to_arun[0]["kwargs"]["workflow_name"] == "test-workflow" finally: os.chdir(original_cwd) diff --git a/uv.lock b/uv.lock index f9fd2c3..b593750 100644 --- a/uv.lock +++ b/uv.lock @@ -1571,6 +1571,37 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/e9/70d74a664d83976556cec395d6bfedd9b85ec1498b778367d5f93e373397/opentelemetry_exporter_otlp_proto_http-1.37.0-py3-none-any.whl", hash = "sha256:54c42b39945a6cc9d9a2a33decb876eabb9547e0dcb49df090122773447f1aef", size = 19576, upload-time = "2025-09-11T10:28:46.726Z" }, ] +[[package]] +name = "opentelemetry-instrumentation" +version = "0.58b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "packaging" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/36/7c307d9be8ce4ee7beb86d7f1d31027f2a6a89228240405a858d6e4d64f9/opentelemetry_instrumentation-0.58b0.tar.gz", hash = "sha256:df640f3ac715a3e05af145c18f527f4422c6ab6c467e40bd24d2ad75a00cb705", size = 31549, upload-time = "2025-09-11T11:42:14.084Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d4/db/5ff1cd6c5ca1d12ecf1b73be16fbb2a8af2114ee46d4b0e6d4b23f4f4db7/opentelemetry_instrumentation-0.58b0-py3-none-any.whl", hash = "sha256:50f97ac03100676c9f7fc28197f8240c7290ca1baa12da8bfbb9a1de4f34cc45", size = 33019, upload-time = "2025-09-11T11:41:00.624Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-httpx" +version = "0.58b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/07/21/ba3a0106795337716e5e324f58fd3c04f5967e330c0408d0d68d873454db/opentelemetry_instrumentation_httpx-0.58b0.tar.gz", hash = "sha256:3cd747e7785a06d06bd58875e8eb11595337c98c4341f4fe176ff1f734a90db7", size = 19887, upload-time = "2025-09-11T11:42:37.926Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/e7/6dc8ee4881889993fa4a7d3da225e5eded239c975b9831eff392abd5a5e4/opentelemetry_instrumentation_httpx-0.58b0-py3-none-any.whl", hash = "sha256:d3f5a36c7fed08c245f1b06d1efd91f624caf2bff679766df80981486daaccdb", size = 15197, upload-time = "2025-09-11T11:41:32.66Z" }, +] + [[package]] name = "opentelemetry-proto" version = "1.37.0" @@ -1610,6 +1641,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/90/68152b7465f50285d3ce2481b3aec2f82822e3f52e5152eeeaf516bab841/opentelemetry_semantic_conventions-0.58b0-py3-none-any.whl", hash = "sha256:5564905ab1458b96684db1340232729fce3b5375a06e140e8904c78e4f815b28", size = 207954, upload-time = "2025-09-11T10:28:59.218Z" }, ] +[[package]] +name = "opentelemetry-util-http" +version = "0.58b0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/c6/5f/02f31530faf50ef8a41ab34901c05cbbf8e9d76963ba2fb852b0b4065f4e/opentelemetry_util_http-0.58b0.tar.gz", hash = "sha256:de0154896c3472c6599311c83e0ecee856c4da1b17808d39fdc5cce5312e4d89", size = 9411, upload-time = "2025-09-11T11:43:05.602Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/a3/0a1430c42c6d34d8372a16c104e7408028f0c30270d8f3eb6cccf2e82934/opentelemetry_util_http-0.58b0-py3-none-any.whl", hash = "sha256:6c6b86762ed43025fbd593dc5f700ba0aa3e09711aedc36fd48a13b23d8cb1e7", size = 7652, upload-time = "2025-09-11T11:42:09.682Z" }, +] + [[package]] name = "orjson" version = "3.11.3" @@ -2699,6 +2739,7 @@ dependencies = [ { name = "langchain-openai" }, { name = "opentelemetry-api" }, { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-instrumentation-httpx" }, { name = "opentelemetry-sdk" }, { name = "pandas" }, { name = "pandas-stubs" }, @@ -2727,6 +2768,7 @@ requires-dist = [ { name = "langchain-openai", specifier = ">=1.0.2" }, { name = "opentelemetry-api", specifier = ">=1.20.0" }, { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.20.0" }, + { name = "opentelemetry-instrumentation-httpx", specifier = ">=0.45b0" }, { name = "opentelemetry-sdk", specifier = ">=1.20.0" }, { name = "pandas", specifier = ">=2.3.3" }, { name = "pandas-stubs", specifier = ">=2.3.0" }, @@ -2913,6 +2955,55 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/dd/56f0d8af71e475ed194d702f8b4cf9cea812c95e82ad823d239023c6558c/w3lib-2.3.1-py3-none-any.whl", hash = "sha256:9ccd2ae10c8c41c7279cd8ad4fe65f834be894fe7bfdd7304b991fd69325847b", size = 21751, upload-time = "2025-01-27T14:22:09.421Z" }, ] +[[package]] +name = "wrapt" +version = "1.17.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = 
"sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/41/cad1aba93e752f1f9268c77270da3c469883d56e2798e7df6240dcb2287b/wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", size = 53998, upload-time = "2025-08-12T05:51:47.138Z" }, + { url = "https://files.pythonhosted.org/packages/60/f8/096a7cc13097a1869fe44efe68dace40d2a16ecb853141394047f0780b96/wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", size = 39020, upload-time = "2025-08-12T05:51:35.906Z" }, + { url = "https://files.pythonhosted.org/packages/33/df/bdf864b8997aab4febb96a9ae5c124f700a5abd9b5e13d2a3214ec4be705/wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", size = 39098, upload-time = "2025-08-12T05:51:57.474Z" }, + { url = "https://files.pythonhosted.org/packages/9f/81/5d931d78d0eb732b95dc3ddaeeb71c8bb572fb01356e9133916cd729ecdd/wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", size = 88036, upload-time = "2025-08-12T05:52:34.784Z" }, + { url = "https://files.pythonhosted.org/packages/ca/38/2e1785df03b3d72d34fc6252d91d9d12dc27a5c89caef3335a1bbb8908ca/wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", size = 88156, upload-time = "2025-08-12T05:52:13.599Z" }, + { url = "https://files.pythonhosted.org/packages/b3/8b/48cdb60fe0603e34e05cffda0b2a4adab81fd43718e11111a4b0100fd7c1/wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", size = 87102, upload-time = "2025-08-12T05:52:14.56Z" }, + { url = "https://files.pythonhosted.org/packages/3c/51/d81abca783b58f40a154f1b2c56db1d2d9e0d04fa2d4224e357529f57a57/wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", size = 87732, upload-time = "2025-08-12T05:52:36.165Z" }, + { url = "https://files.pythonhosted.org/packages/9e/b1/43b286ca1392a006d5336412d41663eeef1ad57485f3e52c767376ba7e5a/wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", size = 36705, upload-time = "2025-08-12T05:53:07.123Z" }, + { url = "https://files.pythonhosted.org/packages/28/de/49493f962bd3c586ab4b88066e967aa2e0703d6ef2c43aa28cb83bf7b507/wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", size = 38877, upload-time = "2025-08-12T05:53:05.436Z" }, + { url = "https://files.pythonhosted.org/packages/f1/48/0f7102fe9cb1e8a5a77f80d4f0956d62d97034bbe88d33e94699f99d181d/wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", size = 36885, upload-time = "2025-08-12T05:52:54.367Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f6/759ece88472157acb55fc195e5b116e06730f1b651b5b314c66291729193/wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0", size = 54003, upload-time = "2025-08-12T05:51:48.627Z" }, + { url = "https://files.pythonhosted.org/packages/4f/a9/49940b9dc6d47027dc850c116d79b4155f15c08547d04db0f07121499347/wrapt-1.17.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77", size = 39025, upload-time = "2025-08-12T05:51:37.156Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/35/6a08de0f2c96dcdd7fe464d7420ddb9a7655a6561150e5fc4da9356aeaab/wrapt-1.17.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7", size = 39108, upload-time = "2025-08-12T05:51:58.425Z" }, + { url = "https://files.pythonhosted.org/packages/0c/37/6faf15cfa41bf1f3dba80cd3f5ccc6622dfccb660ab26ed79f0178c7497f/wrapt-1.17.3-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277", size = 88072, upload-time = "2025-08-12T05:52:37.53Z" }, + { url = "https://files.pythonhosted.org/packages/78/f2/efe19ada4a38e4e15b6dff39c3e3f3f73f5decf901f66e6f72fe79623a06/wrapt-1.17.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d", size = 88214, upload-time = "2025-08-12T05:52:15.886Z" }, + { url = "https://files.pythonhosted.org/packages/40/90/ca86701e9de1622b16e09689fc24b76f69b06bb0150990f6f4e8b0eeb576/wrapt-1.17.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa", size = 87105, upload-time = "2025-08-12T05:52:17.914Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e0/d10bd257c9a3e15cbf5523025252cc14d77468e8ed644aafb2d6f54cb95d/wrapt-1.17.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050", size = 87766, upload-time = "2025-08-12T05:52:39.243Z" }, + { url = "https://files.pythonhosted.org/packages/e8/cf/7d848740203c7b4b27eb55dbfede11aca974a51c3d894f6cc4b865f42f58/wrapt-1.17.3-cp313-cp313-win32.whl", hash = "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8", size = 36711, upload-time = "2025-08-12T05:53:10.074Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/54/35a84d0a4d23ea675994104e667ceff49227ce473ba6a59ba2c84f250b74/wrapt-1.17.3-cp313-cp313-win_amd64.whl", hash = "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb", size = 38885, upload-time = "2025-08-12T05:53:08.695Z" }, + { url = "https://files.pythonhosted.org/packages/01/77/66e54407c59d7b02a3c4e0af3783168fff8e5d61def52cda8728439d86bc/wrapt-1.17.3-cp313-cp313-win_arm64.whl", hash = "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16", size = 36896, upload-time = "2025-08-12T05:52:55.34Z" }, + { url = "https://files.pythonhosted.org/packages/02/a2/cd864b2a14f20d14f4c496fab97802001560f9f41554eef6df201cd7f76c/wrapt-1.17.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cf30f6e3c077c8e6a9a7809c94551203c8843e74ba0c960f4a98cd80d4665d39", size = 54132, upload-time = "2025-08-12T05:51:49.864Z" }, + { url = "https://files.pythonhosted.org/packages/d5/46/d011725b0c89e853dc44cceb738a307cde5d240d023d6d40a82d1b4e1182/wrapt-1.17.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e228514a06843cae89621384cfe3a80418f3c04aadf8a3b14e46a7be704e4235", size = 39091, upload-time = "2025-08-12T05:51:38.935Z" }, + { url = "https://files.pythonhosted.org/packages/2e/9e/3ad852d77c35aae7ddebdbc3b6d35ec8013af7d7dddad0ad911f3d891dae/wrapt-1.17.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5ea5eb3c0c071862997d6f3e02af1d055f381b1d25b286b9d6644b79db77657c", size = 39172, upload-time = "2025-08-12T05:51:59.365Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f7/c983d2762bcce2326c317c26a6a1e7016f7eb039c27cdf5c4e30f4160f31/wrapt-1.17.3-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:281262213373b6d5e4bb4353bc36d1ba4084e6d6b5d242863721ef2bf2c2930b", size = 87163, upload-time = "2025-08-12T05:52:40.965Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/0f/f673f75d489c7f22d17fe0193e84b41540d962f75fce579cf6873167c29b/wrapt-1.17.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4a8d2b25efb6681ecacad42fca8859f88092d8732b170de6a5dddd80a1c8fa", size = 87963, upload-time = "2025-08-12T05:52:20.326Z" }, + { url = "https://files.pythonhosted.org/packages/df/61/515ad6caca68995da2fac7a6af97faab8f78ebe3bf4f761e1b77efbc47b5/wrapt-1.17.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:373342dd05b1d07d752cecbec0c41817231f29f3a89aa8b8843f7b95992ed0c7", size = 86945, upload-time = "2025-08-12T05:52:21.581Z" }, + { url = "https://files.pythonhosted.org/packages/d3/bd/4e70162ce398462a467bc09e768bee112f1412e563620adc353de9055d33/wrapt-1.17.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d40770d7c0fd5cbed9d84b2c3f2e156431a12c9a37dc6284060fb4bec0b7ffd4", size = 86857, upload-time = "2025-08-12T05:52:43.043Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b8/da8560695e9284810b8d3df8a19396a6e40e7518059584a1a394a2b35e0a/wrapt-1.17.3-cp314-cp314-win32.whl", hash = "sha256:fbd3c8319de8e1dc79d346929cd71d523622da527cca14e0c1d257e31c2b8b10", size = 37178, upload-time = "2025-08-12T05:53:12.605Z" }, + { url = "https://files.pythonhosted.org/packages/db/c8/b71eeb192c440d67a5a0449aaee2310a1a1e8eca41676046f99ed2487e9f/wrapt-1.17.3-cp314-cp314-win_amd64.whl", hash = "sha256:e1a4120ae5705f673727d3253de3ed0e016f7cd78dc463db1b31e2463e1f3cf6", size = 39310, upload-time = "2025-08-12T05:53:11.106Z" }, + { url = "https://files.pythonhosted.org/packages/45/20/2cda20fd4865fa40f86f6c46ed37a2a8356a7a2fde0773269311f2af56c7/wrapt-1.17.3-cp314-cp314-win_arm64.whl", hash = "sha256:507553480670cab08a800b9463bdb881b2edeed77dc677b0a5915e6106e91a58", size = 37266, upload-time = "2025-08-12T05:52:56.531Z" }, + { url = 
"https://files.pythonhosted.org/packages/77/ed/dd5cf21aec36c80443c6f900449260b80e2a65cf963668eaef3b9accce36/wrapt-1.17.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ed7c635ae45cfbc1a7371f708727bf74690daedc49b4dba310590ca0bd28aa8a", size = 56544, upload-time = "2025-08-12T05:51:51.109Z" }, + { url = "https://files.pythonhosted.org/packages/8d/96/450c651cc753877ad100c7949ab4d2e2ecc4d97157e00fa8f45df682456a/wrapt-1.17.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:249f88ed15503f6492a71f01442abddd73856a0032ae860de6d75ca62eed8067", size = 40283, upload-time = "2025-08-12T05:51:39.912Z" }, + { url = "https://files.pythonhosted.org/packages/d1/86/2fcad95994d9b572db57632acb6f900695a648c3e063f2cd344b3f5c5a37/wrapt-1.17.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a03a38adec8066d5a37bea22f2ba6bbf39fcdefbe2d91419ab864c3fb515454", size = 40366, upload-time = "2025-08-12T05:52:00.693Z" }, + { url = "https://files.pythonhosted.org/packages/64/0e/f4472f2fdde2d4617975144311f8800ef73677a159be7fe61fa50997d6c0/wrapt-1.17.3-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5d4478d72eb61c36e5b446e375bbc49ed002430d17cdec3cecb36993398e1a9e", size = 108571, upload-time = "2025-08-12T05:52:44.521Z" }, + { url = "https://files.pythonhosted.org/packages/cc/01/9b85a99996b0a97c8a17484684f206cbb6ba73c1ce6890ac668bcf3838fb/wrapt-1.17.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223db574bb38637e8230eb14b185565023ab624474df94d2af18f1cdb625216f", size = 113094, upload-time = "2025-08-12T05:52:22.618Z" }, + { url = "https://files.pythonhosted.org/packages/25/02/78926c1efddcc7b3aa0bc3d6b33a822f7d898059f7cd9ace8c8318e559ef/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e405adefb53a435f01efa7ccdec012c016b5a1d3f35459990afc39b6be4d5056", size = 110659, upload-time = "2025-08-12T05:52:24.057Z" }, + { url = 
"https://files.pythonhosted.org/packages/dc/ee/c414501ad518ac3e6fe184753632fe5e5ecacdcf0effc23f31c1e4f7bfcf/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:88547535b787a6c9ce4086917b6e1d291aa8ed914fdd3a838b3539dc95c12804", size = 106946, upload-time = "2025-08-12T05:52:45.976Z" }, + { url = "https://files.pythonhosted.org/packages/be/44/a1bd64b723d13bb151d6cc91b986146a1952385e0392a78567e12149c7b4/wrapt-1.17.3-cp314-cp314t-win32.whl", hash = "sha256:41b1d2bc74c2cac6f9074df52b2efbef2b30bdfe5f40cb78f8ca22963bc62977", size = 38717, upload-time = "2025-08-12T05:53:15.214Z" }, + { url = "https://files.pythonhosted.org/packages/79/d9/7cfd5a312760ac4dd8bf0184a6ee9e43c33e47f3dadc303032ce012b8fa3/wrapt-1.17.3-cp314-cp314t-win_amd64.whl", hash = "sha256:73d496de46cd2cdbdbcce4ae4bcdb4afb6a11234a1df9c085249d55166b95116", size = 41334, upload-time = "2025-08-12T05:53:14.178Z" }, + { url = "https://files.pythonhosted.org/packages/46/78/10ad9781128ed2f99dbc474f43283b13fea8ba58723e98844367531c18e9/wrapt-1.17.3-cp314-cp314t-win_arm64.whl", hash = "sha256:f38e60678850c42461d4202739f9bf1e3a737c7ad283638251e79cc49effb6b6", size = 38471, upload-time = "2025-08-12T05:52:57.784Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" }, +] + [[package]] name = "xxhash" version = "3.5.0"