Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions src/agentevals/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,26 @@ class DebugLoadData(CamelModel):
count: int


class TraceConversionMetadata(CamelModel):
    """Descriptive metadata extracted from a converted trace.

    Every field defaults to None so an empty instance can act as a
    placeholder when the source trace (or a given attribute) is unavailable.
    """

    # Name of the agent that produced the trace, if detectable.
    agent_name: str | None = None
    # Model identifier (e.g. LLM name) observed in the trace, if any.
    model: str | None = None
    # Trace start time; unit (epoch seconds vs. ms) is determined by
    # extract_trace_metadata — TODO confirm against that helper.
    start_time: int | None = None
    # Truncated preview of the first user input in the trace.
    user_input_preview: str | None = None
    # Truncated preview of the agent's final output.
    final_output_preview: str | None = None
    # Session name derived from the uploaded filename (known prefixes stripped).
    session_name: str | None = None


class TraceConversionEntry(CamelModel):
    """Conversion result for a single trace: invocations plus metadata."""

    # Identifier of the source trace.
    trace_id: str
    # Serialized invocations (camelCase dicts matching the frontend type).
    invocations: list[dict[str, Any]]
    # Non-fatal issues encountered while converting this trace.
    warnings: list[str] = Field(default_factory=list)
    # Extracted metadata; defaults to an all-None placeholder instance.
    metadata: TraceConversionMetadata = Field(default_factory=TraceConversionMetadata)


class ConvertTracesData(CamelModel):
    """Response payload for the /convert endpoint: one entry per trace."""

    traces: list[TraceConversionEntry]


# ---------------------------------------------------------------------------
# SSE evaluation event models
# ---------------------------------------------------------------------------
Expand Down
140 changes: 140 additions & 0 deletions src/agentevals/api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import logging
import os
import re
import shutil
import tempfile
from typing import Any
Expand All @@ -24,12 +25,14 @@
EvalRunConfig,
OpenAIEvalDef,
)
from ..converter import convert_traces
from ..extraction import get_extractor
from ..runner import RunResult, get_loader, load_eval_set, run_evaluation
from ..trace_metrics import extract_performance_metrics, extract_trace_metadata
from .models import (
ApiKeyStatus,
ConfigData,
ConvertTracesData,
EvalSetValidation,
HealthData,
MetricInfo,
Expand All @@ -40,6 +43,8 @@
SSETraceProgress,
SSETraceProgressEvent,
StandardResponse,
TraceConversionEntry,
TraceConversionMetadata,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -257,6 +262,141 @@ async def validate_eval_set(
shutil.rmtree(temp_dir)


def _session_name_from_filename(filename: str) -> str | None:
"""Extract a session name from a trace filename, stripping known prefixes."""
base = re.sub(r"\.(jsonl?|json)$", "", filename, flags=re.IGNORECASE)
for prefix in ("trace_", "agentevals_"):
if base.startswith(prefix):
return base[len(prefix) :]
return None


def _serialize_invocation(inv) -> dict[str, Any]:
    """Serialize an ADK Invocation to a camelCase dict matching the frontend Invocation type."""
    payload: dict[str, Any] = {"invocation_id": inv.invocation_id}
    # Optional pydantic sub-models: dump each with None-valued fields stripped.
    for key, value in (
        ("user_content", inv.user_content),
        ("final_response", inv.final_response),
        ("intermediate_data", inv.intermediate_data),
    ):
        if value is not None:
            payload[key] = value.model_dump(exclude_none=True)
    if inv.creation_timestamp is not None:
        payload["creation_timestamp"] = inv.creation_timestamp
    # Convert snake_case keys to camelCase for the frontend.
    return _camel_keys(payload)


def _get_format_for_file(path: str, explicit_format: str) -> str:
"""Return the loader format for a single file, auto-detecting from extension."""
if explicit_format:
return explicit_format
return "otlp-json" if path.lower().endswith(".jsonl") else "jaeger-json"


@router.post("/convert", response_model=StandardResponse[ConvertTracesData])
async def convert_trace_files(
    trace_files: list[UploadFile] = File(...),
    trace_format: str = Form(""),
):
    """Convert trace files to invocations and metadata without running evaluation.

    Accepts one or more .json/.jsonl uploads (10MB limit each), loads them
    with the format-appropriate loader (auto-detected per file when
    ``trace_format`` is empty), converts the traces, and returns one entry
    per trace with serialized invocations, warnings, and metadata.

    Raises:
        HTTPException: 400 for a bad extension, an oversized file, or when no
            traces load; 500 for unexpected failures during conversion.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        saved_files: list[tuple[str, str]] = []  # (path, original_filename)
        for idx, trace_file in enumerate(trace_files):
            # Skip multipart parts that carry no filename.
            if not trace_file.filename:
                continue

            original = trace_file.filename
            lower = original.lower()
            if not (lower.endswith(".json") or lower.endswith(".jsonl")):
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid file extension for {original}. Only .json and .jsonl files are allowed.",
                )

            # Index prefix avoids collisions between same-named uploads;
            # basename strips any client-supplied directory components.
            safe_name = f"{idx}_{os.path.basename(original)}"
            trace_path = os.path.join(temp_dir, safe_name)
            with open(trace_path, "wb") as f:  # noqa: ASYNC230
                content = await trace_file.read()

                # Size check happens after the upload is read into memory;
                # the limit bounds what gets persisted and processed.
                if len(content) > 10 * 1024 * 1024:
                    raise HTTPException(
                        status_code=400,
                        detail=f"File {original} exceeds 10MB limit",
                    )

                f.write(content)
            saved_files.append((trace_path, original))

        if not saved_files:
            raise HTTPException(status_code=400, detail="No valid trace files provided")

        all_traces = []
        trace_to_filename: dict[str, str] = {}
        load_warnings: list[str] = []
        for path, original in saved_files:
            fmt = _get_format_for_file(path, trace_format)
            loader = get_loader(fmt)
            # Per-file failures are collected as warnings rather than
            # aborting, so one bad file doesn't sink the whole batch.
            try:
                traces = loader.load(path)
                for t in traces:
                    # NOTE(review): if the same trace_id appears in multiple
                    # files, the last file processed wins this mapping.
                    trace_to_filename[t.trace_id] = original
                all_traces.extend(traces)
            except Exception as exc:
                msg = f"Failed to load '{original}': {exc}"
                logger.warning(msg)
                load_warnings.append(msg)

        # Nothing loaded at all: surface the accumulated load errors.
        if not all_traces:
            detail = "No traces found in uploaded files"
            if load_warnings:
                detail += ". Errors: " + "; ".join(load_warnings)
            raise HTTPException(status_code=400, detail=detail)

        conversion_results = convert_traces(all_traces)
        trace_map = {t.trace_id: t for t in all_traces}

        entries: list[TraceConversionEntry] = []
        for conv_result in conversion_results:
            invocations = [_serialize_invocation(inv) for inv in conv_result.invocations]
            warnings = list(conv_result.warnings)

            trace = trace_map.get(conv_result.trace_id)
            # Default to an all-None placeholder when the source trace
            # isn't found in the map.
            meta = TraceConversionMetadata()
            if trace:
                meta_dict = extract_trace_metadata(trace)
                filename = trace_to_filename.get(conv_result.trace_id, "")
                session_name = _session_name_from_filename(filename)
                meta = TraceConversionMetadata(
                    agent_name=meta_dict.get("agent_name"),
                    model=meta_dict.get("model"),
                    start_time=meta_dict.get("start_time"),
                    user_input_preview=meta_dict.get("user_input_preview"),
                    final_output_preview=meta_dict.get("final_output_preview"),
                    session_name=session_name,
                )

            entries.append(
                TraceConversionEntry(
                    trace_id=conv_result.trace_id,
                    invocations=invocations,
                    warnings=warnings,
                    metadata=meta,
                )
            )

        return StandardResponse(data=ConvertTracesData(traces=entries))

    except HTTPException:
        # Pass through deliberate 4xx responses unchanged.
        raise
    except Exception as exc:
        logger.exception("Trace conversion failed")
        raise HTTPException(status_code=500, detail=f"Internal error: {exc!s}") from exc
    finally:
        # Always remove the temp dir, on success and on every error path.
        shutil.rmtree(temp_dir)


@router.post("/evaluate", response_model=StandardResponse[RunResult])
async def evaluate_traces(
trace_files: list[UploadFile] = File(...),
Expand Down
30 changes: 29 additions & 1 deletion ui/src/api/client.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { RunResult, EvalConfig, TraceResult, MetricMetadata, StandardResponse } from '../lib/types';
import type { RunResult, EvalConfig, TraceResult, MetricMetadata, StandardResponse, ConvertTracesResponse } from '../lib/types';
import { config } from '../config';

const API_BASE_URL = `${config.api.baseUrl}/api`;
Expand All @@ -11,6 +11,34 @@ async function unwrap<T>(response: Response): Promise<T> {
return json.data;
}

/**
 * POST trace files to the backend /convert endpoint and return the converted
 * invocations and metadata. An explicit trace format is forwarded only when
 * provided; otherwise the server auto-detects it per file.
 */
export async function convertTraces(traceFiles: File[], traceFormat?: string): Promise<ConvertTracesResponse> {
  const body = new FormData();
  for (const file of traceFiles) {
    body.append('trace_files', file);
  }
  if (traceFormat) {
    body.append('trace_format', traceFormat);
  }

  const response = await fetch(`${API_BASE_URL}/convert`, { method: 'POST', body });

  if (!response.ok) {
    // Prefer the server-provided detail; fall back to the HTTP status text.
    let errorMessage = `API error: ${response.statusText}`;
    try {
      const errorData = await response.json();
      if (errorData.detail) {
        errorMessage = errorData.detail;
      }
    } catch {
      // Fallback to statusText
    }
    throw new Error(errorMessage);
  }

  return unwrap<ConvertTracesResponse>(response);
}

export async function evaluateTracesAPI(
traceFiles: File[],
evalSetFile: File | null,
Expand Down
24 changes: 12 additions & 12 deletions ui/src/components/builder/TraceUploadZone.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,41 @@ import React, { useCallback } from 'react';
import { css } from '@emotion/react';
import { Upload, FileJson } from 'lucide-react';
import { message } from 'antd';
import { loadJaegerTraces } from '../../lib/trace-loader';
import { generateEvalSetFromTraces } from '../../lib/evalset-builder';
import { convertTraces } from '../../api/client';
import { generateEvalSet } from '../../lib/evalset-builder';
import { useTraceContext } from '../../context/TraceContext';

export const TraceUploadZone: React.FC = () => {
const { actions } = useTraceContext();

const handleFileUpload = useCallback(async (file: File) => {
try {
const content = await file.text();
const traces = await loadJaegerTraces(content);
const response = await convertTraces([file]);

if (traces.length === 0) {
if (response.traces.length === 0) {
message.error('No valid traces found in the file');
return;
}

const filename = file.name.replace('.json', '');
const evalSet = generateEvalSetFromTraces(traces, filename);
const filename = file.name.replace(/\.(jsonl?|json)$/i, '');
const traceData = response.traces.map(t => ({ traceId: t.traceId, invocations: t.invocations }));
const evalSet = generateEvalSet(traceData, filename);

actions.setBuilderEvalSet(evalSet);
message.success(`Loaded ${traces.length} trace(s) and generated EvalSet!`);
message.success(`Loaded ${response.traces.length} trace(s) and generated EvalSet!`);
} catch (error) {
console.error('Failed to load trace:', error);
message.error('Failed to load trace file. Please ensure it is a valid Jaeger JSON file.');
message.error('Failed to load trace file. Please ensure it is a valid trace file.');
}
}, [actions]);

const handleDrop = useCallback((e: React.DragEvent) => {
e.preventDefault();
const file = e.dataTransfer.files[0];
if (file && file.name.endsWith('.json')) {
if (file && (file.name.endsWith('.json') || file.name.endsWith('.jsonl'))) {
handleFileUpload(file);
} else {
message.error('Please upload a .json file');
message.error('Please upload a .json or .jsonl file');
}
}, [handleFileUpload]);

Expand Down Expand Up @@ -67,7 +67,7 @@ export const TraceUploadZone: React.FC = () => {
<input
type="file"
id="trace-file-input"
accept=".json"
accept=".json,.jsonl"
onChange={handleFileInput}
css={fileInputStyle}
/>
Expand Down
31 changes: 11 additions & 20 deletions ui/src/components/dashboard/TraceCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ import type { TraceResult } from '../../lib/types';
import { truncateTraceId, getStatusColor, getStatusGlow, copyToClipboard } from '../../lib/utils';
import { MetricScoreCard } from './MetricScoreCard';
import { useTraceContext } from '../../context/TraceContext';
import { loadJaegerTraces } from '../../lib/trace-loader';
import { generateEvalSetFromTraces } from '../../lib/evalset-builder';
import { generateEvalSet } from '../../lib/evalset-builder';

interface TraceCardProps {
traceResult: TraceResult;
Expand Down Expand Up @@ -162,31 +161,23 @@ export const TraceCard: React.FC<TraceCardProps> = ({ traceResult, threshold, on
copyToClipboard(traceId);
};

const handleCreateEvalSet = async (e: React.MouseEvent) => {
const handleCreateEvalSet = (e: React.MouseEvent) => {
e.stopPropagation();

try {
let matchingTrace = null;
let matchingFilename = '';

for (const file of state.traceFiles) {
const content = await file.text();
const traces = await loadJaegerTraces(content);
const found = traces.find(t => t.traceId === traceId);

if (found) {
matchingTrace = found;
matchingFilename = file.name.replace('.json', '');
break;
}
}
const metadata = state.traceMetadata.get(traceId);
const invocations = metadata?.invocations || [];

if (!matchingTrace) {
message.error('Could not find trace in uploaded files');
if (invocations.length === 0) {
message.error('No invocations found for this trace');
return;
}

const evalSet = generateEvalSetFromTraces([matchingTrace], matchingFilename);
const filename = metadata?.sessionId || traceId.substring(0, 12);
const evalSet = generateEvalSet(
[{ traceId, invocations }],
filename
);
actions.setBuilderEvalSet(evalSet);
actions.setCurrentView('builder');
message.success('EvalSet created! Edit and save when ready.');
Expand Down
Loading