From 1d42985441cda90918628a19a5c58f7d89b3b647 Mon Sep 17 00:00:00 2001 From: krisztianfekete Date: Mon, 30 Mar 2026 22:21:44 +0200 Subject: [PATCH 1/2] refactor to have a single source of truth for trace-to-invocation conversion --- src/agentevals/api/models.py | 20 + src/agentevals/api/routes.py | 130 ++++ ui/src/api/client.ts | 30 +- ui/src/components/builder/TraceUploadZone.tsx | 24 +- ui/src/components/dashboard/TraceCard.tsx | 31 +- .../components/inspector/InspectorHeader.tsx | 31 +- ui/src/components/inspector/InspectorView.tsx | 49 +- .../components/upload/TraceEditorDrawer.tsx | 25 +- ui/src/context/TraceProvider.tsx | 36 +- ui/src/lib/evalset-builder.ts | 25 +- ui/src/lib/trace-converter.ts | 734 ------------------ ui/src/lib/trace-helpers.ts | 63 ++ ui/src/lib/trace-metadata.ts | 381 +-------- ui/src/lib/trace-patcher.ts | 2 +- ui/src/lib/types.ts | 21 + 15 files changed, 352 insertions(+), 1250 deletions(-) delete mode 100644 ui/src/lib/trace-converter.ts create mode 100644 ui/src/lib/trace-helpers.ts diff --git a/src/agentevals/api/models.py b/src/agentevals/api/models.py index 8b28746..1acc0ee 100644 --- a/src/agentevals/api/models.py +++ b/src/agentevals/api/models.py @@ -111,6 +111,26 @@ class DebugLoadData(CamelModel): count: int +class TraceConversionMetadata(CamelModel): + agent_name: str | None = None + model: str | None = None + start_time: int | None = None + user_input_preview: str | None = None + final_output_preview: str | None = None + session_name: str | None = None + + +class TraceConversionEntry(CamelModel): + trace_id: str + invocations: list[dict[str, Any]] + warnings: list[str] = Field(default_factory=list) + metadata: TraceConversionMetadata = Field(default_factory=TraceConversionMetadata) + + +class ConvertTracesData(CamelModel): + traces: list[TraceConversionEntry] + + # --------------------------------------------------------------------------- # SSE evaluation event models # --------------------------------------------------------------------------- diff --git a/src/agentevals/api/routes.py b/src/agentevals/api/routes.py index c128300..32e90f3 100644 --- a/src/agentevals/api/routes.py +++ b/src/agentevals/api/routes.py @@ -6,6 +6,7 @@ import json import logging import os +import re import shutil import tempfile from typing import Any @@ -24,12 +25,14 @@ EvalRunConfig, OpenAIEvalDef, ) +from ..converter import convert_traces from ..extraction import get_extractor from ..runner import RunResult, get_loader, load_eval_set, run_evaluation from ..trace_metrics import extract_performance_metrics, extract_trace_metadata from .models import ( ApiKeyStatus, ConfigData, + ConvertTracesData, EvalSetValidation, HealthData, MetricInfo, @@ -40,6 +43,8 @@ SSETraceProgress, SSETraceProgressEvent, StandardResponse, + TraceConversionEntry, + TraceConversionMetadata, ) logger = logging.getLogger(__name__) @@ -257,6 +262,131 @@ async def validate_eval_set( shutil.rmtree(temp_dir) +def _session_name_from_filename(filename: str) -> str | None: + """Extract a session name from a trace filename, stripping known prefixes.""" + base = re.sub(r"\.(jsonl?|json)$", "", filename, flags=re.IGNORECASE) + for prefix in ("trace_", "agentevals_"): + if base.startswith(prefix): + return base[len(prefix) :] + return None + + +def _serialize_invocation(inv) -> dict[str, Any]: + """Serialize an ADK Invocation to a camelCase dict matching the frontend Invocation type.""" + inv_dict: dict[str, Any] = { + "invocation_id": inv.invocation_id, + } + if inv.user_content: + inv_dict["user_content"] = inv.user_content.model_dump(exclude_none=True) + if inv.final_response: + inv_dict["final_response"] = inv.final_response.model_dump(exclude_none=True) + if inv.intermediate_data: + inv_dict["intermediate_data"] = inv.intermediate_data.model_dump(exclude_none=True) + if inv.creation_timestamp: + inv_dict["creation_timestamp"] = inv.creation_timestamp + return _camel_keys(inv_dict) + + +@router.post("/convert", response_model=StandardResponse[ConvertTracesData]) +async def convert_trace_files( + trace_files: list[UploadFile] = File(...), + trace_format: str = Form(""), +): + """Convert trace files to invocations and metadata without running evaluation.""" + temp_dir = tempfile.mkdtemp() + try: + trace_paths = [] + for trace_file in trace_files: + if not trace_file.filename: + continue + + if not (trace_file.filename.endswith(".json") or trace_file.filename.endswith(".jsonl")): + raise HTTPException( + status_code=400, + detail=f"Invalid file extension for {trace_file.filename}. Only .json and .jsonl files are allowed.", + ) + + trace_path = os.path.join(temp_dir, trace_file.filename) + with open(trace_path, "wb") as f: # noqa: ASYNC230 + content = await trace_file.read() + + if len(content) > 10 * 1024 * 1024: + raise HTTPException( + status_code=400, + detail=f"File {trace_file.filename} exceeds 10MB limit", + ) + + f.write(content) + trace_paths.append(trace_path) + + if not trace_paths: + raise HTTPException(status_code=400, detail="No valid trace files provided") + + fmt = trace_format + if not fmt: + if trace_paths[0].endswith(".jsonl"): + fmt = "otlp-json" + else: + fmt = "jaeger-json" + + loader = get_loader(fmt) + all_traces = [] + trace_to_filename: dict[str, str] = {} + for path in trace_paths: + try: + traces = loader.load(path) + filename = os.path.basename(path) + for t in traces: + trace_to_filename[t.trace_id] = filename + all_traces.extend(traces) + except Exception as exc: + logger.warning(f"Failed to load trace file '{path}': {exc}") + + if not all_traces: + raise HTTPException(status_code=400, detail="No traces found in uploaded files") + + conversion_results = convert_traces(all_traces) + trace_map = {t.trace_id: t for t in all_traces} + + entries: list[TraceConversionEntry] = [] + for conv_result in conversion_results: + invocations = [_serialize_invocation(inv) for inv in conv_result.invocations] + + trace = trace_map.get(conv_result.trace_id) + meta = TraceConversionMetadata() + if trace: + meta_dict = extract_trace_metadata(trace) + filename = trace_to_filename.get(conv_result.trace_id, "") + session_name = _session_name_from_filename(filename) + meta = TraceConversionMetadata( + agent_name=meta_dict.get("agent_name"), + model=meta_dict.get("model"), + start_time=meta_dict.get("start_time"), + user_input_preview=meta_dict.get("user_input_preview"), + final_output_preview=meta_dict.get("final_output_preview"), + session_name=session_name, + ) + + entries.append( + TraceConversionEntry( + trace_id=conv_result.trace_id, + invocations=invocations, + warnings=conv_result.warnings, + metadata=meta, + ) + ) + + return StandardResponse(data=ConvertTracesData(traces=entries)) + + except HTTPException: + raise + except Exception as exc: + logger.exception("Trace conversion failed") + raise HTTPException(status_code=500, detail=f"Internal error: {exc!s}") from exc + finally: + shutil.rmtree(temp_dir) + + @router.post("/evaluate", response_model=StandardResponse[RunResult]) async def evaluate_traces( trace_files: list[UploadFile] = File(...), diff --git a/ui/src/api/client.ts b/ui/src/api/client.ts index eed7e51..42f05fa 100644 --- a/ui/src/api/client.ts +++ b/ui/src/api/client.ts @@ -1,4 +1,4 @@ -import type { RunResult, EvalConfig, TraceResult, MetricMetadata, StandardResponse } from '../lib/types'; +import type { RunResult, EvalConfig, TraceResult, MetricMetadata, StandardResponse, ConvertTracesResponse } from '../lib/types'; import { config } from '../config'; const API_BASE_URL = `${config.api.baseUrl}/api`; @@ -11,6 +11,34 @@ async function unwrap(response: Response): Promise { return json.data; } +export async function convertTraces(traceFiles: File[], traceFormat?: string): Promise { + const formData = new FormData(); + traceFiles.forEach(file => formData.append('trace_files', file)); + if (traceFormat) { + formData.append('trace_format', traceFormat); + } + + const response = await fetch(`${API_BASE_URL}/convert`, { + method: 'POST', + body: formData, + }); + + if (!response.ok) { + let errorMessage = `API error: ${response.statusText}`; + try { + const errorData = await response.json(); + if (errorData.detail) { + errorMessage = errorData.detail; + } + } catch { + // Fallback to statusText + } + throw new Error(errorMessage); + } + + return unwrap(response); +} + export async function evaluateTracesAPI( traceFiles: File[], evalSetFile: File | null, diff --git a/ui/src/components/builder/TraceUploadZone.tsx b/ui/src/components/builder/TraceUploadZone.tsx index 8cdfba2..4239c05 100644 --- a/ui/src/components/builder/TraceUploadZone.tsx +++ b/ui/src/components/builder/TraceUploadZone.tsx @@ -2,8 +2,8 @@ import React, { useCallback } from 'react'; import { css } from '@emotion/react'; import { Upload, FileJson } from 'lucide-react'; import { message } from 'antd'; -import { loadJaegerTraces } from '../../lib/trace-loader'; -import { generateEvalSetFromTraces } from '../../lib/evalset-builder'; +import { convertTraces } from '../../api/client'; +import { generateEvalSet } from '../../lib/evalset-builder'; import { useTraceContext } from '../../context/TraceContext'; export const TraceUploadZone: React.FC = () => { @@ -11,32 +11,32 @@ export const TraceUploadZone: React.FC = () => { const handleFileUpload = useCallback(async (file: File) => { try { - const content = await file.text(); - const traces = await loadJaegerTraces(content); + const response = await convertTraces([file]); - if (traces.length === 0) { + if (response.traces.length === 0) { message.error('No valid traces found in the file'); return; } - const filename = file.name.replace('.json', ''); - const evalSet = generateEvalSetFromTraces(traces, filename); + const filename = file.name.replace(/\.(jsonl?|json)$/i, ''); + const traceData = response.traces.map(t => ({ traceId: t.traceId, invocations: t.invocations })); + const evalSet = generateEvalSet(traceData, filename); actions.setBuilderEvalSet(evalSet); - message.success(`Loaded ${traces.length} trace(s) and generated EvalSet!`); + message.success(`Loaded ${response.traces.length} trace(s) and generated EvalSet!`); } catch (error) { console.error('Failed to load trace:', error); - message.error('Failed to load trace file. Please ensure it is a valid Jaeger JSON file.'); + message.error('Failed to load trace file. Please ensure it is a valid trace file.'); } }, [actions]); const handleDrop = useCallback((e: React.DragEvent) => { e.preventDefault(); const file = e.dataTransfer.files[0]; - if (file && file.name.endsWith('.json')) { + if (file && (file.name.endsWith('.json') || file.name.endsWith('.jsonl'))) { handleFileUpload(file); } else { - message.error('Please upload a .json file'); + message.error('Please upload a .json or .jsonl file'); } }, [handleFileUpload]); @@ -67,7 +67,7 @@ export const TraceUploadZone: React.FC = () => { diff --git a/ui/src/components/dashboard/TraceCard.tsx b/ui/src/components/dashboard/TraceCard.tsx index 1edb7b4..b758414 100644 --- a/ui/src/components/dashboard/TraceCard.tsx +++ b/ui/src/components/dashboard/TraceCard.tsx @@ -7,8 +7,7 @@ import type { TraceResult } from '../../lib/types'; import { truncateTraceId, getStatusColor, getStatusGlow, copyToClipboard } from '../../lib/utils'; import { MetricScoreCard } from './MetricScoreCard'; import { useTraceContext } from '../../context/TraceContext'; -import { loadJaegerTraces } from '../../lib/trace-loader'; -import { generateEvalSetFromTraces } from '../../lib/evalset-builder'; +import { generateEvalSet } from '../../lib/evalset-builder'; interface TraceCardProps { traceResult: TraceResult; @@ -162,31 +161,23 @@ export const TraceCard: React.FC = ({ traceResult, threshold, on copyToClipboard(traceId); }; - const handleCreateEvalSet = async (e: React.MouseEvent) => { + const handleCreateEvalSet = (e: React.MouseEvent) => { e.stopPropagation(); try { - let matchingTrace = null; - let matchingFilename = ''; - - for (const file of state.traceFiles) { - const content = await file.text(); - const traces = await loadJaegerTraces(content); - const found = traces.find(t => t.traceId === traceId); - - if (found) { - matchingTrace = found; - matchingFilename = file.name.replace('.json', ''); - break; - } - } + const metadata = state.traceMetadata.get(traceId); + const invocations = metadata?.invocations || []; - if (!matchingTrace) { - message.error('Could not find trace in uploaded files'); + if (invocations.length === 0) { + message.error('No invocations found for this trace'); return; } - const evalSet = generateEvalSetFromTraces([matchingTrace], matchingFilename); + const filename = metadata?.sessionId || traceId.substring(0, 12); + const evalSet = generateEvalSet( + [{ traceId, invocations }], + filename + ); actions.setBuilderEvalSet(evalSet); actions.setCurrentView('builder'); message.success('EvalSet created! Edit and save when ready.'); diff --git a/ui/src/components/inspector/InspectorHeader.tsx b/ui/src/components/inspector/InspectorHeader.tsx index 898fdec..2a739e4 100644 --- a/ui/src/components/inspector/InspectorHeader.tsx +++ b/ui/src/components/inspector/InspectorHeader.tsx @@ -5,8 +5,7 @@ import { Button, message } from 'antd'; import { truncateTraceId } from '../../lib/utils'; import type { TraceResult } from '../../lib/types'; import { useTraceContext } from '../../context/TraceContext'; -import { loadJaegerTraces } from '../../lib/trace-loader'; -import { generateEvalSetFromTraces } from '../../lib/evalset-builder'; +import { generateEvalSet } from '../../lib/evalset-builder'; interface InspectorHeaderProps { traceResult: TraceResult; onBack: () => void; @@ -25,29 +24,21 @@ export const InspectorHeader: React.FC = ({ setTimeout(() => setCopied(false), 2000); }; - const handleCreateEvalSet = async () => { + const handleCreateEvalSet = () => { try { - let matchingTrace = null; - let matchingFilename = ''; - - for (const file of state.traceFiles) { - const content = await file.text(); - const traces = await loadJaegerTraces(content); - const found = traces.find(t => t.traceId === traceResult.traceId); - - if (found) { - matchingTrace = found; - matchingFilename = file.name.replace('.json', ''); - break; - } - } + const metadata = state.traceMetadata.get(traceResult.traceId); + const invocations = metadata?.invocations || []; - if (!matchingTrace) { - message.error('Could not find trace in uploaded files'); + if (invocations.length === 0) { + message.error('No invocations found for this trace'); return; } - const evalSet = generateEvalSetFromTraces([matchingTrace], matchingFilename); + const filename = metadata?.sessionId || traceResult.traceId.substring(0, 12); + const evalSet = generateEvalSet( + [{ traceId: traceResult.traceId, invocations }], + filename + ); actions.setBuilderEvalSet(evalSet); actions.setCurrentView('builder'); message.success('EvalSet created! Edit and save when ready.'); diff --git a/ui/src/components/inspector/InspectorView.tsx b/ui/src/components/inspector/InspectorView.tsx index d5e6f0d..19b6d39 100644 --- a/ui/src/components/inspector/InspectorView.tsx +++ b/ui/src/components/inspector/InspectorView.tsx @@ -5,9 +5,7 @@ import { InspectorHeader } from './InspectorHeader'; import { InspectorLayout } from './InspectorLayout'; import { InvocationSummaryPanel } from './InvocationSummaryPanel'; import { ComparisonPanel } from './ComparisonPanel'; -import type { Trace, Invocation } from '../../lib/types'; -import { loadJaegerTraces } from '../../lib/trace-loader'; -import { convertTracesToInvocations } from '../../lib/trace-converter'; +import type { Invocation } from '../../lib/types'; import { readFileAsText } from '../../lib/utils'; export const InspectorView: React.FC = () => { @@ -37,10 +35,10 @@ export const InspectorView: React.FC = () => { return state.results.find(r => r.traceId === state.selectedTraceId); }, [state.tableRows, state.results, state.selectedTraceId]); - // Load trace data when component mounts + // Load invocations from state (already converted by backend) and eval set from file useEffect(() => { - const loadTraceData = async () => { - if (!traceResult || !state.traceFiles.length) { + const loadData = async () => { + if (!traceResult) { setError('Trace data not available'); setLoading(false); return; @@ -50,47 +48,21 @@ export const InspectorView: React.FC = () => { setLoading(true); setError(null); - // Find the matching trace file by reading and parsing each one - let foundTrace: Trace | null = null; - for (const file of state.traceFiles) { - const content = await readFileAsText(file); - const traces = await loadJaegerTraces(content); - const matchingTrace = traces.find(t => t.traceId === traceResult.traceId); - if (matchingTrace) { - foundTrace = matchingTrace; - break; - } - } - - if (!foundTrace) { - setError('Could not find trace in uploaded files'); - setLoading(false); - return; - } - - // Convert to invocations - const conversionResults = convertTracesToInvocations([foundTrace]); - const result = conversionResults.get(foundTrace.traceId); - - if (result) { - setInvocations(result.invocations); - } else { - setInvocations([]); - } + const tableRow = state.tableRows.get(traceResult.traceId); + const metadata = state.traceMetadata.get(traceResult.traceId); + const invs = tableRow?.invocations || metadata?.invocations || []; + setInvocations(invs); - // Load evalset if available if (state.evalSetFile) { try { const evalSetContent = await readFileAsText(state.evalSetFile); const evalSet = JSON.parse(evalSetContent); - // Extract expected invocations from eval cases const expectedInvs: Invocation[] = []; if (evalSet.eval_cases) { for (const evalCase of evalSet.eval_cases) { if (evalCase.conversation) { for (const inv of evalCase.conversation) { - // Map eval case format to Invocation format const intermediateData = inv.intermediate_data || {}; expectedInvs.push({ invocationId: inv.invocationId || evalCase.eval_id, @@ -105,7 +77,6 @@ export const InspectorView: React.FC = () => { } } } - console.log('Loaded expected invocations:', expectedInvs); setExpectedInvocations(expectedInvs); } catch (err) { console.error('Error loading evalset:', err); @@ -121,8 +92,8 @@ export const InspectorView: React.FC = () => { } }; - loadTraceData(); - }, [traceResult, state.traceFiles]); + loadData(); + }, [traceResult, state.traceMetadata, state.tableRows, state.evalSetFile]); // Handle back to dashboard const handleBack = () => { diff --git a/ui/src/components/upload/TraceEditorDrawer.tsx b/ui/src/components/upload/TraceEditorDrawer.tsx index 40dbf81..2d92d4e 100644 --- a/ui/src/components/upload/TraceEditorDrawer.tsx +++ b/ui/src/components/upload/TraceEditorDrawer.tsx @@ -7,7 +7,7 @@ import { InvocationEditor } from '../builder/InvocationEditor'; import { RawJsonPreview } from './RawJsonPreview'; import { readFileAsText } from '../../lib/utils'; import { loadJaegerTraces } from '../../lib/trace-loader'; -import { convertTracesToInvocations } from '../../lib/trace-converter'; +import { convertTraces } from '../../api/client'; import { parseTraceFileForEditing, buildEditMappings, applyEditsAndSerialize } from '../../lib/trace-patcher'; import type { Invocation, ParsedTraceFile, SpanEditMapping } from '../../lib/types'; @@ -39,26 +39,23 @@ export const TraceEditorDrawer: React.FC = ({ file, file setError(null); setDirty(false); - readFileAsText(file) - .then(async (content) => { + Promise.all([ + readFileAsText(file).then(async (content) => { const parsed = parseTraceFileForEditing(content, file.name); setParsedFile(parsed); const traces = await loadJaegerTraces(content); - const conversionResults = convertTracesToInvocations(traces); const mappings = buildEditMappings(traces, parsed); setEditMappings(mappings); - - const groups: TraceGroup[] = []; - for (const trace of traces) { - const result = conversionResults.get(trace.traceId); - if (result && result.invocations.length > 0) { - groups.push({ traceId: trace.traceId, invocations: result.invocations }); - } - } + }), + convertTraces([file]).then((response) => { + const groups: TraceGroup[] = response.traces + .filter(t => t.invocations.length > 0) + .map(t => ({ traceId: t.traceId, invocations: t.invocations })); setTraceGroups(groups); - setLoading(false); - }) + }), + ]) + .then(() => setLoading(false)) .catch((err) => { setError(err instanceof Error ? err.message : 'Failed to parse trace file'); setLoading(false); diff --git a/ui/src/context/TraceProvider.tsx b/ui/src/context/TraceProvider.tsx index 274e6df..c724685 100644 --- a/ui/src/context/TraceProvider.tsx +++ b/ui/src/context/TraceProvider.tsx @@ -3,8 +3,7 @@ import type { ReactNode } from 'react'; import { TraceContext } from './TraceContext'; import type { TraceState } from './TraceContext'; import type { ViewType, EvalSet, EvalSetMetadata, EvalCase, LiveSession, AnnotationQueue, Annotation } from '../lib/types'; -import { evaluateTracesStreaming, getConfig, healthCheck } from '../api/client'; -import { extractMetadataFromTraceFile } from '../lib/trace-metadata'; +import { evaluateTracesStreaming, convertTraces, getConfig, healthCheck } from '../api/client'; interface TraceProviderProps { children: ReactNode; @@ -52,19 +51,30 @@ export const TraceProvider: React.FC = ({ children }) => { setTraceFiles: async (files: File[]) => { setState((prev) => ({ ...prev, traceFiles: files, isLoadingMetadata: true })); - const metadataMap = new Map(); - for (const file of files) { - try { - const metadataList = await extractMetadataFromTraceFile(file); - for (const metadata of metadataList) { - metadataMap.set(metadata.traceId, metadata); - } - } catch (error) { - console.error(`Failed to extract metadata from ${file.name}:`, error); + try { + const response = await convertTraces(files); + const metadataMap = new Map(); + for (const entry of response.traces) { + metadataMap.set(entry.traceId, { + traceId: entry.traceId, + sessionId: entry.metadata?.sessionName || entry.metadata?.agentName || entry.traceId.substring(0, 12), + agentName: entry.metadata?.agentName, + startTime: entry.metadata?.startTime, + model: entry.metadata?.model, + userInputPreview: entry.metadata?.userInputPreview, + finalOutputPreview: entry.metadata?.finalOutputPreview, + invocations: entry.invocations, + }); } + setState((prev) => ({ ...prev, traceMetadata: metadataMap, isLoadingMetadata: false })); + } catch (error) { + console.error('Failed to convert traces:', error); + setState((prev) => ({ + ...prev, + isLoadingMetadata: false, + errors: [error instanceof Error ? error.message : 'Failed to convert trace files'], + })); } - - setState((prev) => ({ ...prev, traceMetadata: metadataMap, isLoadingMetadata: false })); }, setEvalSet: (file: File | null) => diff --git a/ui/src/lib/evalset-builder.ts b/ui/src/lib/evalset-builder.ts index 39d34f0..d77379a 100644 --- a/ui/src/lib/evalset-builder.ts +++ b/ui/src/lib/evalset-builder.ts @@ -1,5 +1,4 @@ -import type { Trace, EvalSet, EvalCase } from './types'; -import { convertTracesToInvocations } from './trace-converter'; +import type { Invocation, EvalSet, EvalCase } from './types'; /** * Convert camelCase keys to snake_case recursively @@ -22,30 +21,24 @@ function convertCamelToSnake(obj: any): any { } /** - * Generate an EvalSet from selected traces - * - * @param traces - Array of traces to convert to eval set - * @param baseFilename - Base filename for naming the eval set - * @returns Complete EvalSet object with auto-generated metadata + * Generate an EvalSet from pre-converted invocations (backend is source of truth). */ -export function generateEvalSetFromTraces( - traces: Trace[], +export function generateEvalSet( + invocationsByTrace: Array<{ traceId: string; invocations: Invocation[] }>, baseFilename: string ): EvalSet { const timestamp = new Date().toISOString().split('T')[0]; const cleanFilename = baseFilename.replace(/\.json$/i, '').replace(/[^a-z0-9_]/gi, '_'); const evalSetId = `evalset_${cleanFilename}_${timestamp}`; - const conversionResults = convertTracesToInvocations(traces); const evalCases: EvalCase[] = []; - for (const trace of traces) { - const result = conversionResults.get(trace.traceId); - if (!result || result.invocations.length === 0) continue; + for (const { traceId, invocations } of invocationsByTrace) { + if (invocations.length === 0) continue; - result.invocations.forEach((invocation, idx) => { + invocations.forEach((invocation, idx) => { evalCases.push({ - eval_id: `${trace.traceId.substring(0, 8)}_case_${idx + 1}`, + eval_id: `${traceId.substring(0, 8)}_case_${idx + 1}`, conversation: [invocation], }); }); @@ -54,7 +47,7 @@ export function generateEvalSetFromTraces( return { eval_set_id: evalSetId, name: `Eval Set for ${baseFilename}`, - description: `Generated from ${traces.length} trace(s) on ${new Date().toLocaleString()}`, + description: `Generated from ${invocationsByTrace.length} trace(s) on ${new Date().toLocaleString()}`, eval_cases: evalCases, }; } diff --git a/ui/src/lib/trace-converter.ts b/ui/src/lib/trace-converter.ts deleted file mode 100644 index 0c8543e..0000000 --- a/ui/src/lib/trace-converter.ts +++ /dev/null @@ -1,734 +0,0 @@ -import type { Trace, Span, Invocation, Content, ToolCall, ToolResponse, IntermediateData } from './types'; -import { safeJsonParse } from './utils'; - -export const ADK_SCOPE = 'gcp.vertex.agent'; - -export const USER_ROLES = ['user', 'human']; -export const ASSISTANT_ROLES = ['assistant', 'model', 'ai']; - -export function getInputMessagesAttr(span: Span): string | undefined { - return span.tags['gen_ai.input.messages'] - || span.tags['gen_ai.prompt'] - || span.tags['gen_ai.request.messages']; -} - -export function getOutputMessagesAttr(span: Span): string | undefined { - return span.tags['gen_ai.output.messages'] - || span.tags['gen_ai.completion'] - || span.tags['gen_ai.response.messages']; -} - -interface ConversionResult { - invocations: Invocation[]; - warnings: string[]; -} - -export function detectTraceFormat(trace: Trace): 'adk' | 'genai' { - const check = (spans: Span[]): 'adk' | 'genai' | null => { - let hasGenai = false; - for (const span of spans) { - if (span.tags['otel.scope.name'] === ADK_SCOPE) { - return 'adk'; - } - if (!hasGenai && (span.tags['gen_ai.request.model'] || span.tags['gen_ai.system'])) { - hasGenai = true; - } - } - return hasGenai ? 'genai' : null; - }; - - const initial = check(trace.allSpans.slice(0, 10)); - if (initial) return initial; - - if (trace.allSpans.length > 10) { - const full = check(trace.allSpans); - if (full) return full; - } - - return 'adk'; -} - -export function convertTracesToInvocations(traces: Trace[]): Map { - const results = new Map(); - - for (const trace of traces) { - const format = detectTraceFormat(trace); - console.log(`Converting trace ${trace.traceId} (format: ${format}):`); - console.log(` Total spans: ${trace.allSpans.length}`); - - if (format === 'genai') { - results.set(trace.traceId, convertGenAITrace(trace)); - } else { - results.set(trace.traceId, convertADKTrace(trace)); - } - } - - return results; -} - -function convertADKTrace(trace: Trace): ConversionResult { - const warnings: string[] = []; - const invocations: Invocation[] = []; - - trace.allSpans.forEach((span, idx) => { - console.log(` Span ${idx}: ${span.operationName}, scope: ${span.tags['otel.scope.name']}`); - }); - - const agentSpans = trace.allSpans.filter( - (span) => - span.operationName.includes('invoke_agent') && - span.tags['otel.scope.name'] === ADK_SCOPE - ); - - console.log(` Found ${agentSpans.length} invoke_agent spans with ADK scope`); - - for (const agentSpan of agentSpans) { - try { - const invocation = convertAgentSpanToInvocation(agentSpan); - if (invocation) { - invocations.push(invocation); - console.log(` Created invocation: ${invocation.invocationId}`); - } else { - console.log(` convertAgentSpanToInvocation returned null for span ${agentSpan.spanId}`); - } - } catch (error) { - const errorMsg = `Failed to convert span ${agentSpan.spanId}: ${error instanceof Error ? error.message : 'Unknown error'}`; - warnings.push(errorMsg); - console.error(` ${errorMsg}`); - } - } - - console.log(` Final invocations count: ${invocations.length}`); - return { invocations, warnings }; -} - -/** - * Recursively find child spans by operation name prefix - * (replicates Python's _find_children_by_op) - */ -export function findChildrenByOperation(root: Span, opPrefix: string): Span[] { - const results: Span[] = []; - walkSpanTree(root, opPrefix, results); - results.sort((a, b) => a.startTime - b.startTime); - return results; -} - -/** - * Recursive walker for span tree - * (replicates Python's _walk) - */ -function walkSpanTree(span: Span, opPrefix: string, acc: Span[]): void { - for (const child of span.children) { - if (child.operationName.startsWith(opPrefix)) { - acc.push(child); - } - walkSpanTree(child, opPrefix, acc); - } -} - -/** - * Convert single agent span to Invocation - */ -function convertAgentSpanToInvocation(agentSpan: Span): Invocation | null { - console.log(` Converting agent span ${agentSpan.spanId}:`); - console.log(` Children count: ${agentSpan.children.length}`); - - // Recursively find child spans by operation name (like Python's _find_children_by_op) - const llmSpans = findChildrenByOperation(agentSpan, 'call_llm'); - const toolSpans = findChildrenByOperation(agentSpan, 'execute_tool'); - - console.log(` LLM spans: ${llmSpans.length}, Tool spans: ${toolSpans.length}`); - - if (llmSpans.length === 0) { - console.log(` Skipping: No LLM spans found`); - return null; // No LLM calls, skip - } - - // Extract user content from first LLM span - const userContent = extractUserContent(llmSpans[0]); - if (!userContent) { - console.log(` Skipping: Failed to extract user content`); - return null; - } - - // Extract final response from last LLM span - const finalResponse = extractFinalResponse(llmSpans[llmSpans.length - 1]); - if (!finalResponse) { - console.log(` Skipping: Failed to extract final response`); - return null; - } - - // Extract tool trajectory - const { toolUses, toolResponses } = extractToolTrajectory(toolSpans, llmSpans); - - return { - invocationId: agentSpan.spanId, - userContent, - finalResponse, - intermediateData: { - toolUses, - toolResponses, - }, - creationTimestamp: agentSpan.startTime, - }; -} - -/** - * Extract user content from LLM request - */ -function extractUserContent(llmSpan: Span): Content | null { - const requestJson = llmSpan.tags['gcp.vertex.agent.llm_request']; - if (!requestJson) return null; - - const request = safeJsonParse(requestJson, null); - if (!request || !request.contents) return null; - - // Find last user message with text parts (skip function_response parts) - for (let i = request.contents.length - 1; i >= 0; i--) { - const content = request.contents[i]; - if (content.role === 'user') { - const textParts = content.parts?.filter((p: any) => p.text !== undefined); - if (textParts && textParts.length > 0) { - return { - role: 'user', - parts: textParts, - }; - } - } - } - - return null; -} - -/** - * Extract final response from LLM response - */ -function extractFinalResponse(llmSpan: Span): Content | null { - const responseJson = llmSpan.tags['gcp.vertex.agent.llm_response']; - if (!responseJson) return null; - - const response = safeJsonParse(responseJson, null); - if (!response || !response.content) return null; - - // Extract text parts only (skip function_call parts for final response) - const textParts = response.content.parts?.filter((p: any) => p.text !== undefined) || []; - - return { - role: 'model', - parts: textParts, - }; -} - -/** - * Extract tool trajectory from execute_tool spans or LLM function calls - */ -function extractToolTrajectory( - toolSpans: Span[], - llmSpans: Span[] -): { toolUses: ToolCall[]; toolResponses: ToolResponse[] } { - const toolUses: ToolCall[] = []; - const toolResponses: ToolResponse[] = []; - - // Prefer execute_tool spans if available - if (toolSpans.length > 0) { - for (const toolSpan of toolSpans) { - const toolName = toolSpan.tags['gen_ai.tool.name']; - const toolCallId = toolSpan.tags['gen_ai.tool.call.id']; - const argsJson = toolSpan.tags['gcp.vertex.agent.tool_call_args']; - const responseJson = toolSpan.tags['gcp.vertex.agent.tool_response']; - - if (toolName) { - const args = safeJsonParse>(argsJson || '{}', {}); - toolUses.push({ - name: toolName, - args, - id: toolCallId, - }); - - if (responseJson) { - const response = safeJsonParse>(responseJson, {}); - toolResponses.push({ - name: toolName, - response, - id: toolCallId, - }); - } - } - } - } else { - // Fallback: extract from LLM function calls - for (const llmSpan of llmSpans) { - const responseJson = llmSpan.tags['gcp.vertex.agent.llm_response']; - if (!responseJson) continue; - - const response = safeJsonParse(responseJson, null); - if (!response || !response.content || !response.content.parts) continue; - - const functionCalls = response.content.parts.filter((p: any) => p.functionCall); - for (const part of functionCalls) { - if (part.functionCall) { - toolUses.push({ - name: part.functionCall.name, - args: part.functionCall.args || {}, - id: part.functionCall.id, - }); - } - } - } - } - - return { toolUses, toolResponses }; -} - -function isBroadcastEnriched(span: Span): boolean { - const messagesAttr = getInputMessagesAttr(span); - if (!messagesAttr) return false; - - const messages = safeJsonParse(messagesAttr, []); - if (!Array.isArray(messages)) return false; - - const userCount = messages.filter( - (m: any) => typeof m === 'object' && m !== null && USER_ROLES.includes(m.role) - ).length; - return userCount > 1; -} - -function trimCumulativeOutput(span: Span, outputMessages: any[]): any[] { - const inputAttr = getInputMessagesAttr(span); - if (!inputAttr) return outputMessages; - - const inputMessages = safeJsonParse(inputAttr, []); - if (!Array.isArray(inputMessages)) return outputMessages; - - const userCount = inputMessages.filter( - (m: any) => typeof m === 'object' && m !== null && USER_ROLES.includes(m.role) - ).length; - if (userCount <= 1) return outputMessages; - - const previousTurns = userCount - 1; - let textResponsesSeen = 0; - - for (let i = 0; i < outputMessages.length; i++) { - const msg = outputMessages[i]; - if (typeof msg !== 'object' || !msg || !ASSISTANT_ROLES.includes(msg.role)) continue; - const content = extractTextFromGenAIMessage(msg); - if (content) { - textResponsesSeen++; - if (textResponsesSeen >= previousTurns) { - return outputMessages.slice(i + 1); - } - } - } - - return outputMessages; -} - -function isGenAIInvocationSpan(span: Span): boolean { - const opLower = span.operationName.toLowerCase(); - return ['agent', 'chain', 'executor', 'workflow'].some(kw => opLower.includes(kw)); -} - -export function extractTextFromGenAIMessage(msg: any): string { - if (typeof msg.content === 'string' && msg.content) { - return msg.content; - } - if (Array.isArray(msg.content)) { - const parts = msg.content - .filter((item: any) => typeof item === 'object' && item.text) - .map((item: any) => item.text as string); - if (parts.length > 0) return parts.join(' '); - } - // Parts-based format (OTel GenAI semconv v1.36.0+) - if (Array.isArray(msg.parts)) { - const parts = msg.parts - .filter((p: any) => typeof p === 'object' && p.type === 'text') - .map((p: any) => (p.content || p.text || '') as string) - .filter(Boolean); - if (parts.length > 0) return parts.join(' '); - } - return ''; -} - -export function extractToolCallsFromGenAIMessage(msg: any): ToolCall[] { - const result: ToolCall[] = []; - if (Array.isArray(msg.tool_calls)) { - for (const tc of msg.tool_calls) { - if (tc.type === 'function' && tc.function) { - const args = safeJsonParse>(tc.function.arguments || '{}', {}); - result.push({ name: tc.function.name, args, id: tc.id }); - } - } - } - // Parts-based format (OTel GenAI semconv v1.36.0+) - if (result.length === 0 && Array.isArray(msg.parts)) { - for (const part of msg.parts) { - if (typeof part === 'object' && part.type === 'tool_call') { - const args = typeof part.arguments === 'string' - ? safeJsonParse>(part.arguments, {}) - : (part.arguments || {}); - result.push({ name: part.name, args, id: part.id }); - } - } - } - return result; -} - -function convertGenAITrace(trace: Trace): ConversionResult { - const warnings: string[] = []; - const invocations: Invocation[] = []; - - const llmSpans = trace.allSpans.filter(span => - span.tags['gen_ai.request.model'] || span.tags['gen_ai.system'] - ); - - console.log(` Found ${llmSpans.length} GenAI LLM spans`); - - if (llmSpans.length === 0) { - console.log(` No GenAI LLM spans found, treating trace as single invocation`); - return { invocations: [], warnings }; - } - - const llmRootSpans = trace.rootSpans.filter(span => - span.tags['gen_ai.request.model'] || span.tags['gen_ai.system'] - ); - - // Multi-turn extraction applies only when message content is broadcast-enriched - // (every span has the full history). Per-span enriched traces (OTLP path) have - // each span with only its own messages — each should be a separate invocation. - if (llmSpans.length > 1 && !llmRootSpans.some(isGenAIInvocationSpan) && isBroadcastEnriched(llmSpans[0])) { - console.log(` Multi-turn conversation detected (${llmSpans.length} LLM spans, broadcast-enriched)`); - const multiTurnInvocations = convertGenAIMultiTurn(llmSpans, trace); - invocations.push(...multiTurnInvocations); - } else { - const rootSpansToConvert = llmRootSpans.length > 0 - ? llmRootSpans - : trace.rootSpans.slice(0, 1); - for (const rootSpan of rootSpansToConvert) { - try { - const invocation = convertGenAIRootSpan(rootSpan, trace); - if (invocation) { - invocations.push(invocation); - } - } catch (error) { - warnings.push(`Failed to convert root span: ${error instanceof Error ? error.message : 'Unknown error'}`); - } - } - } - - const deduplicated = deduplicateInvocations(invocations); - console.log(` Final invocations count: ${deduplicated.length} (before dedup: ${invocations.length})`); - return { invocations: deduplicated, warnings }; -} - -function deduplicateInvocations(invocations: Invocation[]): Invocation[] { - if (invocations.length <= 1) return invocations; - - const getUserText = (inv: Invocation): string => - inv.userContent.parts - .filter(p => p.text) - .map(p => p.text) - .join(' '); - - const seen = new Map(); - const alwaysKeep = new Set(); - invocations.forEach((inv, i) => { - const text = getUserText(inv); - if (!text.trim()) { - alwaysKeep.add(i); - } else { - seen.set(text, i); - } - }); - - if (seen.size + alwaysKeep.size === invocations.length) return invocations; - - const keep = new Set([...alwaysKeep, ...seen.values()]); - return invocations.filter((_, i) => keep.has(i)); -} - -function convertGenAIMultiTurn(llmSpans: Span[], trace: Trace): Invocation[] { - const invocations: Invocation[] = []; - - // Get messages from the first LLM span (should have full conversation history) - const firstLlmSpan = llmSpans[0]; - const inputMessagesAttr = getInputMessagesAttr(firstLlmSpan) || '[]'; - const outputMessagesAttr = getOutputMessagesAttr(firstLlmSpan) || '[]'; - - const allInputMessages = safeJsonParse(inputMessagesAttr, []); - const allOutputMessages = safeJsonParse(outputMessagesAttr, []); - - if (!Array.isArray(allInputMessages) || !Array.isArray(allOutputMessages)) { - console.warn(' Input or output messages are not arrays, falling back to single invocation'); - const invocation = convertGenAIRootSpan(firstLlmSpan, trace); - return invocation ? [invocation] : []; - } - - const userMessages = allInputMessages.filter(msg => - USER_ROLES.includes(msg.role) - ); - const assistantMessages = allOutputMessages.filter(msg => - ASSISTANT_ROLES.includes(msg.role) - ); - - console.log(` Multi-turn: ${userMessages.length} user, ${assistantMessages.length} assistant messages`); - - let assistantIdx = 0; - - for (let userIdx = 0; userIdx < userMessages.length; userIdx++) { - const userMsg = userMessages[userIdx]; - const userText = extractTextFromGenAIMessage(userMsg); - - if (!userText) { - continue; - } - - const userContent: Content = { - role: 'user', - parts: [{ text: userText }] - }; - - const toolUses: ToolCall[] = []; - let finalResponseText = ''; - - while (assistantIdx < assistantMessages.length) { - const assistantMsg = assistantMessages[assistantIdx]; - - for (const tc of extractToolCallsFromGenAIMessage(assistantMsg)) { - toolUses.push(tc); - } - - const content = extractTextFromGenAIMessage(assistantMsg); - if (content) { - finalResponseText = content; - assistantIdx++; - break; - } - - assistantIdx++; - } - - const finalResponse: Content = { - role: 'model', - parts: [{ text: finalResponseText }] - }; - - const intermediateData: IntermediateData = { - toolUses, - toolResponses: [] - }; - - invocations.push({ - invocationId: `genai-turn-${userIdx + 1}-${firstLlmSpan.spanId.substring(0, 8)}`, - userContent, - finalResponse, - intermediateData, - creationTimestamp: firstLlmSpan.startTime - }); - } - - return invocations; -} - -function convertGenAIRootSpan(rootSpan: Span, _trace: Trace): Invocation | null { - const llmSpans = findDescendantLLMSpans(rootSpan); - const toolSpans = findDescendantToolSpans(rootSpan); - - console.log(` Converting GenAI root span ${rootSpan.spanId}:`); - console.log(` LLM spans: ${llmSpans.length}, Tool spans: ${toolSpans.length}`); - - if (llmSpans.length === 0) { - console.log(` Skipping: No LLM spans found`); - return null; - } - - const userContent = extractGenAIUserContent(llmSpans[0]); - if (!userContent) { - console.log(` Skipping: Failed to extract user content`); - return null; - } - - const finalResponse = extractGenAIFinalResponse(llmSpans[llmSpans.length - 1]); - if (!finalResponse) { - console.log(` Skipping: Failed to extract final response`); - return null; - } - - // Extract tool calls from both tool spans and LLM output messages - const { toolUses, toolResponses } = extractGenAIToolTrajectory(toolSpans, llmSpans); - - return { - invocationId: rootSpan.spanId, - userContent, - finalResponse, - intermediateData: { - toolUses, - toolResponses, - }, - creationTimestamp: rootSpan.startTime, - }; -} - -export function findDescendantLLMSpans(root: Span): Span[] { - const results: Span[] = []; - const queue = [root]; - - while (queue.length > 0) { - const span = queue.shift()!; - if (span.tags['gen_ai.request.model'] || span.tags['gen_ai.system']) { - results.push(span); - } - queue.push(...span.children); - } - - results.sort((a, b) => a.startTime - b.startTime); - return results; -} - -function findDescendantToolSpans(root: Span): Span[] { - const results: Span[] = []; - const queue = [root]; - - while (queue.length > 0) { - const span = queue.shift()!; - if (span.tags['gen_ai.tool.name']) { - results.push(span); - } - queue.push(...span.children); - } - - results.sort((a, b) => a.startTime - b.startTime); - return results; -} - -function extractGenAIUserContent(llmSpan: Span): Content | null { - const messagesAttr = getInputMessagesAttr(llmSpan); - if (!messagesAttr) return null; - - const messages = safeJsonParse(messagesAttr, null); - if (!messages || !Array.isArray(messages)) return null; - - for (let i = messages.length - 1; i >= 0; i--) { - const msg = messages[i]; - if (USER_ROLES.includes(msg.role)) { - const text = extractTextFromGenAIMessage(msg); - if (text) { - return { role: 'user', parts: [{ text }] }; - } - } - } - - return null; -} - -function extractGenAIFinalResponse(llmSpan: Span): Content | null { - const completionAttr = getOutputMessagesAttr(llmSpan); - if (!completionAttr) return null; - - const messages = safeJsonParse(completionAttr, null); - if (!messages || !Array.isArray(messages)) return null; - - for (let i = messages.length - 1; i >= 0; i--) { - const msg = messages[i]; - if (ASSISTANT_ROLES.includes(msg.role)) { - const text = extractTextFromGenAIMessage(msg); - return { role: 'model', parts: [{ text }] }; - } - } - - return null; -} - -function extractGenAIToolTrajectory(toolSpans: Span[], llmSpans: Span[]): { toolUses: ToolCall[]; toolResponses: ToolResponse[] } { - const toolCallsById = new Map(); - const toolCallsNoId: ToolCall[] = []; - const toolResponses: ToolResponse[] = []; - - for (const toolSpan of toolSpans) { - const toolName = toolSpan.tags['gen_ai.tool.name']; - const toolCallId = toolSpan.tags['gen_ai.tool.call.id']; - const argsAttr = toolSpan.tags['gen_ai.tool.call.arguments'] || toolSpan.tags['gen_ai.tool.arguments']; - const resultAttr = toolSpan.tags['gen_ai.tool.call.result'] || toolSpan.tags['gen_ai.tool.result']; - - if (toolName) { - let args = safeJsonParse>(argsAttr || '{}', {}); - - // Fallback: extract args from gen_ai.input.messages when tool span - // doesn't have gen_ai.tool.call.arguments (e.g. Strands) - if (Object.keys(args).length === 0) { - const inputMsgsAttr = toolSpan.tags['gen_ai.input.messages']; - if (inputMsgsAttr) { - const inputMsgs = safeJsonParse(inputMsgsAttr, []); - if (Array.isArray(inputMsgs)) { - for (const msg of inputMsgs) { - if (typeof msg !== 'object') continue; - for (const tc of extractToolCallsFromGenAIMessage(msg)) { - if (tc.name === toolName && Object.keys(tc.args).length > 0) { - args = tc.args; - break; - } - } - if (Object.keys(args).length > 0) break; - } - } - } - } - - const tc: ToolCall = { name: toolName, args, id: toolCallId }; - if (toolCallId) { - toolCallsById.set(toolCallId, tc); - } else { - toolCallsNoId.push(tc); - } - - if (resultAttr) { - const response = safeJsonParse>(resultAttr, {}); - toolResponses.push({ - name: toolName, - response, - id: toolCallId, - }); - } - } - } - - for (const llmSpan of llmSpans) { - const completionAttr = getOutputMessagesAttr(llmSpan); - if (!completionAttr) continue; - - let messages = safeJsonParse(completionAttr, null); - if (!messages || !Array.isArray(messages)) continue; - - messages = trimCumulativeOutput(llmSpan, messages); - - for (const msg of messages) { - if (ASSISTANT_ROLES.includes(msg.role)) { - for (const tc of extractToolCallsFromGenAIMessage(msg)) { - if (tc.id && toolCallsById.has(tc.id)) { - // Prefer LLM message version if it has richer args - const existing = toolCallsById.get(tc.id)!; - if (Object.keys(tc.args).length > 0 && Object.keys(existing.args).length === 0) { - toolCallsById.set(tc.id, tc); - } - } else if (tc.id) { - toolCallsById.set(tc.id, tc); - } else { - toolCallsNoId.push(tc); - } - } - } - } - } - - const toolUses = [...toolCallsById.values(), ...toolCallsNoId]; - return { toolUses, toolResponses }; -} - -/** - * Get invocations for a specific trace - */ -export function getInvocationsForTrace( - trace: Trace, - conversionResults: Map -): ConversionResult | undefined { - return conversionResults.get(trace.traceId); -} diff --git a/ui/src/lib/trace-helpers.ts b/ui/src/lib/trace-helpers.ts new file mode 100644 index 0000000..b225ea7 --- /dev/null +++ b/ui/src/lib/trace-helpers.ts @@ -0,0 +1,63 @@ +import type { Trace, Span } from './types'; + +export const ADK_SCOPE = 'gcp.vertex.agent'; + +export const USER_ROLES = ['user', 'human']; +export const ASSISTANT_ROLES = ['assistant', 'model', 'ai']; + +export function detectTraceFormat(trace: Trace): 'adk' | 'genai' { + const check = (spans: Span[]): 'adk' | 'genai' | null => { + let hasGenai = false; + for (const span of spans) { + if (span.tags['otel.scope.name'] === ADK_SCOPE) { + return 'adk'; + } + if (!hasGenai && (span.tags['gen_ai.request.model'] || span.tags['gen_ai.input.messages'])) { + hasGenai = true; + } + } + return hasGenai ? 'genai' : null; + }; + + const initial = check(trace.allSpans.slice(0, 10)); + if (initial) return initial; + + if (trace.allSpans.length > 10) { + const full = check(trace.allSpans); + if (full) return full; + } + + return 'adk'; +} + +export function findChildrenByOperation(root: Span, opPrefix: string): Span[] { + const results: Span[] = []; + walkSpanTree(root, opPrefix, results); + results.sort((a, b) => a.startTime - b.startTime); + return results; +} + +function walkSpanTree(span: Span, opPrefix: string, acc: Span[]): void { + for (const child of span.children) { + if (child.operationName.startsWith(opPrefix)) { + acc.push(child); + } + walkSpanTree(child, opPrefix, acc); + } +} + +export function findDescendantLLMSpans(root: Span): Span[] { + const results: Span[] = []; + const queue = [root]; + + while (queue.length > 0) { + const span = queue.shift()!; + if (span.tags['gen_ai.request.model'] || span.tags['gen_ai.input.messages']) { + results.push(span); + } + queue.push(...span.children); + } + + results.sort((a, b) => a.startTime - b.startTime); + return results; +} diff --git a/ui/src/lib/trace-metadata.ts b/ui/src/lib/trace-metadata.ts index 4b52930..1d734d4 100644 --- a/ui/src/lib/trace-metadata.ts +++ b/ui/src/lib/trace-metadata.ts @@ -1,13 +1,4 @@ -import { readFileAsText, safeJsonParse } from './utils'; -import type { Trace, Span, Invocation } from './types'; -import { - ASSISTANT_ROLES, - USER_ROLES, - convertTracesToInvocations, - extractTextFromGenAIMessage, - getInputMessagesAttr, - getOutputMessagesAttr, -} from './trace-converter'; +import type { Invocation } from './types'; export interface TraceMetadata { traceId: string; @@ -19,373 +10,3 @@ export interface TraceMetadata { finalOutputPreview?: string; invocations?: Invocation[]; } - -const TAG_SCOPE = 'otel.scope.name'; -const ADK_SCOPE = 'gcp.vertex.agent'; -const TAG_AGENT_NAME = 'gen_ai.agent.name'; -const TAG_MODEL = 'gen_ai.request.model'; -const TAG_LLM_REQUEST = 'gcp.vertex.agent.llm_request'; -const TAG_LLM_RESPONSE = 'gcp.vertex.agent.llm_response'; - -function findAdkSpans(trace: Trace, operation: string): Span[] { - const matches: Span[] = []; - - for (const span of trace.allSpans) { - if (span.tags?.[TAG_SCOPE] !== ADK_SCOPE) { - continue; - } - if (span.operationName.startsWith(operation)) { - matches.push(span); - } - } - - matches.sort((a, b) => a.startTime - b.startTime); - return matches; -} - -function extractTextPreview(text: string, maxLength: number = 100): string { - if (!text) return ''; - return text.length > maxLength ? text.substring(0, maxLength) + '...' : text; -} - -function extractUserInputPreview(llmRequestTag: string): string { - const llmRequest = safeJsonParse(llmRequestTag, {}); - const contents = llmRequest.contents || []; - - for (let i = contents.length - 1; i >= 0; i--) { - const content = contents[i]; - if (content.role === 'user') { - const parts = content.parts || []; - const textParts = parts - .filter((p: any) => p.text) - .map((p: any) => p.text); - if (textParts.length > 0) { - const fullText = textParts.join(' '); - return extractTextPreview(fullText); - } - } - } - - return ''; -} - -function extractFinalOutputPreview(llmResponseTag: string): string { - const llmResponse = safeJsonParse(llmResponseTag, {}); - const content = llmResponse.content || {}; - const parts = content.parts || []; - const textParts = parts - .filter((p: any) => p.text) - .map((p: any) => p.text); - - if (textParts.length > 0) { - const fullText = textParts.join(' '); - return extractTextPreview(fullText); - } - - return ''; -} - -function buildSpanTree(trace: Trace): void { - const spanMap = new Map(); - for (const span of trace.allSpans) { - spanMap.set(span.spanId, span); - span.children = []; - } - - for (const span of trace.allSpans) { - if (span.parentSpanId) { - const parent = spanMap.get(span.parentSpanId); - if (parent) { - parent.children.push(span); - } - } - } - - trace.rootSpans = trace.allSpans.filter( - span => !span.parentSpanId || !spanMap.has(span.parentSpanId) - ); -} - -function detectTraceFormat(trace: Trace): 'adk' | 'genai' { - const check = (spans: Span[]): 'adk' | 'genai' | null => { - let hasGenai = false; - for (const span of spans) { - if (span.tags?.[TAG_SCOPE] === ADK_SCOPE) { - return 'adk'; - } - if (!hasGenai && (span.tags?.['gen_ai.request.model'] || span.tags?.['gen_ai.system'])) { - hasGenai = true; - } - } - return hasGenai ? 'genai' : null; - }; - - const initial = check(trace.allSpans.slice(0, 10)); - if (initial) return initial; - - if (trace.allSpans.length > 10) { - const full = check(trace.allSpans); - if (full) return full; - } - - return 'adk'; -} - -export function extractTraceMetadata(trace: Trace, sessionName?: string): TraceMetadata { - const format = detectTraceFormat(trace); - - if (format === 'genai') { - return extractGenAIMetadata(trace, sessionName); - } else { - const metadata = extractADKMetadata(trace); - if (sessionName) { - metadata.sessionId = sessionName; - if (!metadata.agentName) metadata.agentName = sessionName; - } - return metadata; - } -} - -function extractADKMetadata(trace: Trace): TraceMetadata { - const metadata: TraceMetadata = { - traceId: trace.traceId, - }; - - const invokeSpans = findAdkSpans(trace, 'invoke_agent'); - if (invokeSpans.length > 0) { - const invokeSpan = invokeSpans[0]; - metadata.agentName = invokeSpan.tags?.[TAG_AGENT_NAME]; - metadata.startTime = invokeSpan.startTime; - } - - const callLlmSpans = findAdkSpans(trace, 'call_llm'); - if (callLlmSpans.length > 0) { - const firstLlm = callLlmSpans[0]; - metadata.model = firstLlm.tags?.[TAG_MODEL]; - - const llmRequestTag = firstLlm.tags?.[TAG_LLM_REQUEST]; - if (llmRequestTag) { - metadata.userInputPreview = extractUserInputPreview(llmRequestTag); - } - - const lastLlm = callLlmSpans[callLlmSpans.length - 1]; - const llmResponseTag = lastLlm.tags?.[TAG_LLM_RESPONSE]; - if (llmResponseTag) { - metadata.finalOutputPreview = extractFinalOutputPreview(llmResponseTag); - } - } - - metadata.sessionId = metadata.agentName || trace.traceId.substring(0, 12); - - return metadata; -} - -function extractGenAIMetadata(trace: Trace, sessionName?: string): TraceMetadata { - const metadata: TraceMetadata = { - traceId: trace.traceId, - }; - - if (sessionName) { - metadata.sessionId = sessionName; - } - - const llmSpans = trace.allSpans.filter(span => - span.tags?.['gen_ai.request.model'] || span.tags?.['gen_ai.system'] - ); - - if (llmSpans.length > 0) { - const firstLlm = llmSpans[0]; - metadata.model = firstLlm.tags?.['gen_ai.request.model']; - metadata.startTime = firstLlm.startTime; - - const agentName = firstLlm.tags?.['gen_ai.agent.name']; - if (agentName) { - metadata.agentName = agentName; - if (!metadata.sessionId) metadata.sessionId = agentName; - } else if (sessionName) { - metadata.agentName = sessionName; - } else { - const rootSpan = trace.rootSpans[0]; - metadata.agentName = rootSpan?.operationName || 'GenAI Agent'; - metadata.sessionId = trace.traceId.substring(0, 12); - } - - for (const span of llmSpans) { - const messagesAttr = getInputMessagesAttr(span); - if (messagesAttr) { - metadata.userInputPreview = extractGenAIUserPreview(messagesAttr); - break; - } - } - - const lastLlm = llmSpans[llmSpans.length - 1]; - const completionAttr = getOutputMessagesAttr(lastLlm); - if (completionAttr) { - metadata.finalOutputPreview = extractGenAIOutputPreview(completionAttr); - } - } else { - metadata.agentName = 'GenAI Agent'; - metadata.sessionId = trace.traceId.substring(0, 12); - } - - return metadata; -} - -function extractGenAIUserPreview(messagesAttr: string): string { - const messages = safeJsonParse(messagesAttr, []); - if (!Array.isArray(messages)) return ''; - - for (let i = messages.length - 1; i >= 0; i--) { - const msg = messages[i]; - if (USER_ROLES.includes(msg.role)) { - const text = extractTextFromGenAIMessage(msg); - if (text) return extractTextPreview(text); - } - } - - return ''; -} - -function extractGenAIOutputPreview(completionAttr: string): string { - const messages = safeJsonParse(completionAttr, []); - if (!Array.isArray(messages)) return ''; - - for (let i = messages.length - 1; i >= 0; i--) { - const msg = messages[i]; - if (ASSISTANT_ROLES.includes(msg.role)) { - const text = extractTextFromGenAIMessage(msg); - if (text) return extractTextPreview(text); - } - } - - return ''; -} - -function parseOtlpJsonl(content: string): Trace[] { - const lines = content.trim().split('\n'); - const spansByTrace: Map = new Map(); - - for (const line of lines) { - if (!line.trim()) continue; - - const otlpSpan = safeJsonParse(line, null); - if (!otlpSpan) continue; - - const traceId = otlpSpan.traceId; - if (!traceId) continue; - - const tags: Record = {}; - for (const attr of otlpSpan.attributes || []) { - const value = attr.value?.stringValue || attr.value?.intValue || attr.value?.doubleValue || attr.value?.boolValue; - if (value !== undefined) { - tags[attr.key] = value; - } - } - - const startTimeNs = parseInt(otlpSpan.startTimeUnixNano || '0'); - const endTimeNs = parseInt(otlpSpan.endTimeUnixNano || '0'); - const startTimeUs = Math.floor(startTimeNs / 1000); - const durationUs = Math.floor((endTimeNs - startTimeNs) / 1000); - - const span: Span = { - traceId, - spanId: otlpSpan.spanId, - parentSpanId: otlpSpan.parentSpanId || null, - operationName: otlpSpan.name, - startTime: startTimeUs, - duration: durationUs, - tags, - logs: [], - children: [], - }; - - if (!spansByTrace.has(traceId)) { - spansByTrace.set(traceId, []); - } - spansByTrace.get(traceId)!.push(span); - } - - const traces: Trace[] = []; - for (const [traceId, spans] of spansByTrace.entries()) { - traces.push({ - traceId, - rootSpans: [], - allSpans: spans, - }); - } - - return traces; -} - -function extractSessionNameFromFilename(filename: string): string | undefined { - const base = filename.replace(/\.(jsonl?|json)$/i, ''); - for (const prefix of ['trace_', 'agentevals_']) { - if (base.startsWith(prefix)) { - return base.slice(prefix.length); - } - } - return undefined; -} - -export async function extractMetadataFromTraceFile(file: File): Promise { - const content = await readFileAsText(file); - const sessionName = extractSessionNameFromFilename(file.name); - - let traces: Trace[] = []; - - const trimmedContent = content.trim(); - if (trimmedContent.startsWith('{') && !trimmedContent.startsWith('{"data"')) { - traces = parseOtlpJsonl(content); - } else { - const jaegerData = safeJsonParse(content, null); - - if (!jaegerData || !jaegerData.data) { - throw new Error('Invalid trace format'); - } - - for (const jaegerTrace of jaegerData.data) { - const traceId = jaegerTrace.traceID; - const spans: Span[] = []; - - for (const jaegerSpan of jaegerTrace.spans || []) { - const tags: Record = {}; - for (const tag of jaegerSpan.tags || []) { - tags[tag.key] = tag.value; - } - - spans.push({ - traceId, - spanId: jaegerSpan.spanID, - parentSpanId: jaegerSpan.references?.[0]?.spanID || null, - operationName: jaegerSpan.operationName, - startTime: jaegerSpan.startTime, - duration: jaegerSpan.duration, - tags, - logs: [], - children: [], - }); - } - - traces.push({ - traceId, - rootSpans: [], - allSpans: spans, - }); - } - } - - for (const trace of traces) { - buildSpanTree(trace); - } - - const invocationsMap = convertTracesToInvocations(traces); - - return traces.map(trace => { - const metadata = extractTraceMetadata(trace, sessionName); - const conversionResult = invocationsMap.get(trace.traceId); - if (conversionResult) { - metadata.invocations = conversionResult.invocations; - } - return metadata; - }); -} diff --git a/ui/src/lib/trace-patcher.ts b/ui/src/lib/trace-patcher.ts index 6393e5f..972a640 100644 --- a/ui/src/lib/trace-patcher.ts +++ b/ui/src/lib/trace-patcher.ts @@ -6,7 +6,7 @@ import { findDescendantLLMSpans, USER_ROLES, ASSISTANT_ROLES, -} from './trace-converter'; +} from './trace-helpers'; export function parseTraceFileForEditing(content: string, fileName: string): ParsedTraceFile { const trimmed = content.trim(); diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts index 51fef78..f2f1b75 100644 --- a/ui/src/lib/types.ts +++ b/ui/src/lib/types.ts @@ -77,6 +77,27 @@ export interface Invocation { creationTimestamp?: number; } +// Trace conversion API response types +export interface TraceConversionMetadata { + agentName?: string; + model?: string; + startTime?: number; + userInputPreview?: string; + finalOutputPreview?: string; + sessionName?: string; +} + +export interface TraceConversionEntry { + traceId: string; + invocations: Invocation[]; + warnings: string[]; + metadata: TraceConversionMetadata; +} + +export interface ConvertTracesResponse { + traces: TraceConversionEntry[]; +} + // Evaluation results export type EvalStatus = 'PASSED' | 'FAILED' | 'NOT_EVALUATED' | 'ERROR'; From 19d116fe378e4ed5c1990d8c13975ed072eae9a7 Mon Sep 17 00:00:00 2001 From: krisztianfekete Date: Tue, 31 Mar 2026 11:01:54 +0200 Subject: [PATCH 2/2] address review comments, cleanup --- src/agentevals/api/routes.py | 62 ++++++++++++++++++++--------------- ui/src/lib/evalset-builder.ts | 21 +----------- ui/src/lib/trace-helpers.ts | 14 ++++++-- 3 files changed, 49 insertions(+), 48 deletions(-) diff --git a/src/agentevals/api/routes.py b/src/agentevals/api/routes.py index 32e90f3..46803e9 100644 --- a/src/agentevals/api/routes.py +++ b/src/agentevals/api/routes.py @@ -276,17 +276,24 @@ def _serialize_invocation(inv) -> dict[str, Any]: inv_dict: dict[str, Any] = { "invocation_id": inv.invocation_id, } - if inv.user_content: + if inv.user_content is not None: inv_dict["user_content"] = inv.user_content.model_dump(exclude_none=True) - if inv.final_response: + if inv.final_response is not None: inv_dict["final_response"] = inv.final_response.model_dump(exclude_none=True) - if inv.intermediate_data: + if inv.intermediate_data is not None: inv_dict["intermediate_data"] = inv.intermediate_data.model_dump(exclude_none=True) - if inv.creation_timestamp: + if inv.creation_timestamp is not None: inv_dict["creation_timestamp"] = inv.creation_timestamp return _camel_keys(inv_dict) +def _get_format_for_file(path: str, explicit_format: str) -> str: + """Return the loader format for a single file, auto-detecting from extension.""" + if explicit_format: + return explicit_format + return "otlp-json" if path.lower().endswith(".jsonl") else "jaeger-json" + + @router.post("/convert", response_model=StandardResponse[ConvertTracesData]) async def convert_trace_files( trace_files: list[UploadFile] = File(...), @@ -295,55 +302,57 @@ async def convert_trace_files( """Convert trace files to invocations and metadata without running evaluation.""" temp_dir = tempfile.mkdtemp() try: - trace_paths = [] - for trace_file in trace_files: + saved_files: list[tuple[str, str]] = [] # (path, original_filename) + for idx, trace_file in enumerate(trace_files): if not trace_file.filename: continue - if not (trace_file.filename.endswith(".json") or trace_file.filename.endswith(".jsonl")): + original = trace_file.filename + lower = original.lower() + if not (lower.endswith(".json") or lower.endswith(".jsonl")): raise HTTPException( status_code=400, - detail=f"Invalid file extension for {trace_file.filename}. Only .json and .jsonl files are allowed.", + detail=f"Invalid file extension for {original}. Only .json and .jsonl files are allowed.", ) - trace_path = os.path.join(temp_dir, trace_file.filename) + safe_name = f"{idx}_{os.path.basename(original)}" + trace_path = os.path.join(temp_dir, safe_name) with open(trace_path, "wb") as f: # noqa: ASYNC230 content = await trace_file.read() if len(content) > 10 * 1024 * 1024: raise HTTPException( status_code=400, - detail=f"File {trace_file.filename} exceeds 10MB limit", + detail=f"File {original} exceeds 10MB limit", ) f.write(content) - trace_paths.append(trace_path) + saved_files.append((trace_path, original)) - if not trace_paths: + if not saved_files: raise HTTPException(status_code=400, detail="No valid trace files provided") - fmt = trace_format - if not fmt: - if trace_paths[0].endswith(".jsonl"): - fmt = "otlp-json" - else: - fmt = "jaeger-json" - - loader = get_loader(fmt) all_traces = [] trace_to_filename: dict[str, str] = {} - for path in trace_paths: + load_warnings: list[str] = [] + for path, original in saved_files: + fmt = _get_format_for_file(path, trace_format) + loader = get_loader(fmt) try: traces = loader.load(path) - filename = os.path.basename(path) for t in traces: - trace_to_filename[t.trace_id] = filename + trace_to_filename[t.trace_id] = original all_traces.extend(traces) except Exception as exc: - logger.warning(f"Failed to load trace file '{path}': {exc}") + msg = f"Failed to load '{original}': {exc}" + logger.warning(msg) + load_warnings.append(msg) if not all_traces: - raise HTTPException(status_code=400, detail="No traces found in uploaded files") + detail = "No traces found in uploaded files" + if load_warnings: + detail += ". Errors: " + "; ".join(load_warnings) + raise HTTPException(status_code=400, detail=detail) conversion_results = convert_traces(all_traces) trace_map = {t.trace_id: t for t in all_traces} @@ -351,6 +360,7 @@ async def convert_trace_files( entries: list[TraceConversionEntry] = [] for conv_result in conversion_results: invocations = [_serialize_invocation(inv) for inv in conv_result.invocations] + warnings = list(conv_result.warnings) trace = trace_map.get(conv_result.trace_id) meta = TraceConversionMetadata() @@ -371,7 +381,7 @@ async def convert_trace_files( TraceConversionEntry( trace_id=conv_result.trace_id, invocations=invocations, - warnings=conv_result.warnings, + warnings=warnings, metadata=meta, ) ) diff --git a/ui/src/lib/evalset-builder.ts b/ui/src/lib/evalset-builder.ts index d77379a..99bf9f9 100644 --- a/ui/src/lib/evalset-builder.ts +++ b/ui/src/lib/evalset-builder.ts @@ -1,24 +1,5 @@ import type { Invocation, EvalSet, EvalCase } from './types'; - -/** - * Convert camelCase keys to snake_case recursively - */ -function convertCamelToSnake(obj: any): any { - if (Array.isArray(obj)) { - return obj.map(convertCamelToSnake); - } - - if (obj !== null && typeof obj === 'object') { - const converted: any = {}; - for (const [key, value] of Object.entries(obj)) { - const snakeKey = key.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`); - converted[snakeKey] = convertCamelToSnake(value); - } - return converted; - } - - return obj; -} +import { convertCamelToSnake } from './utils'; /** * Generate an EvalSet from pre-converted invocations (backend is source of truth). diff --git a/ui/src/lib/trace-helpers.ts b/ui/src/lib/trace-helpers.ts index b225ea7..c375ca9 100644 --- a/ui/src/lib/trace-helpers.ts +++ b/ui/src/lib/trace-helpers.ts @@ -5,6 +5,16 @@ export const ADK_SCOPE = 'gcp.vertex.agent'; export const USER_ROLES = ['user', 'human']; export const ASSISTANT_ROLES = ['assistant', 'model', 'ai']; +function isGenAISpan(span: Span): boolean { + return !!( + span.tags['gen_ai.request.model'] || + span.tags['gen_ai.system'] || + span.tags['gen_ai.input.messages'] || + span.tags['gen_ai.prompt'] || + span.tags['gen_ai.request.messages'] + ); +} + export function detectTraceFormat(trace: Trace): 'adk' | 'genai' { const check = (spans: Span[]): 'adk' | 'genai' | null => { let hasGenai = false; @@ -12,7 +22,7 @@ export function detectTraceFormat(trace: Trace): 'adk' | 'genai' { if (span.tags['otel.scope.name'] === ADK_SCOPE) { return 'adk'; } - if (!hasGenai && (span.tags['gen_ai.request.model'] || span.tags['gen_ai.input.messages'])) { + if (!hasGenai && isGenAISpan(span)) { hasGenai = true; } } @@ -52,7 +62,7 @@ export function findDescendantLLMSpans(root: Span): Span[] { while (queue.length > 0) { const span = queue.shift()!; - if (span.tags['gen_ai.request.model'] || span.tags['gen_ai.input.messages']) { + if (isGenAISpan(span)) { results.push(span); } queue.push(...span.children);