@@ -37,6 +37,19 @@ const MOCK_NOT_NULLISH = Symbol('not-nullish')
3737 * }} ExpectedLLMObsSpanEvent
3838 */
3939
40+ /**
41+ * @typedef {{
42+ * label: string,
43+ * traceId?: string,
44+ * spanId?: string,
45+ * metricType: 'categorical' | 'score',
46+ * mlApp: string,
47+ * timestamp?: number,
48+ * value: string | number,
49+ * tags?: { [key: string]: string },
50+ * }} ExpectedLLMObsEvaluationMetrics
51+ */
52+
4053/**
4154 *
4255 * @param {object } actual
@@ -202,6 +215,11 @@ function assertLlmObsSpanEvent (actual, expected) {
202215 const expectedTags = expectedLLMObsTags ( { span, tags, error, sessionId } )
203216 const sortedExpectedTags = [ ...expectedTags . sort ( ) ]
204217 const sortedActualTags = [ ...actualTags . sort ( ) ]
218+ if ( sortedExpectedTags . length !== sortedActualTags . length ) {
219+ assert . fail (
220+ `tags have different length than expected (${ sortedExpectedTags . length } !== ${ sortedActualTags . length } )`
221+ )
222+ }
205223 for ( let i = 0 ; i < sortedExpectedTags . length ; i ++ ) {
206224 assert . equal (
207225 sortedActualTags [ i ] ,
@@ -251,6 +269,66 @@ function assertLlmObsSpanEvent (actual, expected) {
251269 assert . deepStrictEqual ( actual , expectedSpanEvent )
252270}
253271
/**
 * Asserts that the actual LLMObs evaluation metric matches the evaluation metric created from the expected fields.
 *
 * Dynamic fields (`timestamp_ms`, `span_id` and `trace_id`) are checked with `assertWithMockValues`, so they can be
 * asserted with mock values; `timestamp`, `spanId` and `traceId` fall back to mock values when omitted.
 * Tags are compared order-insensitively against `ddtrace.version:<tracerVersion>`, `ml_app:<mlApp>` and every
 * `key:value` pair from the expected `tags`.
 *
 * Neither `actual` nor its `tags` array is modified.
 *
 * @param {object} actual
 * @param {ExpectedLLMObsEvaluationMetrics} expected
 */
function assertLlmObsEvaluationMetric (actual, expected) {
  const {
    label,
    traceId = MOCK_STRING,
    spanId = MOCK_STRING,
    metricType,
    mlApp,
    timestamp = MOCK_NUMBER,
    value,
    tags
  } = expected

  // Split the dynamic fields off into locals instead of deleting them from the caller's object.
  const {
    tags: actualTags,
    timestamp_ms: actualTimestamp,
    span_id: actualSpanId,
    trace_id: actualTraceId,
    ...actualStaticFields
  } = actual

  assertWithMockValues(actualTimestamp, timestamp, 'timestamp_ms')
  // NOTE(review): the ids default to MOCK_STRING, which is presumably a sentinel rather than a real id, so a
  // plain deepStrictEqual could not match it. They are routed through the same helper as the timestamp;
  // confirm against the MOCK_STRING / assertWithMockValues definitions.
  assertWithMockValues(actualSpanId, spanId, 'span_id')
  assertWithMockValues(actualTraceId, traceId, 'trace_id')

  // Fail as an assertion (not a TypeError from `.sort`) when the metric carries no tag list.
  assert.ok(Array.isArray(actualTags), `tags should be an array, got ${util.inspect(actualTags)}`)

  const sortedExpectedTags = [
    `ddtrace.version:${tracerVersion}`,
    `ml_app:${mlApp}`,
    ...Object.entries(tags ?? {}).map(([tagKey, tagValue]) => `${tagKey}:${tagValue}`),
  ].sort()
  // Sort a copy: `Array.prototype.sort` works in place and would reorder the caller's array.
  const sortedActualTags = [...actualTags].sort()

  if (sortedExpectedTags.length !== sortedActualTags.length) {
    assert.fail(
      `Tags have different length than expected (${sortedExpectedTags.length} !== ${sortedActualTags.length}).
      Diff: ${util.inspect(sortedExpectedTags)} !== ${util.inspect(sortedActualTags)}.`
    )
  }
  for (let i = 0; i < sortedExpectedTags.length; i++) {
    assert.equal(
      sortedActualTags[i],
      sortedExpectedTags[i],
      `tags[${i}] does not match expected (${sortedExpectedTags[i]} !== ${sortedActualTags[i]})`
    )
  }

  // The value key depends on the metric type: `categorical_value` or `score_value`.
  const expectedStaticFields = {
    label,
    metric_type: metricType,
    ml_app: mlApp,
    [`${metricType}_value`]: value,
  }

  assert.deepStrictEqual(actualStaticFields, expectedStaticFields)
}
331+
254332function expectedLLMObsTags ( {
255333 span,
256334 error,
@@ -294,7 +372,10 @@ function fromBuffer (spanProperty, isNumber = false) {
294372 * @param {string } options.plugin
295373 * @param {object } options.tracerConfigOptions
296374 * @param {object } options.closeOptions
297- * @returns {function(): Promise<{ apmSpans: Array, llmobsSpans: Array }> }
375+ * @returns {{
376+ * getEvents: (numLlmObsSpans?: number) => Promise<{ apmSpans: Array<object>, llmobsSpans: Array<object> }>,
377+ * getEvaluationMetrics: () => Array<object>
378+ * }}
298379 */
299380function useLlmObs ( {
300381 plugin,
@@ -332,24 +413,33 @@ function useLlmObs ({
332413 return agent . close ( { ritmReset : false , ...closeOptions } )
333414 } )
334415
335- return async function ( numLlmObsSpans = 1 ) {
336- // get apm spans from the agent
337- const apmSpans = await apmTracesPromise
338- resetTracesPromises ( )
339-
340- // get llmobs span events requests from the agent
341- // because llmobs process spans on span finish and submits periodically,
342- // we need to aggregate all of the span events
343- // tests should know how many spans they expect to see, otherwise tests will timeout
344- const llmobsSpans = [ ]
345-
346- while ( llmobsSpans . length < numLlmObsSpans ) {
347- await new Promise ( resolve => setImmediate ( resolve ) )
348- const llmobsSpanEventsRequests = agent . getLlmObsSpanEventsRequests ( true )
349- llmobsSpans . push ( ...getLlmObsSpansFromRequests ( llmobsSpanEventsRequests ) )
416+ return {
417+ getEvents : async function ( numLlmObsSpans = 1 ) {
418+ // get apm spans from the agent
419+ const apmSpans = await apmTracesPromise
420+ resetTracesPromises ( )
421+
422+ // get llmobs span events requests from the agent
423+ // because llmobs process spans on span finish and submits periodically,
424+ // we need to aggregate all of the span events
425+ // tests should know how many spans they expect to see, otherwise tests will timeout
426+ const llmobsSpans = [ ]
427+
428+ while ( llmobsSpans . length < numLlmObsSpans ) {
429+ await new Promise ( resolve => setImmediate ( resolve ) )
430+ const llmobsSpanEventsRequests = agent . getLlmObsSpanEventsRequests ( true )
431+ llmobsSpans . push ( ...getLlmObsSpansFromRequests ( llmobsSpanEventsRequests ) )
432+ }
433+
434+ return { apmSpans, llmobsSpans : llmobsSpans . sort ( ( a , b ) => a . start_ns - b . start_ns ) }
435+ } ,
436+
437+ getEvaluationMetrics : function ( ) {
438+ const evaluationMetricsRequests = agent . getLlmObsEvaluationMetricsRequests ( true )
439+ return evaluationMetricsRequests
440+ . flatMap ( request => request . data . attributes . metrics )
441+ . sort ( ( a , b ) => a . timestamp_ms - b . timestamp_ms )
350442 }
351-
352- return { apmSpans, llmobsSpans : llmobsSpans . sort ( ( a , b ) => a . start_ns - b . start_ns ) }
353443 }
354444}
355445
@@ -405,6 +495,7 @@ function assertPromptTracking (
405495}
406496
407497module . exports = {
498+ assertLlmObsEvaluationMetric,
408499 assertLlmObsSpanEvent,
409500 assertPromptTracking,
410501 useLlmObs,
0 commit comments