Skip to content

Commit c3ecaf6

Browse files
authored
feat: LLM metrics for non-streaming requests in frontend (#2427)
1 parent 72ec5f5 commit c3ecaf6

File tree

1 file changed

+20
-2
lines changed

1 file changed

+20
-2
lines changed

lib/llm/src/http/service/openai.rs

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -290,7 +290,11 @@ async fn completions(
290290

291291
Ok(sse_stream.into_response())
292292
} else {
293-
// TODO: report ISL/OSL for non-streaming requests
293+
// Tap the stream to collect metrics for non-streaming requests without altering items
294+
let stream = stream.inspect(move |response| {
295+
process_metrics_only(response, &mut response_collector);
296+
});
297+
294298
let response = NvCreateCompletionResponse::from_annotated_stream(stream)
295299
.await
296300
.map_err(|e| {
@@ -515,7 +519,10 @@ async fn chat_completions(
515519

516520
Ok(sse_stream.into_response())
517521
} else {
518-
// TODO: report ISL/OSL for non-streaming requests
522+
let stream = stream.inspect(move |response| {
523+
process_metrics_only(response, &mut response_collector);
524+
});
525+
519526
let response = NvCreateChatCompletionResponse::from_annotated_stream(stream)
520527
.await
521528
.map_err(|e| {
@@ -911,6 +918,17 @@ impl<T> From<Annotated<T>> for EventConverter<T> {
911918
}
912919
}
913920

921+
/// Records token metrics carried by a single stream item, reading the item only by
/// shared reference so the caller keeps ownership of it.
///
/// `annotated` is passed to `LLMMetricAnnotation::from_annotation`. Only an
/// `Ok(Some(metrics))` result is acted on: `metrics.output_tokens` is forwarded to
/// `observe_current_osl`, and `metrics.input_tokens` / `metrics.chunk_tokens` to
/// `observe_response`. An `Err(_)` or `Ok(None)` result is ignored and
/// `response_collector` is left untouched for that item.
fn process_metrics_only<T>(
922+
annotated: &Annotated<T>,
923+
response_collector: &mut ResponseMetricCollector,
924+
) {
925+
// Forward token counts to the collector; items without extractable metrics are skipped silently.
926+
if let Ok(Some(metrics)) = LLMMetricAnnotation::from_annotation(annotated) {
927+
response_collector.observe_current_osl(metrics.output_tokens);
928+
response_collector.observe_response(metrics.input_tokens, metrics.chunk_tokens);
929+
}
930+
}
931+
914932
fn process_event_converter<T: Serialize>(
915933
annotated: EventConverter<T>,
916934
response_collector: &mut ResponseMetricCollector,

0 commit comments

Comments
 (0)