@@ -59,6 +59,41 @@ pub use crate::protocols::common::llm_backend::{BackendOutput, PreprocessedReque
5959
/// Annotation name `"formatted_prompt"`.
// NOTE(review): presumably tags the rendered prompt text; confirm against the producer.
pub const ANNOTATION_FORMATTED_PROMPT: &str = "formatted_prompt";

/// Annotation name `"token_ids"`.
// NOTE(review): presumably tags the tokenized prompt ids; confirm against the producer.
pub const ANNOTATION_TOKEN_IDS: &str = "token_ids";

/// Event name under which `LLMMetricAnnotation` values are attached to a response.
pub const ANNOTATION_LLM_METRICS: &str = "llm_metrics";
63+ #[ derive( Debug , Clone , serde:: Serialize , serde:: Deserialize ) ]
64+ pub struct LLMMetricAnnotation {
65+ pub input_tokens : usize ,
66+ pub output_tokens : usize ,
67+ pub chunk_tokens : usize ,
68+ }
69+
70+ impl LLMMetricAnnotation {
71+ /// Convert this metrics struct to an Annotated event
72+ pub fn to_annotation < T > ( & self ) -> Result < Annotated < T > , serde_json:: Error > {
73+ Annotated :: from_annotation ( ANNOTATION_LLM_METRICS , self )
74+ }
75+
76+ /// Extract LLM metrics from an Annotated event, if present
77+ pub fn from_annotation < T > (
78+ annotation : & Annotated < T > ,
79+ ) -> Result < Option < LLMMetricAnnotation > , Box < dyn std:: error:: Error > > {
80+ if annotation. event . is_none ( ) {
81+ return Ok ( None ) ;
82+ }
83+ if annotation. event . as_ref ( ) . unwrap ( ) != ANNOTATION_LLM_METRICS {
84+ return Ok ( None ) ;
85+ }
86+ let comments = annotation
87+ . comment
88+ . as_ref ( )
89+ . ok_or ( "missing comments block" ) ?;
90+ if comments. len ( ) != 1 {
91+ return Err ( "malformed comments block - expected exactly 1 comment" . into ( ) ) ;
92+ }
93+ let metrics: LLMMetricAnnotation = serde_json:: from_str ( & comments[ 0 ] ) ?;
94+ Ok ( Some ( metrics) )
95+ }
96+ }
6297
6398pub struct OpenAIPreprocessor {
6499 mdcsum : String ,
@@ -251,9 +286,20 @@ impl OpenAIPreprocessor {
251286 . map_err ( |e| e. to_string ( ) )
252287 } ) ;
253288
254- response. chunk_tokens = Some ( chunk_tokens) ;
255- response. input_tokens = Some ( isl) ;
256- response. output_tokens = Some ( current_osl) ;
289+ // Create LLM metrics annotation
290+ let llm_metrics = LLMMetricAnnotation {
291+ input_tokens : isl,
292+ output_tokens : current_osl,
293+ chunk_tokens,
294+ } ;
295+
296+ if let Ok ( metrics_annotated) = llm_metrics. to_annotation :: < ( ) > ( ) {
297+ // Only set event if not already set to avoid overriding existing events (like errors)
298+ if response. event . is_none ( ) {
299+ response. event = metrics_annotated. event ;
300+ }
301+ response. comment = metrics_annotated. comment ;
302+ }
257303
258304 tracing:: trace!(
259305 request_id = inner. context. id( ) ,
0 commit comments