55"""
66
77import time
8- from typing import Any , Dict , List , Optional , Union
8+ from typing import Any , Dict , List , Optional , Tuple , Union
99
1010from .core import ConfigLoader , DataValidator , EvaluationData , EvaluationResult , TurnData
1111from .llm_managers .llm_manager import LLMManager
@@ -80,7 +80,9 @@ def __init__(self, llm_manager: LLMManager):
8080 "custom" : self .custom_metrics ,
8181 }
8282
83- def evaluate_metric (self , framework : str , metric_name : str , request : EvaluationRequest ):
83+ def evaluate_metric (
84+ self , framework : str , metric_name : str , request : EvaluationRequest
85+ ) -> Tuple [Optional [float ], str ]:
8486 """Route evaluation to appropriate handler."""
8587 if framework in self .handlers :
8688 # Create shared EvaluationScope
@@ -155,7 +157,7 @@ def run_evaluation(self, evaluation_data: List[EvaluationData]) -> List[Evaluati
155157 print (f"\n ✅ Evaluation complete: { len (self .results )} results generated" )
156158 return self .results
157159
158- def _process_conversation (self , conv_data : EvaluationData ):
160+ def _process_conversation (self , conv_data : EvaluationData ) -> None :
159161 """Process single conversation - handle turn and conversation level metrics."""
160162 print (f"\n 📋 Evaluating: { conv_data .conversation_group_id } " )
161163
@@ -175,15 +177,15 @@ def _process_conversation(self, conv_data: EvaluationData):
175177 print (f"🗣️ Conversation-level metrics: { conv_data .conversation_metrics } " )
176178 self ._evaluate_conversation (conv_data )
177179
178- def _evaluate_turn (self , conv_data : EvaluationData , turn_idx : int , turn_data : TurnData ):
180+ def _evaluate_turn (self , conv_data : EvaluationData , turn_idx : int , turn_data : TurnData ) -> None :
179181 """Evaluate single turn with specified turn metrics."""
180182 for metric_identifier in conv_data .turn_metrics :
181183 request = EvaluationRequest .for_turn (conv_data , metric_identifier , turn_idx , turn_data )
182184 result = self ._evaluate_metric (request )
183185 if result :
184186 self .results .append (result )
185187
186- def _evaluate_conversation (self , conv_data : EvaluationData ):
188+ def _evaluate_conversation (self , conv_data : EvaluationData ) -> None :
187189 """Evaluate conversation-level metrics."""
188190 for metric_identifier in conv_data .conversation_metrics :
189191 request = EvaluationRequest .for_conversation (conv_data , metric_identifier )
0 commit comments