@@ -285,12 +285,6 @@ def _evaluate_turn( # pylint: disable=R0913,R0917
285285
286286 # Convert evaluation_params to enum values if valid, otherwise use defaults
287287 converted_params = self ._convert_evaluation_params (evaluation_params )
288- if not converted_params :
289- # If no valid params, use sensible defaults for turn evaluation
290- converted_params = [
291- LLMTestCaseParams .INPUT ,
292- LLMTestCaseParams .ACTUAL_OUTPUT ,
293- ]
294288
295289 # Create GEval metric with runtime configuration
296290 metric_kwargs : dict [str , Any ] = {
@@ -302,6 +296,18 @@ def _evaluate_turn( # pylint: disable=R0913,R0917
302296 "top_logprobs" : 5 ,
303297 }
304298
299+ # Only set evaluation_params if we have valid enum conversions
300+ # or if no params were provided at all (then use defaults)
301+ if converted_params is None :
302+ if not evaluation_params :
303+ metric_kwargs ["evaluation_params" ] = [
304+ LLMTestCaseParams .INPUT ,
305+ LLMTestCaseParams .ACTUAL_OUTPUT ,
306+ ]
307+ # else: leave unset so GEval can auto-detect from custom strings
308+ else :
309+ metric_kwargs ["evaluation_params" ] = converted_params
310+
305311 # Add evaluation steps if provided
306312 if evaluation_steps :
307313 metric_kwargs ["evaluation_steps" ] = evaluation_steps
@@ -320,12 +326,7 @@ def _evaluate_turn( # pylint: disable=R0913,R0917
320326 test_case_kwargs ["expected_output" ] = turn_data .expected_response
321327
322328 if turn_data .contexts :
323- # Normalize contexts: handle both dict and string formats
324- normalized_contexts = [
325- ctx .get ("content" , str (ctx )) if isinstance (ctx , dict ) else str (ctx )
326- for ctx in turn_data .contexts
327- ]
328- test_case_kwargs ["context" ] = normalized_contexts
329+ test_case_kwargs ["context" ] = turn_data .contexts
329330
330331 # Create test case for a single turn
331332 test_case = LLMTestCase (** test_case_kwargs )
@@ -385,12 +386,6 @@ def _evaluate_conversation( # pylint: disable=R0913,R0917,R0914
385386 """
386387 # Convert evaluation_params to enum values if valid, otherwise use defaults
387388 converted_params = self ._convert_evaluation_params (evaluation_params )
388- if not converted_params :
389- # If no valid params, use sensible defaults for conversation evaluation
390- converted_params = [
391- LLMTestCaseParams .INPUT ,
392- LLMTestCaseParams .ACTUAL_OUTPUT ,
393- ]
394389
395390 # Configure the GEval metric for conversation-level evaluation
396391 metric_kwargs : dict [str , Any ] = {
@@ -402,6 +397,15 @@ def _evaluate_conversation( # pylint: disable=R0913,R0917,R0914
402397 "top_logprobs" : 5 , # Vertex/Gemini throws an error if over 20.
403398 }
404399
400+ if converted_params is None :
401+ if not evaluation_params :
402+ metric_kwargs ["evaluation_params" ] = [
403+ LLMTestCaseParams .INPUT ,
404+ LLMTestCaseParams .ACTUAL_OUTPUT ,
405+ ]
406+ else :
407+ metric_kwargs ["evaluation_params" ] = converted_params
408+
405409 # Add evaluation steps if provided
406410 if evaluation_steps :
407411 metric_kwargs ["evaluation_steps" ] = evaluation_steps