
Commit 96e39ba

removing backward compatibility for turn data + coderabbit fixes
1 parent 9fd462d commit 96e39ba

File tree (2 files changed: +28 -20 lines)

  src/lightspeed_evaluation/core/metrics/deepeval.py
  src/lightspeed_evaluation/core/metrics/geval.py

src/lightspeed_evaluation/core/metrics/deepeval.py

Lines changed: 6 additions & 2 deletions

@@ -119,9 +119,13 @@ def evaluate(
             return None, f"DeepEval {metric_name} evaluation failed: {str(e)}"

         # Otherwise, assume it's a GEval metric
-        # Note: metric_name should NOT have "geval:" prefix here
+        normalized_metric_name = (
+            metric_name.split(":", 1)[1]
+            if metric_name.startswith("geval:")
+            else metric_name
+        )
         return self.geval_handler.evaluate(
-            metric_name=metric_name,
+            metric_name=normalized_metric_name,
             conv_data=conv_data,
             _turn_idx=scope.turn_idx,
             turn_data=scope.turn_data,
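
The normalization above strips an optional "geval:" prefix before dispatching to the GEval handler; names without the prefix pass through untouched, and only the first colon is consumed. A minimal standalone sketch of that expression (the helper name is hypothetical, not part of the codebase):

    def normalize_geval_name(metric_name: str) -> str:
        """Strip a leading "geval:" prefix, if present (hypothetical helper)."""
        return (
            metric_name.split(":", 1)[1]
            if metric_name.startswith("geval:")
            else metric_name
        )

    assert normalize_geval_name("geval:answer_quality") == "answer_quality"
    assert normalize_geval_name("answer_quality") == "answer_quality"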

src/lightspeed_evaluation/core/metrics/geval.py

Lines changed: 22 additions & 18 deletions

@@ -285,12 +285,6 @@ def _evaluate_turn( # pylint: disable=R0913,R0917

         # Convert evaluation_params to enum values if valid, otherwise use defaults
         converted_params = self._convert_evaluation_params(evaluation_params)
-        if not converted_params:
-            # If no valid params, use sensible defaults for turn evaluation
-            converted_params = [
-                LLMTestCaseParams.INPUT,
-                LLMTestCaseParams.ACTUAL_OUTPUT,
-            ]

         # Create GEval metric with runtime configuration
         metric_kwargs: dict[str, Any] = {
@@ -302,6 +296,18 @@ def _evaluate_turn( # pylint: disable=R0913,R0917
             "top_logprobs": 5,
         }

+        # Only set evaluation_params if we have valid enum conversions
+        # or if no params were provided at all (then use defaults)
+        if converted_params is None:
+            if not evaluation_params:
+                metric_kwargs["evaluation_params"] = [
+                    LLMTestCaseParams.INPUT,
+                    LLMTestCaseParams.ACTUAL_OUTPUT,
+                ]
+            # else: leave unset so GEval can auto-detect from custom strings
+        else:
+            metric_kwargs["evaluation_params"] = converted_params
+
         # Add evaluation steps if provided
         if evaluation_steps:
             metric_kwargs["evaluation_steps"] = evaluation_steps
@@ -320,12 +326,7 @@ def _evaluate_turn( # pylint: disable=R0913,R0917
             test_case_kwargs["expected_output"] = turn_data.expected_response

         if turn_data.contexts:
-            # Normalize contexts: handle both dict and string formats
-            normalized_contexts = [
-                ctx.get("content", str(ctx)) if isinstance(ctx, dict) else str(ctx)
-                for ctx in turn_data.contexts
-            ]
-            test_case_kwargs["context"] = normalized_contexts
+            test_case_kwargs["context"] = turn_data.contexts

         # Create test case for a single turn
         test_case = LLMTestCase(**test_case_kwargs)
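
This hunk is the backward-compatibility removal named in the commit message: contexts are no longer normalized from mixed dict/string form, so turn data is now expected to carry plain strings. A small illustration of the removed shim versus the new pass-through expectation:

    mixed_contexts = [{"content": "doc text"}, "plain text"]

    # Old behavior (removed above): dicts were flattened to their "content" value.
    normalized = [
        ctx.get("content", str(ctx)) if isinstance(ctx, dict) else str(ctx)
        for ctx in mixed_contexts
    ]
    assert normalized == ["doc text", "plain text"]

    # New behavior: turn_data.contexts is forwarded to LLMTestCase unchanged,
    # so callers must supply a list of plain strings up front.
    contexts = ["doc text", "plain text"]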
@@ -385,12 +386,6 @@ def _evaluate_conversation( # pylint: disable=R0913,R0917,R0914
         """
         # Convert evaluation_params to enum values if valid, otherwise use defaults
         converted_params = self._convert_evaluation_params(evaluation_params)
-        if not converted_params:
-            # If no valid params, use sensible defaults for conversation evaluation
-            converted_params = [
-                LLMTestCaseParams.INPUT,
-                LLMTestCaseParams.ACTUAL_OUTPUT,
-            ]

         # Configure the GEval metric for conversation-level evaluation
         metric_kwargs: dict[str, Any] = {
@@ -402,6 +397,15 @@
             "top_logprobs": 5,  # Vertex/Gemini throws an error if over 20.
         }

+        if converted_params is None:
+            if not evaluation_params:
+                metric_kwargs["evaluation_params"] = [
+                    LLMTestCaseParams.INPUT,
+                    LLMTestCaseParams.ACTUAL_OUTPUT,
+                ]
+        else:
+            metric_kwargs["evaluation_params"] = converted_params
+
         # Add evaluation steps if provided
        if evaluation_steps:
             metric_kwargs["evaluation_steps"] = evaluation_steps
