lightspeed-core · tisnik · Sep 12, 2025 · Sep 12, 2025 · Sep 12, 2025
diff --git a/config/evaluation_data.yaml b/config/evaluation_data.yaml
@@ -7,66 +7,67 @@
     - "ragas:faithfulness"
     - "ragas:response_relevancy"
     - "ragas:context_precision_without_reference"
-  
-  turn_metrics_metadata: 
-    "ragas:faithfulness": 
+
+  turn_metrics_metadata:
+    "ragas:faithfulness":
       threshold: 0.99
   conversation_metrics: []
   conversation_metrics_metadata: {}
-  
+
   turns:
-    - turn_id: 1
+    - turn_id: "1"
       query: "User query"
       response: "API response"
       contexts:
-        - content: "Context 1"
-        - content: "Context 2"
+        - "Context 1"
+        - "Context 2"
       expected_response: "Expected Response"
 
 - conversation_group_id: "conv_group_2"
   description: "conversation group description"
 
   turn_metrics:
     - "ragas:context_recall"
-    - "ragas:context_relevance" 
+    - "ragas:context_relevance"
     - "ragas:context_precision_with_reference"
-  
+
   turn_metrics_metadata: {}
   conversation_metrics: []
   conversation_metrics_metadata: {}
-  
+
   turns:
-    - turn_id: 1
+    - turn_id: "1"
       query: "User Query"
       response: "API Response"
       contexts:
-        - content: "Context 1"
+        - "Context 1"
       expected_response: "Expected Response"
 
 - conversation_group_id: "conv_group_3"
-
+  description: "conversation group description"
+
   turn_metrics:
     - "custom:answer_correctness"
-  
+
   turn_metrics_metadata: {}
-  
+
   conversation_metrics:
     - "deepeval:conversation_completeness"
     - "deepeval:conversation_relevancy"
-  
+
   conversation_metrics_metadata: {}
-  
+
   turns:
-    - turn_id: 1
+    - turn_id: "1"
       query: "User Query 1"
       response: "API Response 1"
       contexts:
-        - content: "Context"
+        - "Context"
       expected_response: "Expected Response 1"
-    
-    - turn_id: 2
+
+    - turn_id: "2"
       query: "User Query 2"
       response: "API Response 2"
       contexts:
-        - content: "Context"
+        - "Context"
       expected_response: "Expected Response 2"
diff --git a/config/system.yaml b/config/system.yaml
@@ -34,47 +34,47 @@ metrics_metadata:
     "ragas:faithfulness":
       threshold: 0.8
       description: "How faithful the response is to the provided context"
-      
+
     "ragas:response_relevancy":
       threshold: 0.8
       description: "How relevant the response is to the question"
-    
+
     # Ragas Context/Retrieval Evaluation metrics
     "ragas:context_recall":
       threshold: 0.8
       description: "Did we fetch every fact the answer needs?"
-      
+
     "ragas:context_relevance":
       threshold: 0.7
       description: "Is what we retrieved actually relevant to user query?"
-      
+
     "ragas:context_precision_with_reference":
       threshold: 0.7
       description: "How precise the retrieved context is (with reference)"
-      
+
     "ragas:context_precision_without_reference":
       threshold: 0.7
       description: "How precise the retrieved context is (without reference)"
-    
+
     # Custom metrics
     "custom:answer_correctness":
       threshold: 0.75
       description: "Correctness vs expected answer using custom LLM evaluation"
 
     "custom:tool_eval":
       description: "Tool call evaluation comparing expected vs actual tool calls"
-  
+
   # Conversation-level metrics metadata
   conversation_level:
     # DeepEval metrics
     "deepeval:conversation_completeness":
       threshold: 0.8
       description: "How completely the conversation addresses user intentions"
-      
+
     "deepeval:conversation_relevancy":
       threshold: 0.7
       description: "How relevant the conversation is to the topic/context"
-      
+
     "deepeval:knowledge_retention":
       threshold: 0.7
       description: "How well the model retains information from previous turns"
@@ -87,11 +87,11 @@ output:
     - csv                   # Detailed results CSV
     - json                  # Summary JSON with statistics
     - txt                   # Human-readable summary
-  
+
   # CSV columns to include
   csv_columns:
     - "conversation_group_id"
-    - "turn_id" 
+    - "turn_id"
     - "metric_identifier"
     - "score"
     - "threshold"
@@ -105,7 +105,7 @@ output:
 visualization:
   figsize: [12, 8]            # Graph size (width, height)
   dpi: 300                    # Image resolution
-  
+
   # Graph types to generate
   enabled_graphs:
     - "pass_rates"            # Pass rate bar chart