rh-ecosystem-edge · andrej1991 · Sep 17, 2025 · coderabbitai · Sep 18, 2025
diff --git a/test/evals/eval.py b/test/evals/eval.py
@@ -88,7 +88,7 @@ def parse_args():
 result_summary = evaluator.get_result_summary()
 
 failed_evals_count = result_summary["FAIL"] + result_summary["ERROR"]
-if failed_evals_count > 2:
+if failed_evals_count:
     print(f"❌ {failed_evals_count} evaluation(s) failed!")
     sys.exit(1)
 

diff --git a/test/evals/eval_data.yaml b/test/evals/eval_data.yaml
@@ -40,8 +40,8 @@
   conversation:
     - eval_id: static_networking_support
       eval_query: Create a cluster with static networking
-      eval_types: [response_eval:sub-string]
-      expected_keywords: ["I do not support creating clusters with static networking", "assisted-installer web-based wizard"]
+      eval_types: [response_eval:accuracy]
+      expected_response: "I do not support creating clusters with static networking, however, you can use the assisted-installer web-based wizard to configure a cluster with static networking."
 
 - conversation_group: sno_requirements_conv
   conversation:
@@ -88,16 +88,16 @@
   conversation:
     - eval_id: create_eval_test_multinode
       eval_types: [tool_eval, response_eval:accuracy, response_eval:sub-string]
-      eval_query: Create a multi-node cluster named 'eval-test-multinode-uniq-cluster-name' with OpenShift 4.18.22 and domain test.local
+      eval_query: Create a multi-node cluster named 'eval-test-multinode-uniq-cluster-name' with OpenShift 4.18.22 and domain test.local.
       expected_tool_calls:
         - - tool_name: create_cluster
             arguments:
               name: "eval-test-multinode-uniq-cluster-name"
               version: "4\\.18\\.22"
               base_domain: "test\\.local"
               single_node: "(?i:false)"
-              cpu_architecture: None
-              ssh_public_key: None
+              cpu_architecture: "x86_64"
+              ssh_public_key: ""
       expected_keywords: ["eval-test-multinode-uniq-cluster-name", "ID", "Discovery ISO", "cluster"]
       expected_response: I have created a cluster with name eval-test-multinode-uniq-cluster-name. Next, you'll need to download the Discovery ISO, then boot your hosts with it. Would you like me to get the Discovery ISO download URL?
     - eval_id: set_ssh_key_eval_test_ssh
@@ -118,12 +118,8 @@
               cluster_id: "[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"
       expected_keywords: ["Discovery ISO"]
     - eval_id: host_booted_but_not_discovered
-      eval_query: I booted the hosts
-      eval_types: [tool_eval, response_eval:accuracy]
-      expected_tool_calls:
-        - - tool_name: cluster_info
-            arguments:
-              cluster_id: "[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"
+      eval_query: I booted the hosts for the cluster you just created
+      eval_types: [response_eval:accuracy]
       expected_response: "hosts hasn't been discovered yet."
 
 - conversation_group: list_clusters_conv
@@ -178,19 +174,3 @@
       expected_response: I can help you install OpenShift on-premise using the Assisted Installer, either on bare metal servers or virtual machines (e.g., vSphere, KVM, libvirt). I do not support public cloud platforms like AWS, Azure, or GCP.
       description: Ensure the assistant concisely states supported on-prem baremetal/VM scope and references Assisted Installer
 
-- conversation_group: cluster_id_from_name
-  conversation:
-    - eval_id: create_single_node_cluser
-      eval_query: Create a multi-node cluster named 'eval-test2-uniq-cluster-name' with OpenShift 4.18.22 and domain test.local. I do not have an SSH key to provide.
-      eval_types: [response_eval:accuracy, response_eval:sub-string]
-      expected_keywords: ["eval-test2-uniq-cluster-name", "ID", "Discovery ISO", "download", "cluster"]
-      expected_response: I have created a cluster with name eval-test-uniq-cluster-name. Next, you'll need to download the Discovery ISO, then boot your hosts with it. Would you like me to get the Discovery ISO download URL?
-    - eval_id: cluster_name_tool_call
-      eval_query: Show me information on cluster eval-test2-uniq-cluster-name
-      eval_types: [tool_eval, response_eval:sub-string]
-      expected_tool_calls:
-        - - tool_name: cluster_info
-            arguments:
-              cluster_id: "[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"
-      expected_keywords: ["cluster", "eval-test2-uniq-cluster-name", "test.local", "4.18.22"]
-      description: Test handling requesting a cluster by name