elastic · benwtrent · Mar 24, 2025 · Oct 29, 2024 · Dec 11, 2024 · Mar 4, 2025
diff --git a/it/test_all_tracks_and_challenges.py b/it/test_all_tracks_and_challenges.py
@@ -21,7 +21,7 @@
 
 
 class TestTrackRepository:
-    skip_tracks = ["elastic/logs", "elastic/security", "k8s_metrics", "sql", "elser-ingest-speedtest", "msmarco-v2-vector"]
+    skip_tracks = ["elastic/logs", "elastic/security", "k8s_metrics", "sql", "elser-ingest-speedtest", "msmarco-v2-vector", "openai_vector"]
     disable_assertions = {
         "http_logs": ["append-no-conflicts", "runtime-fields"],
         "nyc_taxis": ["update-aggs-only"],

diff --git a/openai_vector/README.md b/openai_vector/README.md
@@ -51,6 +51,7 @@ This track accepts the following parameters with Rally 0.8.0+ using `--track-par
 - standalone_search_clients (default: 8)
 - standalone_search_iterations (default: 10000)
 - vector_index_type (default: "hnsw"): The index kind for storing the vectors.
+- search_ops (default: [[10, 20, 0], [10, 20, 1], [10, 20, 2], [10, 50, 1], [10, 50, 2], [10, 100, 1], [100, 120, 1], [100, 120, 2], [100, 200, 1], [100, 200, 2], [100, 500, 1], [100, 500, 2]]): The vector search operations, each formatted as [k, num_candidates, oversample], where `oversample` indicates the ratio of extra `k` results to gather and then rescore.
 
 ### License
 

diff --git a/openai_vector/challenges/default.json b/openai_vector/challenges/default.json
@@ -42,61 +42,56 @@
         "retry-until-success": true,
         "include-in-reporting": false
       }
-    },
+    }
     {# serverless-post-ingest-sleep-marker-start #}{%- if post_ingest_sleep|default(false) -%}
-    {
+    ,{
       "name": "post-ingest-sleep",
       "operation": {
         "operation-type": "sleep",
         "duration": {{ post_ingest_sleep_duration|default(30) }}
       }
-    },
+    }
     {%- endif -%}{# serverless-post-ingest-sleep-marker-end #}
+    {%- for i in range(p_search_ops|length) %},
     {
-      "name": "standalone-search-knn-10-100-single-client",
-      "operation": "knn-search-10-100",
-      "warmup-iterations": 100,
-      "iterations": {{ standalone_search_iterations | default(10000) | int }}
-    },
-    {
-      "name": "standalone-knn-search-100-1000-single-client",
-      "operation": "knn-search-100-1000",
-      "warmup-iterations": 100,
+      {%- if p_search_ops[i][2] > 0 -%}
+        "name": "standalone-search-knn-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}-{{p_search_ops[i][2]}}-single-client",
+        "operation": "knn-search-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}-{{p_search_ops[i][2]}}"
+      {%- else -%}
+        "name": "standalone-search-knn-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}-single-client",
+        "operation": "knn-search-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}"
+      {%- endif -%},
+      "warmup-iterations": 1000,
       "iterations": {{ standalone_search_iterations | default(10000) | int }}
     },
     {
-      "name": "standalone-search-knn-10-100-multiple-clients",
-      "operation": "knn-search-10-100",
-      "warmup-iterations": 100,
+      {%- if p_search_ops[i][2] > 0 -%}
+        "name": "standalone-search-knn-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}-{{p_search_ops[i][2]}}-multiple-clients",
+        "operation": "knn-search-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}-{{p_search_ops[i][2]}}"
+      {%- else -%}
+        "name": "standalone-search-knn-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}-multiple-clients",
+        "operation": "knn-search-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}"
+      {%- endif -%},
+      "warmup-iterations": 1000,
       "clients": {{ standalone_search_clients | default(8) | int }},
       "iterations": {{ standalone_search_iterations | default(10000) | int }}
-    },
+    }
+    {%- endfor %},
     {
-      "name": "standalone-search-knn-100-1000-multiple-clients",
-      "operation": "knn-search-100-1000",
-      "warmup-iterations": 100,
-      "clients": {{ standalone_search_clients | default(8) | int }},
-      "iterations": {{ standalone_search_iterations | default(10000) | int }}
-    },
+      "name": "parallel-documents-indexing-bulk",
+      "operation": "parallel-documents-indexing",
+      "warmup-time-period": 60,
+      "clients": {{ parallel_indexing_bulk_clients | default(1) | int }},
+      "target-throughput": {{ parallel_indexing_bulk_target_throughput | default(1) | int }}
+    }
+    {%- for i in range(p_search_ops|length) %},
     {
-      "parallel": {
-        "tasks": [
-          {
-            "name": "parallel-documents-indexing-bulk",
-            "operation": "parallel-documents-indexing",
-            "clients": {{ parallel_indexing_bulk_clients | default(1) | int }},
-            "time-period": {{ parallel_indexing_time_period | default(1800) | int }},
-            "target-throughput": {{ parallel_indexing_bulk_target_throughput | default(1) | int }}
-          },
-          {
-            "name": "parallel-documents-indexing-search-knn-10-100",
-            "operation": "knn-search-10-100",
-            "clients": {{ parallel_indexing_search_clients | default(3) | int }},
-            "time-period": {{ parallel_indexing_time_period | default(1800) | int }},
-            "target-throughput": {{ parallel_indexing_search_target_throughput | default(100) | int }}
-          }
-        ]
-      }
+      {%- if p_search_ops[i][2] > 0 -%}
+        "operation": "knn-recall-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}-{{p_search_ops[i][2]}}"
+      {%- else -%}
+        "operation": "knn-recall-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}"
+      {%- endif -%}
     }
+    {%- endfor %}
   ]
 }
diff --git a/openai_vector/index-vectors-only-mapping.json b/openai_vector/index-vectors-only-mapping.json
@@ -2,7 +2,7 @@
   "settings": {
     {# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%}
       {% if preload_pagecache %}
-    "index.store.preload": [ "vec", "vex", "vem"],
+    "index.store.preload": [ "vec", "vex", "vem", "veq", "veqm", "veb", "vebm"],
       {% endif %}
     "index.number_of_shards": {{number_of_shards | default(1)}},
     "index.number_of_replicas": {{number_of_replicas | default(0)}}
@@ -21,7 +21,7 @@
         "index": true,
         "similarity": "dot_product",
         "index_options": {
-          "type": {{ vector_index_type | default("hnsw") | tojson }}
+          "type": {{ vector_index_type | default("int8_hnsw") | tojson }}
         }
       }
     }

diff --git a/openai_vector/index-vectors-only-with-docid-mapping.json b/openai_vector/index-vectors-only-with-docid-mapping.json
@@ -0,0 +1,29 @@
+{
+  "settings": {
+    {# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%}
+      {% if preload_pagecache %}
+    "index.store.preload": [ "vec", "vex", "vem", "veq", "veqm", "veb", "vebm"],
+      {% endif %}
+    "index.number_of_shards": {{number_of_shards | default(1)}},
+    "index.number_of_replicas": {{number_of_replicas | default(0)}}
+    {%- endif -%}{# non-serverless-index-settings-marker-end #}
+  },
+  "mappings": {
+    "dynamic": false,
+    "properties": {
+      "docid": {
+        "type": "keyword"
+      },
+      "emb": {
+        "type": "dense_vector",
+        "element_type": "float",
+        "dims": 1536,
+        "index": true,
+        "similarity": "dot_product",
+        "index_options": {
+          "type": {{ vector_index_type | default("int8_hnsw") | tojson }}
+        }
+      }
+    }
+  }
+}
diff --git a/openai_vector/index-vectors-with-text-mapping.json b/openai_vector/index-vectors-with-text-mapping.json
@@ -2,7 +2,7 @@
   "settings": {
     {# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%}
       {% if preload_pagecache %}
-    "index.store.preload": [ "vec", "vex", "vem"],
+    "index.store.preload": [ "vec", "vex", "vem", "veq", "veqm", "veb", "vebm"],
       {% endif %}
     "index.number_of_shards": {{number_of_shards | default(1)}},
     "index.number_of_replicas": {{number_of_replicas | default(0)}}
@@ -26,7 +26,7 @@
         "index": true,
         "similarity": "dot_product",
         "index_options": {
-          "type": {{ vector_index_type | default("hnsw") | tojson }}
+          "type": {{ vector_index_type | default("int8_hnsw") | tojson }}
         }
       }
     }

diff --git a/openai_vector/open_ai_true_top_1000.json.bz2 b/openai_vector/open_ai_true_top_1000.json.bz2
diff --git a/openai_vector/operations/default.json b/openai_vector/operations/default.json
@@ -24,18 +24,31 @@
   "corpora": "openai-parallel-indexing",
   "bulk-size": {{parallel_indexing_bulk_size | default(500)}},
   "ingest-percentage": {{parallel_indexing_ingest_percentage | default(100)}}
-},
+}
+{%- set p_search_ops = (search_ops | default([(10, 20, 0), (10, 20, 1), (10, 20, 2), (10, 50, 1), (10, 50, 2), (10, 100, 1), (100, 120, 1), (100, 120, 2), (100, 200, 1), (100, 200, 2), (100, 500, 1), (100, 500, 2)]))%}
+{%- for i in range(p_search_ops|length) %},
 {
-  "name": "knn-search-10-100",
+  {%- if p_search_ops[i][2] > 0 -%}
+    "name": "knn-search-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}-{{p_search_ops[i][2]}}"
+  {%- else -%}
+    "name": "knn-search-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}"
+  {%- endif -%},
   "operation-type": "search",
   "param-source": "knn-param-source",
-  "k": 10,
-  "num-candidates": 100
+  "k": {{p_search_ops[i][0]}},
+  "num-candidates": {{p_search_ops[i][1]}},
+  "oversample": {{p_search_ops[i][2]}}
 },
 {
-  "name": "knn-search-100-1000",
-  "operation-type": "search",
-  "param-source": "knn-param-source",
-  "k": 100,
-  "num-candidates": 1000
+  {%- if p_search_ops[i][2] > 0 -%}
+    "name": "knn-recall-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}-{{p_search_ops[i][2]}}"
+  {%- else -%}
+    "name": "knn-recall-{{p_search_ops[i][0]}}-{{p_search_ops[i][1]}}"
+  {%- endif -%},
+  "operation-type": "knn-recall",
+  "param-source": "knn-recall-param-source",
+  "k": {{p_search_ops[i][0]}},
+  "num-candidates": {{p_search_ops[i][1]}},
+  "oversample": {{p_search_ops[i][2]}}
 }
+{%- endfor %}
diff --git a/openai_vector/track.json b/openai_vector/track.json
@@ -6,7 +6,7 @@
   "indices": [
     {
       "name": "openai",
-      "body": "index-{{ mapping_type | default("vectors-only") }}-mapping.json"
+      "body": "index-{{ mapping_type | default("vectors-only-with-docid") }}-mapping.json"
     }
   ],
   "corpora": [