Skip to content
40 changes: 40 additions & 0 deletions big5/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,18 @@ curl -XPUT "http://localhost:9200/_cluster/settings" \
}'
```

### gRPC Operations Support

Limited gRPC support is provided for the big5 workload over a protobuf/gRPC transport. gRPC operations can be found in `operations/grpc.json`, with new operations added as support is expanded for gRPC APIs in OpenSearch. All supported big5 operations can be run with the `big5/test_procedures/grpc/grpc-schedule.json` schedule (`--test-procedure="grpc-big5"`). To benchmark with the gRPC transport, ensure the `transport-grpc` plugin is installed on the cluster and enabled in settings. See the `transport-grpc` [README.md](https://github.com/opensearch-project/OpenSearch/tree/main/modules/transport-grpc#readme) for guidance on enabling and using this transport. Note that the gRPC transport starts on a separate endpoint from the default REST API; specify this endpoint with `--grpc-target-hosts=<host:port>`.

#### Supported gRPC Operations

This workload includes limited gRPC/protobuf support for big5 operations, which provide an alternative query interface to OpenSearch. The supported gRPC operations are defined in `operations/grpc.json`:

- **grpc-index-append**: Bulk ingestion into the big5 index.
- **grpc-match-all**: Match all query.
- **grpc-term**: Simple term query on `log.file.path`.

### Parameters

This workload allows the following parameters to be specified using `--workload-params`:
Expand Down Expand Up @@ -299,6 +311,34 @@ Running ppl-terms-significant-2 [
------------------------------------------------------
```

#### gRPC Test Procedure

```bash
opensearch-benchmark run \
--pipeline=benchmark-only \
--workload-path="big5" \
--test-procedure="grpc-big5" \
--target-host=http://localhost:9200 \
--grpc-target-hosts=http://localhost:9400
```

```
[INFO] [Test Run ID]: 8a193e44-6a36-4dff-a516-5bc07c10d382
[INFO] Running test with workload [big5], test_procedure [grpc-big5] and cluster_config ['external'] with version [3.4.0-SNAPSHOT].

Running delete-index [100% done]
Running create-index [100% done]
Running check-cluster-health [100% done]
Running grpc-index-append [100% done]
Running refresh-after-index [100% done]
Running force-merge [100% done]
Running refresh-after-force-merge [100% done]
Running wait-until-merges-finish [100% done]
Running grpc-match-all [100% done]
Running grpc-term [100% done]
------------------------------------------------------
```

### Considerations when Using Larger Data Corpora

There are several points to note when carrying out performance runs using large data corpora:
Expand Down
31 changes: 31 additions & 0 deletions big5/operations/grpc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"name": "grpc-index-append",
"operation-type": "proto-bulk",
"bulk-size": {{bulk_size | default(500)}},
"ingest-percentage": {{ingest_percentage | default(100)}}
},
{
"name": "grpc-match-all",
"operation-type": "proto-search",
"index": "{{index_name | default('big5')}}",
"body": {
"query": {
"match_all": {}
}
}
},
{
"name": "grpc-term",
"operation-type": "proto-search",
"index": "{{index_name | default('big5')}}",
"request-timeout": 7200,
"body": {
"query": {
"term": {
"log.file.path": {
"value": "/var/log/messages/birdknight"
}
}
}
}
}
9 changes: 9 additions & 0 deletions big5/test_procedures/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@
{{ benchmark.collect(parts="common/big5-schedule.json") }}
]
},
{
"name": "grpc-big5",
"default": false,
"schedule": [
{% with default_index_settings={}, index_name="big5" %}
{{ benchmark.collect(parts="grpc/grpc-schedule.json") }}
{% endwith %}
]
},
{
"name": "test",
"default": false,
Expand Down
72 changes: 72 additions & 0 deletions big5/test_procedures/grpc/grpc-schedule.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"operation": "delete-index"
},
{
"operation": {
"operation-type": "create-index",
"settings": {{index_settings | default(default_index_settings | default({})) | tojson}}
}
},
{
"name": "check-cluster-health",
"operation": {
"operation-type": "cluster-health",
"index": "{{ index_name }}",
"request-params": {
"wait_for_status": "{{cluster_health | default('green')}}",
"wait_for_no_relocating_shards": "true"
},
"retry-until-success": true
}
},
{
"operation": "grpc-index-append",
"warmup-time-period": {{ warmup_time_period | default(120) | tojson }},
"clients": {{bulk_indexing_clients | default(8)}},
"ignore-response-error-level": "{{error_level | default('non-fatal')}}"
},
{
"name": "refresh-after-index",
"operation": "refresh"
},
{
"operation": {
"operation-type": "force-merge",
"request-timeout": {{ request_timeout | default(60) | tojson }}{%- if max_num_segments is defined %},
"max-num-segments": {{ max_num_segments | tojson }}
{%- endif %}
}
},
{
"name": "refresh-after-force-merge",
"operation": "refresh"
},
{
"name": "wait-until-merges-finish",
"operation": {
"operation-type": "index-stats",
"index": "_all",
"condition": {
"path": "_all.total.merges.current",
"expected-value": 0
},
"retry-until-success": true,
"include-in-reporting": false
}
},
{
"name": "grpc-match-all",
"operation": "grpc-match-all",
"warmup-iterations": {{ warmup_iterations | default(200) | tojson }},
"iterations": {{ test_iterations | default(100) | tojson }},
"target-throughput": {{ target_throughput | default(2) | tojson }},
"clients": {{ search_clients | default(1) }}
},
{
"name": "grpc-term",
"operation": "grpc-term",
"warmup-iterations": {{ warmup_iterations | default(200) | tojson }},
"iterations": {{ test_iterations | default(100) | tojson }},
"target-throughput": {{ target_throughput | default(2) | tojson }},
"clients": {{ search_clients | default(1) }}
}
4 changes: 4 additions & 0 deletions vectorsearch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ You can define the underlying configuration of the vector search algorithm like
method definition. Check [vector search method definitions](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions)
for more details.

### gRPC No Train Test

This procedure runs the No Train Test procedure with its search components executed over gRPC/protobuf via the `transport-grpc` plugin. To use this procedure, ensure the `transport-grpc` plugin is [installed and enabled](https://github.com/opensearch-project/OpenSearch/tree/main/modules/transport-grpc#readme) on your cluster. Select this procedure with `--test-procedure="grpc-no-train-test"`, and provide the gRPC transport endpoint with `--grpc-target-hosts=<host:port>`.

### No Train Test Index Only
This procedure is used to index only vector search index which requires no training. This will be useful if
you are interested in benchmarking only indexing operation.
Expand Down
10 changes: 10 additions & 0 deletions vectorsearch/test_procedures/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@
{{ benchmark.collect(parts="common/search-only-schedule.json") }}
]
},
{
"name": "grpc-no-train-test",
"description": "no-train-test operations with knn query over gRPC/protobuf.",
"default": false,
"schedule": [
{{ benchmark.collect(parts="common/index-only-schedule.json") }},
{{ benchmark.collect(parts="common/force-merge-schedule.json") }},
{{ benchmark.collect(parts="grpc/search-only-schedule.json") }}
]
},
{
"name": "no-train-test-index-only",
"description": "Perform only indexing operation for vector search",
Expand Down
37 changes: 37 additions & 0 deletions vectorsearch/test_procedures/grpc/search-only-schedule.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"name" : "warmup-indices",
"operation" : "warmup-indices",
"index": "{{ target_index_name | default('target_index') }}"
},
{
"operation": {
"name": "grpc-prod-queries",
"operation-type": "proto-vector-search",
"index": "{{ target_index_name | default('target_index') }}",
"detailed-results": false,
"calculate-recall": false,
"response-compression-enabled": false,
{% if query_k is defined %}
"k": {{ query_k }},
{% endif %}
{% if query_max_distance is defined %}
"max_distance": {{ query_max_distance }},
{% endif %}
{% if query_min_score is defined %}
"min_score": {{ query_min_score }},
{% endif %}
"field" : "{{ target_field_name | default('target_field') }}",
"data_set_format" : "{{ query_data_set_format | default('hdf5') }}",
"data_set_path" : "{{ query_data_set_path }}",
"data_set_corpus" : "{{ query_data_set_corpus }}",
"neighbors_data_set_path" : "{{ neighbors_data_set_path }}",
"neighbors_data_set_corpus" : "{{ neighbors_data_set_corpus }}",
"neighbors_data_set_format" : "{{ neighbors_data_set_format | default('hdf5') }}",
"num_vectors" : {{ query_count | default(-1) }},
"id-field-name": "{{ id_field_name }}",
"body": {{ query_body | default ({}) | tojson }},
"filter_body": {{ filter_body | default ({}) | tojson }},
"filter_type": {{filter_type | default ({}) | tojson }}
},
"clients": {{ search_clients | default(1)}}
}
Loading