From f78c491255a9b7e270b4798907f98acd3ad270cf Mon Sep 17 00:00:00 2001 From: Salvatore Campagna Date: Mon, 30 Sep 2024 10:32:01 +0200 Subject: [PATCH 1/6] feature: include a variable to control synthetic_source_keep parameter Later on we will use this to change the behavior in our nightlies and run benchmarks on both `elastic/logs` and `elastic/security`. --- elastic/logs/README.md | 1 + .../logs/templates/component/track-shared-logsdb-mode.json | 4 +++- elastic/logs/track.json | 1 + elastic/security/README.md | 1 + .../templates/component/track-shared-logsdb-mode.json | 7 ++++++- 5 files changed, 12 insertions(+), 2 deletions(-) diff --git a/elastic/logs/README.md b/elastic/logs/README.md index b7aac8c27..78bffd3d3 100644 --- a/elastic/logs/README.md +++ b/elastic/logs/README.md @@ -220,6 +220,7 @@ The following parameters are available: * `corpora_uri_base` (default: `https://rally-tracks.elastic.co`) - Specify the base location of the datasets used by this track. * `lifecycle` (default: unset to fall back on Serverless detection) - Specifies the lifecycle management feature to use for data streams. Use `ilm` for index lifecycle management or `dlm` for data lifecycle management. By default, `dlm` will be used for benchmarking Serverless Elasticsearch. * `workflow-request-cache` (default: `true`) - Explicit control of request cache query parameter in searches executed in a workflow. This can be further overriden at an operation level with `request-cache` parameter. +* `synthetic_source_keep` (default: unset) - Allows overriding the default synthetic source behaviour for all field types with the following values: `none` (equivalent to unset) - no source is stored, `arrays` - source stored as is only for multi-value (array) fields, `all` - source is stored as is for single-value and multi-value fields. ### Data Download Parameters diff --git a/elastic/logs/templates/component/track-shared-logsdb-mode.json b/elastic/logs/templates/component/track-shared-logsdb-mode.json index a2b08886b..4284e88ca 100644 --- a/elastic/logs/templates/component/track-shared-logsdb-mode.json +++ b/elastic/logs/templates/component/track-shared-logsdb-mode.json @@ -4,9 +4,11 @@ {% if index_mode %} "index": { "mode": {{ index_mode | tojson }}, + {% if p_synthetic_source_keep and p_synthetic_source_keep != 'none' %} "mapping": { - "synthetic_source_keep": "arrays" + "synthetic_source_keep": "{{ p_synthetic_source_keep }}" } + {% endif %} } {% endif %} } diff --git a/elastic/logs/track.json b/elastic/logs/track.json index f50d8d1dd..f32ddfa2e 100644 --- a/elastic/logs/track.json +++ b/elastic/logs/track.json @@ -21,6 +21,7 @@ {% set p_query_time_period = (query_time_period | default(900)) %} {% set p_query_request_params = (query_request_params | default({}))%} {% set p_include_esql_queries = (include_esql_queries | default(build_flavor != "serverless")) %} +{% set p_synthetic_source_keep = (synthetic_source_keep | default)} {% set p_throttle_indexing = (throttle_indexing | default(false)) %} {% set p_max_download_gb = (max_total_download_gb | default(2 * num_corpus)) %} diff --git a/elastic/security/README.md b/elastic/security/README.md index c1e8d3f29..625f0bdaa 100644 --- a/elastic/security/README.md +++ b/elastic/security/README.md @@ -84,6 +84,7 @@ The following parameters are available: * `wait_for_status` (default: `green`) - The track creates Data Streams prior to indexing. All created Data Streams must at least reach this status before indexing commences. Reduce to `yellow` for clusters where green isn't possible e.g. single node. * `corpora_uri_base` (default: `https://rally-tracks.elastic.co`) - Specify the base location of the datasets used by this track. * `index_mode` (default: unset) - A parameter meant to be used internally which defines one of the available indexing modes, "standard", "logsdb" or "time_series". If not set, "standard" is used. +* `synthetic_source_keep` (default: unset) - Allows overriding the default synthetic source behaviour for all field types with the following values: `none` (equivalent to unset) - no source is stored, `arrays` - source stored as is only for multi-value (array) fields, `all` - source is stored as is for single-value and multi-value fields. ### Data Generation Parameters diff --git a/elastic/security/templates/component/track-shared-logsdb-mode.json b/elastic/security/templates/component/track-shared-logsdb-mode.json index 1f6860869..3785327d3 100644 --- a/elastic/security/templates/component/track-shared-logsdb-mode.json +++ b/elastic/security/templates/component/track-shared-logsdb-mode.json @@ -4,9 +4,14 @@ {% if index_mode %} "index": { "mode": {{ index_mode | tojson }}, + {% if p_synthetic_source_keep and p_synthetic_source_keep != 'none' %} + "mapping": { + "synthetic_source_keep": "{{ p_synthetic_source_keep }}" + }, + {% endif %} "sort.field": [ "host.hostname", "@timestamp" ], "sort.order": [ "asc", "desc" ], - "sort.missing": ["_first", "_last"] + "sort.missing": ["_first", "_last"], } {% endif %} } From 07396cab24be512bddb1228071445eddb1ea3d00 Mon Sep 17 00:00:00 2001 From: Salvatore Campagna Date: Mon, 30 Sep 2024 10:36:57 +0200 Subject: [PATCH 2/6] fix: misplaced comma --- .../security/templates/component/track-shared-logsdb-mode.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elastic/security/templates/component/track-shared-logsdb-mode.json b/elastic/security/templates/component/track-shared-logsdb-mode.json index 3785327d3..e29bd11ad 100644 --- a/elastic/security/templates/component/track-shared-logsdb-mode.json +++ b/elastic/security/templates/component/track-shared-logsdb-mode.json @@ -11,7 +11,7 @@ {% endif %} "sort.field": [ "host.hostname", "@timestamp" ], "sort.order": [ "asc", "desc" ], - "sort.missing": ["_first", "_last"], + "sort.missing": ["_first", "_last"] } {% endif %} } From ea404d7d8d1c608d35a8dfa34d17f8768145abc9 Mon Sep 17 00:00:00 2001 From: Salvatore Campagna Date: Mon, 30 Sep 2024 11:21:02 +0200 Subject: [PATCH 3/6] fix: 'all' not allowed --- elastic/logs/README.md | 2 +- elastic/security/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/elastic/logs/README.md b/elastic/logs/README.md index 78bffd3d3..d68b52bf9 100644 --- a/elastic/logs/README.md +++ b/elastic/logs/README.md @@ -220,7 +220,7 @@ The following parameters are available: * `corpora_uri_base` (default: `https://rally-tracks.elastic.co`) - Specify the base location of the datasets used by this track. * `lifecycle` (default: unset to fall back on Serverless detection) - Specifies the lifecycle management feature to use for data streams. Use `ilm` for index lifecycle management or `dlm` for data lifecycle management. By default, `dlm` will be used for benchmarking Serverless Elasticsearch. * `workflow-request-cache` (default: `true`) - Explicit control of request cache query parameter in searches executed in a workflow. This can be further overriden at an operation level with `request-cache` parameter. -* `synthetic_source_keep` (default: unset) - Allows overriding the default synthetic source behaviour for all field types with the following values: `none` (equivalent to unset) - no source is stored, `arrays` - source stored as is only for multi-value (array) fields, `all` - source is stored as is for single-value and multi-value fields. +* `synthetic_source_keep` (default: unset) - Allows overriding the default synthetic source behaviour for all field types with the following values: `none` (equivalent to unset) - no source is stored, `arrays` - source stored as is only for multi-value (array) fields. ### Data Download Parameters diff --git a/elastic/security/README.md b/elastic/security/README.md index 625f0bdaa..635948592 100644 --- a/elastic/security/README.md +++ b/elastic/security/README.md @@ -84,7 +84,7 @@ The following parameters are available: * `wait_for_status` (default: `green`) - The track creates Data Streams prior to indexing. All created Data Streams must at least reach this status before indexing commences. Reduce to `yellow` for clusters where green isn't possible e.g. single node. * `corpora_uri_base` (default: `https://rally-tracks.elastic.co`) - Specify the base location of the datasets used by this track. * `index_mode` (default: unset) - A parameter meant to be used internally which defines one of the available indexing modes, "standard", "logsdb" or "time_series". If not set, "standard" is used. -* `synthetic_source_keep` (default: unset) - Allows overriding the default synthetic source behaviour for all field types with the following values: `none` (equivalent to unset) - no source is stored, `arrays` - source stored as is only for multi-value (array) fields, `all` - source is stored as is for single-value and multi-value fields. +* `synthetic_source_keep` (default: unset) - Allows overriding the default synthetic source behaviour for all field types with the following values: `none` (equivalent to unset) - no source is stored, `arrays` - source stored as is only for multi-value (array) fields. ### Data Generation Parameters From d95e807762f29308bb1664d7b52a1053eaff71ac Mon Sep 17 00:00:00 2001 From: Salvatore Campagna Date: Mon, 30 Sep 2024 11:31:34 +0200 Subject: [PATCH 4/6] fix: p_synthetic_source_keep missing and trailing % --- elastic/logs/track.json | 2 +- elastic/security/track.json | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/elastic/logs/track.json b/elastic/logs/track.json index f32ddfa2e..0daa7a87c 100644 --- a/elastic/logs/track.json +++ b/elastic/logs/track.json @@ -21,7 +21,7 @@ {% set p_query_time_period = (query_time_period | default(900)) %} {% set p_query_request_params = (query_request_params | default({}))%} {% set p_include_esql_queries = (include_esql_queries | default(build_flavor != "serverless")) %} -{% set p_synthetic_source_keep = (synthetic_source_keep | default)} +{% set p_synthetic_source_keep = (synthetic_source_keep | default) %} {% set p_throttle_indexing = (throttle_indexing | default(false)) %} {% set p_max_download_gb = (max_total_download_gb | default(2 * num_corpus)) %} diff --git a/elastic/security/track.json b/elastic/security/track.json index 9aa022cdf..6a199791e 100644 --- a/elastic/security/track.json +++ b/elastic/security/track.json @@ -9,6 +9,7 @@ {% set p_number_of_shards = (number_of_shards | default(1)) %} {% set p_number_of_replicas = (number_of_replicas | default(1)) %} {% set p_skip_delete_component_template = (skip_delete_component_template | default(false) ) %} +{% set p_synthetic_source_keep = (synthetic_source_keep | default) %} {% set p_integration_ratios = (integration_ratios | default({ "auditbeat": { "corpora": { From e284fc6ac8abe422f8b0703d6124ad250e78b494 Mon Sep 17 00:00:00 2001 From: Salvatore Campagna Date: Mon, 30 Sep 2024 13:56:15 +0200 Subject: [PATCH 5/6] fix: no need to have empty default --- elastic/logs/track.json | 2 +- elastic/security/track.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/elastic/logs/track.json b/elastic/logs/track.json index 0daa7a87c..d0584c8f7 100644 --- a/elastic/logs/track.json +++ b/elastic/logs/track.json @@ -21,7 +21,7 @@ {% set p_query_time_period = (query_time_period | default(900)) %} {% set p_query_request_params = (query_request_params | default({}))%} {% set p_include_esql_queries = (include_esql_queries | default(build_flavor != "serverless")) %} -{% set p_synthetic_source_keep = (synthetic_source_keep | default) %} +{% set p_synthetic_source_keep = synthetic_source_keep %} {% set p_throttle_indexing = (throttle_indexing | default(false)) %} {% set p_max_download_gb = (max_total_download_gb | default(2 * num_corpus)) %} diff --git a/elastic/security/track.json b/elastic/security/track.json index 6a199791e..3c711683a 100644 --- a/elastic/security/track.json +++ b/elastic/security/track.json @@ -9,7 +9,7 @@ {% set p_number_of_shards = (number_of_shards | default(1)) %} {% set p_number_of_replicas = (number_of_replicas | default(1)) %} {% set p_skip_delete_component_template = (skip_delete_component_template | default(false) ) %} -{% set p_synthetic_source_keep = (synthetic_source_keep | default) %} +{% set p_synthetic_source_keep = synthetic_source_keep %} {% set p_integration_ratios = (integration_ratios | default({ "auditbeat": { "corpora": { From 7a83af5d3fdf8cd91629ebc24f679a6684a715a0 Mon Sep 17 00:00:00 2001 From: Salvatore Campagna Date: Mon, 30 Sep 2024 14:43:40 +0200 Subject: [PATCH 6/6] fix: remove p_synthetic_source_keep --- .../logs/templates/component/track-shared-logsdb-mode.json | 4 ++-- elastic/logs/track.json | 1 - .../templates/component/track-shared-logsdb-mode.json | 4 ++-- elastic/security/track.json | 1 - 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/elastic/logs/templates/component/track-shared-logsdb-mode.json b/elastic/logs/templates/component/track-shared-logsdb-mode.json index 4284e88ca..dce96a167 100644 --- a/elastic/logs/templates/component/track-shared-logsdb-mode.json +++ b/elastic/logs/templates/component/track-shared-logsdb-mode.json @@ -4,9 +4,9 @@ {% if index_mode %} "index": { "mode": {{ index_mode | tojson }}, - {% if p_synthetic_source_keep and p_synthetic_source_keep != 'none' %} + {% if synthetic_source_keep and synthetic_source_keep != 'none' %} "mapping": { - "synthetic_source_keep": "{{ p_synthetic_source_keep }}" + "synthetic_source_keep": "{{ synthetic_source_keep }}" } {% endif %} } diff --git a/elastic/logs/track.json b/elastic/logs/track.json index d0584c8f7..f50d8d1dd 100644 --- a/elastic/logs/track.json +++ b/elastic/logs/track.json @@ -21,7 +21,6 @@ {% set p_query_time_period = (query_time_period | default(900)) %} {% set p_query_request_params = (query_request_params | default({}))%} {% set p_include_esql_queries = (include_esql_queries | default(build_flavor != "serverless")) %} -{% set p_synthetic_source_keep = synthetic_source_keep %} {% set p_throttle_indexing = (throttle_indexing | default(false)) %} {% set p_max_download_gb = (max_total_download_gb | default(2 * num_corpus)) %} diff --git a/elastic/security/templates/component/track-shared-logsdb-mode.json b/elastic/security/templates/component/track-shared-logsdb-mode.json index e29bd11ad..adc476be7 100644 --- a/elastic/security/templates/component/track-shared-logsdb-mode.json +++ b/elastic/security/templates/component/track-shared-logsdb-mode.json @@ -4,9 +4,9 @@ {% if index_mode %} "index": { "mode": {{ index_mode | tojson }}, - {% if p_synthetic_source_keep and p_synthetic_source_keep != 'none' %} + {% if synthetic_source_keep and synthetic_source_keep != 'none' %} "mapping": { - "synthetic_source_keep": "{{ p_synthetic_source_keep }}" + "synthetic_source_keep": "{{ synthetic_source_keep }}" }, {% endif %} "sort.field": [ "host.hostname", "@timestamp" ], diff --git a/elastic/security/track.json b/elastic/security/track.json index 3c711683a..9aa022cdf 100644 --- a/elastic/security/track.json +++ b/elastic/security/track.json @@ -9,7 +9,6 @@ {% set p_number_of_shards = (number_of_shards | default(1)) %} {% set p_number_of_replicas = (number_of_replicas | default(1)) %} {% set p_skip_delete_component_template = (skip_delete_component_template | default(false) ) %} -{% set p_synthetic_source_keep = synthetic_source_keep %} {% set p_integration_ratios = (integration_ratios | default({ "auditbeat": { "corpora": {