Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 66 additions & 21 deletions big5/operations/ppl.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} match(`process.name`, 'kernel') | sort + `@timestamp` | head 10"
"query": "source = {{index_name | default('big5')}} {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}process.name=kernel{% else %}match(`process.name`, 'kernel'){% endif %} | sort + `@timestamp` | head 10"
}
},
{
Expand All @@ -40,7 +40,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} match(`process.name`, 'kernel') | sort + `@timestamp` | head 10"
"query": "source = {{index_name | default('big5')}} {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}process.name=kernel{% else %}match(`process.name`, 'kernel'){% endif %} | sort + `@timestamp` | head 10"
}
},
{
Expand Down Expand Up @@ -76,7 +76,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats count() by `process.name`, `cloud.region`, `aws.cloudwatch.log_stream` | sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream` | head 10"
"query": "source = {{index_name | default('big5')}} | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}bucket_nullable = false {% endif %}count() by `process.name`, `cloud.region`, `aws.cloudwatch.log_stream` | sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream` | head 10"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need a space between } and c?

{% endif %}count() 

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the current format is correct and functional.

}
},
{
Expand All @@ -85,7 +85,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats count() by `process.name`, `cloud.region` | sort - `process.name`, + `cloud.region` | head 10"
"query": "source = {{index_name | default('big5')}} | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}bucket_nullable = false {% endif %}count() by `process.name`, `cloud.region` | sort - `process.name`, + `cloud.region` | head 10"
}
},
{
Expand All @@ -112,7 +112,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} match(`process.name`, 'kernel') | sort - `@timestamp` | head 10"
"query": "source = {{index_name | default('big5')}} {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}process.name=kernel{% else %}match(`process.name`, 'kernel'){% endif %} | sort - `@timestamp` | head 10"
}
},
{
Expand All @@ -121,7 +121,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} match(`process.name`, 'kernel') | sort - `@timestamp` | head 10"
"query": "source = {{index_name | default('big5')}} {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}process.name=kernel{% else %}match(`process.name`, 'kernel'){% endif %} | sort - `@timestamp` | head 10"
}
},
{
Expand All @@ -148,7 +148,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} match(`process.name`, 'kernel') | where `@timestamp` >= '2023-01-01 00:00:00' and `@timestamp` < '2023-01-03 00:00:00' | head 10"
"query": "source = {{index_name | default('big5')}} {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}process.name=kernel{% else %}match(`process.name`, 'kernel'){% endif %} | where `@timestamp` >= '2023-01-01 00:00:00' and `@timestamp` < '2023-01-03 00:00:00' | head 10"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand this correctly, if the version is equal or greater than 3.3.0 then use process.name, else use "`process.name`" ? Would this cause existing customer queries to break when they upgrade from lower versions to 3.3.0 and above or is it backward compatible?

Copy link
Contributor Author

@noCharger noCharger Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand this correctly, if the version is equal or greater than 3.3.0 then use process.name, else use "process.name" ? Would this cause existing customer queries to break when they upgrade from lower versions to 3.3.0 and above or is it backward compatible?

opensearch-project/sql#4152 corrected usage of match function

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK we are not supposed to have breaking changes in minor version updates.
@penghuo @peterzhuamazon @getsaurabh02

Copy link
Member

@peterzhuamazon peterzhuamazon Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, breaking changes are only allowed in major releases, especially for APIs.
I think it is OK to deprecate an API but still support it until the next major release, but it is not OK to completely alter its functionality to the point that the original behavior is removed.

cc: @getsaurabh02 into the conversation here, and whether we should restore the original behavior, possibly in a patch or in 3.4.0.

Thanks.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@peterzhuamazon @noCharger
I don’t consider this a breaking change. The reason is that using the match function within the search command is not by design. In PPL 3.2, the documentation only describes using match within the where clause.

}
},
{
Expand All @@ -157,7 +157,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | stats count() as country by `aws.cloudwatch.log_stream` | sort - country | head 50"
"query": "source = {{index_name | default('big5')}} | stats {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}bucket_nullable = false {% endif %}count() as country by `aws.cloudwatch.log_stream` | sort - country | head 50"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this a new param introduced in 3.3.0?

Copy link
Contributor Author

@noCharger noCharger Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, as mentioned in description section

}
},
{
Expand All @@ -166,7 +166,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | stats count() as station by `aws.cloudwatch.log_stream` | sort - station | head 500"
"query": "source = {{index_name | default('big5')}} | stats {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}bucket_nullable = false {% endif %}count() as station by `aws.cloudwatch.log_stream` | sort - station | head 500"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as comment above about spacing.

}
},
{
Expand All @@ -175,7 +175,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | where `@timestamp` >= '2023-01-05 00:00:00' and `@timestamp` < '2023-01-05 05:00:00' | stats count() by `process.name`, `cloud.region` | sort - `count()`"
"query": "source = {{index_name | default('big5')}} | where `@timestamp` >= '2023-01-05 00:00:00' and `@timestamp` < '2023-01-05 05:00:00' | stats {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}bucket_nullable = false {% endif %}count() by `process.name`, `cloud.region` | sort - `count()` | head 10"
}
},
{
Expand All @@ -184,7 +184,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} query_string(['message'], 'monkey jackal bear') | where `@timestamp` >= '2023-01-03 00:00:00' and `@timestamp` < '2023-01-03 10:00:00' | sort + `@timestamp` | head 10"
"query": "source = {{index_name | default('big5')}} | where query_string(['message'], 'monkey jackal bear') and `@timestamp` >= '2023-01-03 00:00:00' and `@timestamp` < '2023-01-03 10:00:00' | sort + `@timestamp` | head 10"
}
},
{
Expand All @@ -193,7 +193,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} query_string(['message'], 'monkey jackal bear') | where `@timestamp` >= '2023-01-03 00:00:00' and `@timestamp` < '2023-01-03 10:00:00' | head 10"
"query": "source = {{index_name | default('big5')}} | where query_string(['message'], 'monkey jackal bear') and `@timestamp` >= '2023-01-03 00:00:00' and `@timestamp` < '2023-01-03 10:00:00' | head 10"
}
},
{
Expand All @@ -202,7 +202,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} query_string(['message'], 'monkey jackal bear') | head 10"
"query": "source = {{index_name | default('big5')}} {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}| where query_string(['message'], 'monkey jackal bear'){% else %}query_string(['message'], 'monkey jackal bear'){% endif %} | head 10"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would queries still work in 3.3.0 without the where clause?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, that change was also introduced by opensearch-project/sql#4152.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It’s not a breaking change — we never intended to support complex WHERE clauses in the search command. As documented here and here only simple filters are supported.

The earlier support for complex conditions was unintentional, resulting from the use of logicalExpression in the search command. This has been corrected in the current release, giving us a clearer distinction and roadmap for both commands.

@noCharger Could you please reword the description? This is essentially a rectification of the benchmark workload queries.

}
},
{
Expand All @@ -211,7 +211,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | eval range_bucket = case(`metrics.size` < -10, 'range_1', `metrics.size` >= -10 and `metrics.size` < 10, 'range_2', `metrics.size` >= 10 and `metrics.size` < 100, 'range_3', `metrics.size` >= 100 and `metrics.size` < 1000, 'range_4', `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5', `metrics.size` >= 2000, 'range_6') | stats min(`metrics.tmin`) as tmin, avg(`metrics.size`) as tavg, max(`metrics.size`) as tmax by range_bucket, span(`@timestamp`, 1h) as auto_span | sort + range_bucket, + auto_span"
"query": "source = {{index_name | default('big5')}} | eval range_bucket = case(`metrics.size` < -10, 'range_1', `metrics.size` >= -10 and `metrics.size` < 10, 'range_2', `metrics.size` >= 10 and `metrics.size` < 100, 'range_3', `metrics.size` >= 100 and `metrics.size` < 1000, 'range_4', `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5', `metrics.size` >= 2000, 'range_6') | {% if distribution_version.split('.') | map('int') | list >= '3.4.0'.split('.') | map('int') | list %}bin `@timestamp` bins=10 | stats min(`metrics.tmin`) as tmin, avg(`metrics.size`) as tavg, max(`metrics.size`) as tmax by range_bucket, `@timestamp`{% else %}stats min(`metrics.tmin`) as tmin, avg(`metrics.size`) as tavg, max(`metrics.size`) as tmax by range_bucket, span(`@timestamp`, 1h) as auto_span | sort + range_bucket, + auto_span{% endif %}"
}
},
{
Expand All @@ -220,7 +220,52 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | eval range_bucket = case(`metrics.size` < -10, 'range_1', `metrics.size` >= -10 and `metrics.size` < 10, 'range_2', `metrics.size` >= 10 and `metrics.size` < 100, 'range_3', `metrics.size` >= 100 and `metrics.size` < 1000, 'range_4', `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5', `metrics.size` >= 2000, 'range_6') | stats count() by range_bucket, span(`@timestamp`, 1h) as auto_span | sort + range_bucket, + auto_span"
"query": "source = {{index_name | default('big5')}} | eval range_bucket = case(`metrics.size` < -10, 'range_1', `metrics.size` >= -10 and `metrics.size` < 10, 'range_2', `metrics.size` >= 10 and `metrics.size` < 100, 'range_3', `metrics.size` >= 100 and `metrics.size` < 1000, 'range_4', `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5', `metrics.size` >= 2000, 'range_6') | {% if distribution_version.split('.') | map('int') | list >= '3.4.0'.split('.') | map('int') | list %}bin `@timestamp` bins=20 | stats count() by range_bucket, `@timestamp`{% else %}stats count() by range_bucket, span(`@timestamp`, 1h) as auto_span | sort + range_bucket, + auto_span{% endif %}"
}
},
{
"name": "ppl-cardinality-agg-high",
"operation-type": "raw-request",
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | stats {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}bucket_nullable = false {% endif %}dc(`agent.name`)"
}
},
{
"name": "ppl-cardinality-agg-high-2",
"operation-type": "raw-request",
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | stats {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}bucket_nullable = false {% endif %}dc(`event.id`)"
}
},
{
"name": "ppl-cardinality-agg-low",
"operation-type": "raw-request",
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | stats {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}bucket_nullable = false {% endif %}dc(`cloud.region`)"
}
},
{
"name": "ppl-range-agg-1",
"operation-type": "raw-request",
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | eval range_bucket = case(`metrics.size` < -10, 'range_1', `metrics.size` >= -10 and `metrics.size` < 10, 'range_2', `metrics.size` >= 10 and `metrics.size` < 100, 'range_3', `metrics.size` >= 100 and `metrics.size` < 1000, 'range_4', `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5', `metrics.size` >= 2000, 'range_6') | stats count() by range_bucket"
}
},
{
"name": "ppl-range-agg-2",
"operation-type": "raw-request",
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | eval range_bucket = case(`metrics.size` < 100, 'range_1', `metrics.size` >= 100 and `metrics.size` < 1000, 'range_2', `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_3', `metrics.size` >= 2000, 'range_4') | stats count() by range_bucket"
}
},
{
Expand Down Expand Up @@ -301,7 +346,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} match(`process.name`, 'kernel') | sort + `@timestamp` | head 10"
"query": "source = {{index_name | default('big5')}} {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}process.name=kernel{% else %}match(`process.name`, 'kernel'){% endif %} | sort + `meta.file` | head 10"
}
},
{
Expand All @@ -310,7 +355,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} match(`process.name`, 'kernel') | sort + `@timestamp` | head 10"
"query": "source = {{index_name | default('big5')}} {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}process.name=kernel{% else %}match(`process.name`, 'kernel'){% endif %} | sort + `meta.file` | head 10"
}
},
{
Expand All @@ -319,7 +364,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} match(`log.file.path`, '/var/log/messages/solarshark') | sort + `metrics.size` | head 10"
"query": "source = {{index_name | default('big5')}} {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}log.file.path=\"/var/log/messages/solarshark\"{% else %}match(`log.file.path`, '/var/log/messages/solarshark'){% endif %} | sort + `metrics.size` | head 10"
}
},
{
Expand All @@ -337,7 +382,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} match(`log.file.path`, '/var/log/messages/solarshark') | sort - `metrics.size` | head 10"
"query": "source = {{index_name | default('big5')}} {% if distribution_version.split('.') | map('int') | list >= '3.3.0'.split('.') | map('int') | list %}log.file.path=\"/var/log/messages/solarshark\"{% else %}match(`log.file.path`, '/var/log/messages/solarshark'){% endif %} | sort - `metrics.size` | head 10"
}
},
{
Expand All @@ -355,7 +400,7 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | where `@timestamp` >= '2023-01-01 00:00:00' and `@timestamp` < '2023-01-03 00:00:00' | stats count() by `aws.cloudwatch.log_stream` | head 10"
"query": "source = {{index_name | default('big5')}} | where `@timestamp` >= '2023-01-01 00:00:00' and `@timestamp` < '2023-01-03 00:00:00' | stats count() by `aws.cloudwatch.log_stream`, `process.name` | head 10"
}
},
{
Expand All @@ -364,6 +409,6 @@
"path": "/_plugins/_ppl",
"method": "POST",
"body": {
"query": "source = {{index_name | default('big5')}} | where `@timestamp` >= '2023-01-01 00:00:00' and `@timestamp` < '2023-01-03 00:00:00' | stats count() by `process.name` | head 10"
"query": "source = {{index_name | default('big5')}} | where `@timestamp` >= '2023-01-01 00:00:00' and `@timestamp` < '2023-01-03 00:00:00' | stats count() by `process.name`, `aws.cloudwatch.log_stream` | head 10"
}
}
22 changes: 22 additions & 0 deletions big5/queries/ppl/cardinality_agg_high.ppl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
{
"name": "cardinality-agg-high",
"operation-type": "search",
"index": "{{index_name | default('big5')}}",
"body": {
"size": 0,
"aggs": {
"agent": {
"cardinality": {
"field": "agent.name"
{% if distribution_version.split('.') | map('int') | list >= "2.19.1".split('.') | map('int') | list and distribution_version.split('.') | map('int') | list < "6.0.0".split('.') | map('int') | list %}
, "execution_hint": "ordinals"
{% endif %}
}
}
}
}
}
*/
/* PPL form of the cardinality aggregation in the header above: a single
   distinct count of `agent.name` over every document in the source index
   (no filter, no grouping). */
source = big5
| stats dc(`agent.name`)
21 changes: 21 additions & 0 deletions big5/queries/ppl/cardinality_agg_high_2.ppl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
{
"name": "cardinality-agg-high-2",
"operation-type": "search",
"index": "{{index_name | default('big5')}}",
"request-timeout": 1800,
"body": {
"size": 0,
"aggs": {
"agent": {
"cardinality": {
"field": "event.id",
"execution_hint":"ordinals"
}
}
}
}
}
*/
/* PPL form of the cardinality aggregation in the header above: a single
   distinct count of `event.id` over every document in the source index
   (no filter, no grouping). */
source = big5
| stats dc(`event.id`)
19 changes: 19 additions & 0 deletions big5/queries/ppl/cardinality_agg_low.ppl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
{
"name": "cardinality-agg-low",
"operation-type": "search",
"index": "{{index_name | default('big5')}}",
"body": {
"size": 0,
"aggs": {
"region": {
"cardinality": {
"field": "cloud.region"
}
}
}
}
}
*/
/* PPL form of the cardinality aggregation in the header above: a single
   distinct count of `cloud.region` over every document in the source index
   (no filter, no grouping). */
source = big5
| stats dc(`cloud.region`)
50 changes: 50 additions & 0 deletions big5/queries/ppl/range_agg_1.ppl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
{
"name": "range-agg-1",
"operation-type": "search",
"index": "{{index_name | default('big5')}}",
"body": {
"size": 0,
"aggs": {
"tmax": {
"range": {
"field": "metrics.size",
"ranges": [
{
"to": -10
},
{
"from": -10,
"to": 10
},
{
"from": 10,
"to": 100
},
{
"from": 100,
"to": 1000
},
{
"from": 1000,
"to": 2000
},
{
"from": 2000
}
]
}
}
}
}
}
*/
/* PPL form of the range aggregation in the header above.
   Step 1 (eval): label each document with a range_bucket chosen by
   `metrics.size`; each interval includes its lower bound and excludes its
   upper bound, with boundaries at -10, 10, 100, 1000 and 2000.
   Step 2 (stats): count documents per range_bucket label.
   NOTE(review): case() has no else branch, so documents matching none of the
   conditions (e.g. a missing `metrics.size`) presumably get a null
   range_bucket - confirm how that group should be treated. */
source = big5
| eval range_bucket = case(
`metrics.size` < -10, 'range_1',
`metrics.size` >= -10 and `metrics.size` < 10, 'range_2',
`metrics.size` >= 10 and `metrics.size` < 100, 'range_3',
`metrics.size` >= 100 and `metrics.size` < 1000, 'range_4',
`metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5',
`metrics.size` >= 2000, 'range_6')
| stats count() by range_bucket
40 changes: 40 additions & 0 deletions big5/queries/ppl/range_agg_2.ppl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
{
"name": "range-agg-2",
"operation-type": "search",
"index": "{{index_name | default('big5')}}",
"body": {
"size": 0,
"aggs": {
"tmax": {
"range": {
"field": "metrics.size",
"ranges": [
{
"to": 100
},
{
"from": 100,
"to": 1000
},
{
"from": 1000,
"to": 2000
},
{
"from": 2000
}
]
}
}
}
}
}
*/
/* PPL form of the range aggregation in the header above.
   Step 1 (eval): label each document with a range_bucket chosen by
   `metrics.size`; each interval includes its lower bound and excludes its
   upper bound, with boundaries at 100, 1000 and 2000.
   Step 2 (stats): count documents per range_bucket label.
   NOTE(review): case() has no else branch, so documents matching none of the
   conditions (e.g. a missing `metrics.size`) presumably get a null
   range_bucket - confirm how that group should be treated. */
source = big5
| eval range_bucket = case(
`metrics.size` < 100, 'range_1',
`metrics.size` >= 100 and `metrics.size` < 1000, 'range_2',
`metrics.size` >= 1000 and `metrics.size` < 2000, 'range_3',
`metrics.size` >= 2000, 'range_4')
| stats count() by range_bucket
Loading
Loading