Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions nyc_taxis/challenges/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,10 @@
"operation": "delete-index",
"tags": ["setup"]
},
{
"operation": "delete-nyc-taxis-sample-index",
"tags": ["setup"]
},
{
"operation": {
"operation-type": "create-index",
Expand Down Expand Up @@ -926,6 +930,15 @@
"operation": "refresh",
"tags": ["setup"]
},
{
"operation": "create-nyc-taxis-sample-index",
"tags": ["setup"]
},
{
"name": "refresh-after-sample-index",
"operation": "refresh",
"tags": ["setup"]
},
{
"operation": "avg_passenger_count_aggregation",
"clients": 1,
Expand Down
44 changes: 33 additions & 11 deletions nyc_taxis/operations/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -956,6 +956,28 @@
}
}
},
{
"name": "create-nyc-taxis-sample-index",
"operation-type": "raw-request",
"index": "nyc_taxis",
"method": "POST",
"path": "/_reindex",
"body": {
"source": {
"index": "nyc_taxis"
},
"max_docs": 1000,
"dest": {
"index": "nyc_taxis_sample"
}
}
},
{
"name": "delete-nyc-taxis-sample-index",
"operation-type": "delete-index",
"index": "nyc_taxis_sample",
"only-if-exists": true
},
{
"name": "avg_passenger_count_esql_segment_partitioning",
"operation-type": "esql",
Expand Down Expand Up @@ -1019,32 +1041,32 @@
{
"name": "stats_count_group_by_esql",
"operation-type": "esql",
"query" : "FROM nyc_taxis METADATA _id | stats count(passenger_count) by _id | LIMIT 1000"
"query" : "FROM nyc_taxis_sample METADATA _id | stats count(passenger_count) by _id"
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that the LIMIT 1000 was moved to the end of these queries (after the stats commands) in another PR: #873

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having the limit before or after the stats completely changes the behaviour of the query. The other PR moved it after, and this one effectively moves it before again. I'm assuming we will see performance differences with both changes?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having said that, I understand the purpose of these queries was to limit before, so the change in this current PR is moving back towards that original purpose.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The queries haven't even run since we moved the limit to after the stats, right? So we should be able to revert to the original purpose of these benchmarks, which was to evaluate the inline stats more than the fetching part.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point!

},
{
"name": "inlinestats_count_group_by_esql",
"operation-type": "esql",
"query" : "FROM nyc_taxis METADATA _id | inline stats count(passenger_count) by _id | LIMIT 1000"
"query" : "FROM nyc_taxis_sample METADATA _id | inline stats count(passenger_count) by _id"
},
{
"name": "stats_avg_group_by_esql",
"operation-type": "esql",
"query" : "FROM nyc_taxis METADATA _id | stats avg(passenger_count) by _id | LIMIT 1000"
"query" : "FROM nyc_taxis_sample METADATA _id | stats avg(passenger_count) by _id"
},
{
"name": "inlinestats_avg_group_by_esql",
"operation-type": "esql",
"query" : "FROM nyc_taxis METADATA _id | inline stats avg(passenger_count) by _id | LIMIT 1000"
"query" : "FROM nyc_taxis_sample METADATA _id | inline stats avg(passenger_count) by _id"
},
{
"name": "stats_max_group_by_esql",
"operation-type": "esql",
"query" : "FROM nyc_taxis METADATA _id | stats max(passenger_count) by _id | LIMIT 1000"
"query" : "FROM nyc_taxis_sample METADATA _id | stats max(passenger_count) by _id"
},
{
"name": "inlinestats_max_group_by_esql",
"operation-type": "esql",
"query" : "FROM nyc_taxis METADATA _id | inline stats max(passenger_count) by _id | LIMIT 1000"
"query" : "FROM nyc_taxis_sample METADATA _id | inline stats max(passenger_count) by _id"
},
{
"name": "inlinestats_then_stats_count_esql",
Expand Down Expand Up @@ -1079,27 +1101,27 @@
{
"name": "one_chained_inlinestats_esql",
"operation-type": "esql",
"query" : "FROM nyc_taxis | inline stats s1 = sum(passenger_count) | LIMIT 1000"
"query" : "FROM nyc_taxis_sample | inline stats s1 = sum(passenger_count)"
},
{
"name": "two_chained_inlinestats_esql",
"operation-type": "esql",
"query" : "FROM nyc_taxis | inline stats s1 = sum(passenger_count) | inline stats s2 = sum(trip_distance) | LIMIT 1000"
"query" : "FROM nyc_taxis_sample | inline stats s1 = sum(passenger_count) | inline stats s2 = sum(trip_distance)"
},
{
"name": "three_chained_inlinestats_esql",
"operation-type": "esql",
"query" : "FROM nyc_taxis | inline stats s1 = sum(passenger_count) | inline stats s2 = sum(trip_distance) | inline stats s3 = sum(total_amount) | LIMIT 1000"
"query" : "FROM nyc_taxis_sample | inline stats s1 = sum(passenger_count) | inline stats s2 = sum(trip_distance) | inline stats s3 = sum(total_amount)"
},
{
"name": "multiple_stats_esql",
"operation-type": "esql",
"query" : "FROM nyc_taxis | stats sum = sum(passenger_count), count = count(*), avg = avg(passenger_count) | LIMIT 1000"
"query" : "FROM nyc_taxis_sample | stats sum = sum(passenger_count), count = count(*), avg = avg(passenger_count)"
},
{
"name": "multiple_inlinestats_esql",
"operation-type": "esql",
"query" : "FROM nyc_taxis | inline stats sum = sum(passenger_count), count = count(*), avg = avg(passenger_count) | LIMIT 1000"
"query" : "FROM nyc_taxis_sample | inline stats sum = sum(passenger_count), count = count(*), avg = avg(passenger_count)"
},
{# non-serverless-doc-partitioning-marker-start #}{%- if build_flavor != "serverless" -%}
{
Expand Down
Loading