Skip to content

Commit 2d5982b

Browse files
authored
Disable optimization if we aren't sure its faster (backport of #74260) (#74563)
This disables the filter-by-filter aggregation optimization used by `terms`, `range`, `date_histogram`, and `date_range` aggregations unless we're *sure* that its faster than the "native" implementation. Mostly this is when the top level query is empty or we can merge it into the filter generated by the agg rewrite process. Now that we have hard and fast rules we can drop the cost estimation framework without too much fear. So we remove it in this change. It stomps a bunch of complexity. Sadly, without the cost estimation stuff we have to add a separate mechanism for blocking the optimization against runtime fields for which it'd be kind of garbage. For that I added another rule preventing the filter-by-filter aggregation from running against the queries made by runtime fields. Its not fool-proof, but we have control over what queries we pass as a filter so its not wide open. I spent a lot of time working on an alternative to this that preserved that fancy filter-by-filter collection mechanism and was much more kind to the query cache. It detected cases where going full filter-by-filter was bad and grouped those filters together to collect in one pass with a funny ORing collector. It *worked*. And, if we were super concerned with the performance of the `filters` aggregation it'd be the way to go. But it was very complex and it was actually slower than using the native aggregation for things like `terms` and `date_histogram`. It was glorious. But it was wrong for us. Too complex and optimized the wrong things. So here we are. Hopefully this is a fairly simple solution to a sneaky problem.
1 parent 3dc0911 commit 2d5982b

File tree

22 files changed

+1062
-784
lines changed

22 files changed

+1062
-784
lines changed

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/20_terms.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -822,8 +822,8 @@ setup:
822822
---
823823
"string profiler via global ordinals filters implementation":
824824
- skip:
825-
version: " - 7.12.99"
826-
reason: filters implementation first supported with sub-aggregators in 7.13.0
825+
version: " - 7.13.99"
826+
reason: profile info changed in 7.14.0
827827
- do:
828828
indices.create:
829829
index: test_3
@@ -879,7 +879,7 @@ setup:
879879
- match: { profile.shards.0.aggregations.0.type: StringTermsAggregatorFromFilters }
880880
- match: { profile.shards.0.aggregations.0.description: str_terms }
881881
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 0 }
882-
- match: { profile.shards.0.aggregations.0.debug.delegate: FiltersAggregator.FilterByFilter }
882+
- match: { profile.shards.0.aggregations.0.debug.delegate: FilterByFilterAggregator }
883883
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.0.query: "str:cow" }
884884
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.1.query: "str:pig" }
885885
- match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.2.query: "str:sheep" }

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,10 @@ setup:
150150
---
151151
"Test filters agg with doc_count":
152152
- skip:
153-
version: " - 7.12.99"
153+
version: " - 7.13.99"
154+
reason: profile info changed in 7.14.0
154155
features: default_shards
155-
reason: "name changed in 7.13.0"
156+
156157
- do:
157158
search:
158159
body:
@@ -177,7 +178,7 @@ setup:
177178
- match: { aggregations.f.buckets.abc.doc_count: 11 }
178179
- match: { aggregations.f.buckets.foo.doc_count: 8 }
179180
- match: { aggregations.f.buckets.xyz.doc_count: 5 }
180-
- match: { profile.shards.0.aggregations.0.type: FiltersAggregator.FilterByFilter }
181+
- match: { profile.shards.0.aggregations.0.type: FilterByFilterAggregator }
181182
# We can't assert that segments_with_doc_count_field is > 0 because we might
182183
# end up with two shards and all of the documents with the _doc_count field
183184
# may be on one field. We have a test for this in AggregationProfilerIT

server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@
4747
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
4848
import static org.hamcrest.Matchers.equalTo;
4949
import static org.hamcrest.Matchers.greaterThan;
50-
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
5150
import static org.hamcrest.Matchers.notNullValue;
5251

5352
@ESIntegTestCase.SuiteScopeTestCase
@@ -646,21 +645,17 @@ public void testFilterByFilter() throws InterruptedException, IOException {
646645
"delegate_debug",
647646
matchesMap().entry("average_docs_per_range", equalTo(RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2))
648647
.entry("ranges", 1)
649-
.entry("delegate", "FiltersAggregator.FilterByFilter")
648+
.entry("delegate", "FilterByFilterAggregator")
650649
.entry(
651650
"delegate_debug",
652651
matchesMap().entry("segments_with_deleted_docs", 0)
653652
.entry("segments_with_doc_count_field", 0)
654-
.entry("max_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2)
655-
.entry("estimated_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2)
656-
.entry("estimate_cost_time", greaterThanOrEqualTo(0L)) // ~1,276,734 nanos is normal
657653
.entry("segments_counted", 0)
658654
.entry("segments_collected", greaterThan(0))
659655
.entry(
660656
"filters",
661657
matchesList().item(
662-
matchesMap().entry("scorers_prepared_while_estimating_cost", greaterThan(0))
663-
.entry("query", "DocValuesFieldExistsQuery [field=date]")
658+
matchesMap().entry("query", "DocValuesFieldExistsQuery [field=date]")
664659
.entry("specialized_for", "docvalues_field_exists")
665660
.entry("results_from_metadata", 0)
666661
)

server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public KeywordScriptFieldType(String name) {
5353
this(name, StringFieldScript.PARSE_FROM_SOURCE, null, Collections.emptyMap(), (builder, params) -> builder);
5454
}
5555

56-
KeywordScriptFieldType(
56+
public KeywordScriptFieldType(
5757
String name,
5858
StringFieldScript.Factory scriptFactory,
5959
Script script,

server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public LongScriptFieldType(String name) {
4646
this(name, LongFieldScript.PARSE_FROM_SOURCE, null, Collections.emptyMap(), (builder, params) -> builder);
4747
}
4848

49-
LongScriptFieldType(
49+
public LongScriptFieldType(
5050
String name,
5151
LongFieldScript.Factory scriptFactory,
5252
Script script,

server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public abstract class AdaptingAggregator extends Aggregator {
3030
public AdaptingAggregator(
3131
Aggregator parent,
3232
AggregatorFactories subAggregators,
33-
CheckedFunction<AggregatorFactories, Aggregator, IOException> delegate
33+
CheckedFunction<AggregatorFactories, ? extends Aggregator, IOException> delegate
3434
) throws IOException {
3535
// Its important we set parent first or else when we build the sub-aggregators they can fail because they'll call this.parent.
3636
this.parent = parent;

server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/DocValuesFieldExistsAdapter.java

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import org.apache.lucene.search.DocValuesFieldExistsQuery;
1515
import org.apache.lucene.search.IndexSearcher;
1616
import org.apache.lucene.util.Bits;
17-
import org.elasticsearch.common.CheckedSupplier;
1817

1918
import java.io.IOException;
2019
import java.util.function.BiConsumer;
@@ -43,14 +42,6 @@ long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live)
4342
return super.count(ctx, counter, live);
4443
}
4544

46-
@Override
47-
long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException {
48-
if (canUseMetadata.get() && canCountFromMetadata(ctx)) {
49-
return 0;
50-
}
51-
return super.estimateCountCost(ctx, canUseMetadata);
52-
}
53-
5445
private boolean canCountFromMetadata(LeafReaderContext ctx) throws IOException {
5546
FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(query().getField());
5647
if (info == null) {

0 commit comments

Comments
 (0)