Skip to content

Commit 477d287

Browse files
authored
Fix filter by filter execution with doc_count (#68930)
This fixed "filter by filter" execution order so it doesn't ignore `doc_count`. The "filter by filter" execution is fairly performance sensitive but when I reran performance numbers everything looked fine.
1 parent 3fc0686 commit 477d287

File tree

4 files changed

+84
-4
lines changed

4 files changed

+84
-4
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,34 @@ setup:
147147
- match: { aggregations.composite_agg.buckets.2.key.num_terms: 500 }
148148
- match: { aggregations.composite_agg.buckets.2.doc_count: 11 }
149149

150+
---
151+
"Test filters agg with doc_count":
152+
- skip:
153+
version: " - 7.99.99"
154+
reason: "fixed in 8.0.0 to be backported to 7.11.2"
155+
- do:
156+
search:
157+
body:
158+
profile: true
159+
size: 0
160+
aggs:
161+
f:
162+
filters:
163+
filters:
164+
abc:
165+
match:
166+
str: abc
167+
foo:
168+
match:
169+
str: foo
170+
xyz:
171+
match:
172+
str: xyz
173+
174+
- match: { hits.total.value: 5 }
175+
- length: { aggregations.f.buckets: 3 }
176+
- match: { aggregations.f.buckets.abc.doc_count: 11 }
177+
- match: { aggregations.f.buckets.foo.doc_count: 8 }
178+
- match: { aggregations.f.buckets.xyz.doc_count: 5 }
179+
- match: { profile.shards.0.aggregations.0.type: FiltersAggregator.FilterByFilter }
180+
- gte: { profile.shards.0.aggregations.0.debug.segments_with_doc_count: 1 }

server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
633633
assertThat(delegate.get("delegate"), equalTo("FiltersAggregator.FilterByFilter"));
634634
Map<?, ?> delegateDebug = (Map<?, ?>) delegate.get("delegate_debug");
635635
assertThat(delegateDebug, hasEntry("segments_with_deleted_docs", 0));
636+
assertThat(delegateDebug, hasEntry("segments_with_doc_count", 0));
636637
assertThat(delegateDebug, hasEntry("max_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
637638
assertThat(delegateDebug, hasEntry("estimated_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
638639
assertThat((long) delegateDebug.get("estimate_cost_time"), greaterThanOrEqualTo(0L)); // ~1,276,734 nanos is normal

server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,13 @@ public int getDocCount(int doc) throws IOException {
4343
public void setLeafReaderContext(LeafReaderContext ctx) throws IOException {
4444
docCountPostings = ctx.reader().postings(new Term(DocCountFieldMapper.NAME, DocCountFieldMapper.NAME));
4545
}
46+
47+
public boolean alwaysOne() {
48+
return docCountPostings == null;
49+
}
50+
51+
@Override
52+
public String toString() {
53+
return "doc counts are " + (alwaysOne() ? "always one" : "based on " + docCountPostings);
54+
}
4655
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@
1515
import org.apache.lucene.search.CollectionTerminatedException;
1616
import org.apache.lucene.search.IndexOrDocValuesQuery;
1717
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
18+
import org.apache.lucene.search.LeafCollector;
1819
import org.apache.lucene.search.MatchAllDocsQuery;
1920
import org.apache.lucene.search.PointRangeQuery;
2021
import org.apache.lucene.search.Query;
22+
import org.apache.lucene.search.Scorable;
2123
import org.apache.lucene.search.ScoreMode;
22-
import org.apache.lucene.search.TotalHitCountCollector;
2324
import org.apache.lucene.search.Weight;
2425
import org.apache.lucene.util.Bits;
2526
import org.elasticsearch.common.ParseField;
@@ -38,6 +39,7 @@
3839
import org.elasticsearch.search.aggregations.LeafBucketCollector;
3940
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
4041
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
42+
import org.elasticsearch.search.aggregations.bucket.DocCountProvider;
4143
import org.elasticsearch.search.aggregations.support.AggregationContext;
4244

4345
import java.io.IOException;
@@ -275,6 +277,11 @@ public static class FilterByFilter extends FiltersAggregator {
275277
*/
276278
private BulkScorer[][] scorers;
277279
private int segmentsWithDeletedDocs;
280+
/**
281+
* Count of segments whose documents have to consult the {@code doc_count}
282+
* field.
283+
*/
284+
private int segmentsWithDocCount;
278285

279286
private FilterByFilter(
280287
String name,
@@ -354,6 +361,10 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket
354361
weights = buildWeights(topLevelQuery(), filters);
355362
}
356363
Bits live = ctx.reader().getLiveDocs();
364+
Counter counter = new Counter(docCountProvider);
365+
if (false == docCountProvider.alwaysOne()) {
366+
segmentsWithDocCount++;
367+
}
357368
for (int filterOrd = 0; filterOrd < filters.length; filterOrd++) {
358369
BulkScorer scorer;
359370
if (scorers == null) {
@@ -367,9 +378,8 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket
367378
// the filter doesn't match any docs
368379
continue;
369380
}
370-
TotalHitCountCollector collector = new TotalHitCountCollector();
371-
scorer.score(collector, live);
372-
incrementBucketDocCount(filterOrd, collector.getTotalHits());
381+
scorer.score(counter, live);
382+
incrementBucketDocCount(filterOrd, counter.readAndReset(ctx));
373383
}
374384
// Throwing this exception is how we communicate to the collection mechanism that we don't need the segment.
375385
throw new CollectionTerminatedException();
@@ -379,13 +389,42 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket
379389
public void collectDebugInfo(BiConsumer<String, Object> add) {
380390
super.collectDebugInfo(add);
381391
add.accept("segments_with_deleted_docs", segmentsWithDeletedDocs);
392+
add.accept("segments_with_doc_count", segmentsWithDocCount);
382393
if (estimatedCost != -1) {
383394
// -1 means we didn't estimate it.
384395
add.accept("estimated_cost", estimatedCost);
385396
add.accept("max_cost", maxCost);
386397
add.accept("estimate_cost_time", estimateCostTime);
387398
}
388399
}
400+
401+
/**
402+
* Counts collected documents, delegating to {@link DocCountProvider} for
403+
* how many documents each search hit is "worth".
404+
*/
405+
private static class Counter implements LeafCollector {
406+
private final DocCountProvider docCount;
407+
private long count;
408+
409+
Counter(DocCountProvider docCount) {
410+
this.docCount = docCount;
411+
}
412+
413+
public long readAndReset(LeafReaderContext ctx) throws IOException {
414+
long result = count;
415+
count = 0;
416+
docCount.setLeafReaderContext(ctx);
417+
return result;
418+
}
419+
420+
@Override
421+
public void collect(int doc) throws IOException {
422+
count += docCount.getDocCount(doc);
423+
}
424+
425+
@Override
426+
public void setScorer(Scorable scorer) throws IOException {}
427+
}
389428
}
390429

391430
/**

0 commit comments

Comments
 (0)