diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml
index b7374941bb024..fe35f7ea8c528 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml
@@ -147,3 +147,34 @@ setup:
   - match: { aggregations.composite_agg.buckets.2.key.num_terms: 500 }
   - match: { aggregations.composite_agg.buckets.2.doc_count: 11 }
 
+---
+"Test filters agg with doc_count":
+  - skip:
+      version: " - 7.99.99"
+      reason: "fixed in 8.0.0 to be backported to 7.11.2"
+  - do:
+      search:
+        body:
+          profile: true
+          size: 0
+          aggs:
+            f:
+              filters:
+                filters:
+                  abc:
+                    match:
+                      str: abc
+                  foo:
+                    match:
+                      str: foo
+                  xyz:
+                    match:
+                      str: xyz
+
+  - match: { hits.total.value: 5 }
+  - length: { aggregations.f.buckets: 3 }
+  - match: { aggregations.f.buckets.abc.doc_count: 11 }
+  - match: { aggregations.f.buckets.foo.doc_count: 8 }
+  - match: { aggregations.f.buckets.xyz.doc_count: 5 }
+  - match: { profile.shards.0.aggregations.0.type: FiltersAggregator.FilterByFilter }
+  - gte: { profile.shards.0.aggregations.0.debug.segments_with_doc_count: 1 }
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java
index ade535afb762e..ffb5beba8dcce 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java
@@ -633,6 +633,7 @@ public void testFilterByFilter() throws InterruptedException, IOException {
         assertThat(delegate.get("delegate"), equalTo("FiltersAggregator.FilterByFilter"));
         Map<String, Object> delegateDebug = (Map<String, Object>) delegate.get("delegate_debug");
         assertThat(delegateDebug, hasEntry("segments_with_deleted_docs", 0));
+        assertThat(delegateDebug, hasEntry("segments_with_doc_count", 0));
         assertThat(delegateDebug, hasEntry("max_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
         assertThat(delegateDebug, hasEntry("estimated_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
         assertThat((long) delegateDebug.get("estimate_cost_time"), greaterThanOrEqualTo(0L)); // ~1,276,734 nanos is normal
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java
index 9585ea16bd746..8c7ee72e7cb5a 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java
@@ -43,4 +43,13 @@ public int getDocCount(int doc) throws IOException {
     public void setLeafReaderContext(LeafReaderContext ctx) throws IOException {
         docCountPostings = ctx.reader().postings(new Term(DocCountFieldMapper.NAME, DocCountFieldMapper.NAME));
     }
+
+    public boolean alwaysOne() {
+        return docCountPostings == null;
+    }
+
+    @Override
+    public String toString() {
+        return "doc counts are " + (alwaysOne() ? "always one" : "based on " + docCountPostings);
+    }
 }
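The new `alwaysOne()` helper works because of how `_doc_count` is stored: the count rides on the postings of a marker term (the `postings(new Term(DocCountFieldMapper.NAME, DocCountFieldMapper.NAME))` call above), with the term frequency carrying each document's value, so a segment with no postings for that term contains only ordinary documents. The sketch below illustrates the lookup shape behind the `getDocCount(int)` method named in the hunk header; it is a minimal illustration against Lucene's `PostingsEnum` API, and `DocCountSketch` is a hypothetical name, not the upstream class.

import java.io.IOException;

import org.apache.lucene.index.PostingsEnum;

/**
 * Hypothetical sketch of a per-segment doc_count lookup: the count is the
 * term frequency of the marker term, and a null enum means the segment has
 * no doc_count field, so every document is worth exactly one.
 */
class DocCountSketch {
    private final PostingsEnum docCountPostings; // null: no _doc_count in this segment

    DocCountSketch(PostingsEnum docCountPostings) {
        this.docCountPostings = docCountPostings;
    }

    boolean alwaysOne() {
        return docCountPostings == null;
    }

    int getDocCount(int doc) throws IOException {
        if (docCountPostings == null) {
            return 1;
        }
        if (docCountPostings.docID() < doc) {
            docCountPostings.advance(doc); // postings iterators are forward-only
        }
        // Only an exact match means this document carries a custom count.
        return docCountPostings.docID() == doc ? docCountPostings.freq() : 1;
    }
}

A null enum is exactly the `alwaysOne()` case, which is why `FilterByFilter` below can use it to decide whether a segment should bump the new `segments_with_doc_count` debug counter.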
"always one" : "based on " + docCountPostings); + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index 368fc7a9656c9..ee7ca1ada9f2f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -15,11 +15,12 @@ import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.elasticsearch.common.ParseField; @@ -38,6 +39,7 @@ import org.elasticsearch.search.aggregations.LeafBucketCollector; import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; +import org.elasticsearch.search.aggregations.bucket.DocCountProvider; import org.elasticsearch.search.aggregations.support.AggregationContext; import java.io.IOException; @@ -275,6 +277,11 @@ public static class FilterByFilter extends FiltersAggregator { */ private BulkScorer[][] scorers; private int segmentsWithDeletedDocs; + /** + * Count of segments with documents have consult the {@code doc_count} + * field. + */ + private int segmentsWithDocCount; private FilterByFilter( String name, @@ -354,6 +361,10 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket weights = buildWeights(topLevelQuery(), filters); } Bits live = ctx.reader().getLiveDocs(); + Counter counter = new Counter(docCountProvider); + if (false == docCountProvider.alwaysOne()) { + segmentsWithDocCount++; + } for (int filterOrd = 0; filterOrd < filters.length; filterOrd++) { BulkScorer scorer; if (scorers == null) { @@ -367,9 +378,8 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket // the filter doesn't match any docs continue; } - TotalHitCountCollector collector = new TotalHitCountCollector(); - scorer.score(collector, live); - incrementBucketDocCount(filterOrd, collector.getTotalHits()); + scorer.score(counter, live); + incrementBucketDocCount(filterOrd, counter.readAndReset(ctx)); } // Throwing this exception is how we communicate to the collection mechanism that we don't need the segment. throw new CollectionTerminatedException(); @@ -379,6 +389,7 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket public void collectDebugInfo(BiConsumer add) { super.collectDebugInfo(add); add.accept("segments_with_deleted_docs", segmentsWithDeletedDocs); + add.accept("segments_with_doc_count", segmentsWithDocCount); if (estimatedCost != -1) { // -1 means we didn't estimate it. 
add.accept("estimated_cost", estimatedCost); @@ -386,6 +397,34 @@ public void collectDebugInfo(BiConsumer add) { add.accept("estimate_cost_time", estimateCostTime); } } + + /** + * Counts collected documents, delegating to {@link DocCountProvider} for + * how many documents each search hit is "worth". + */ + private static class Counter implements LeafCollector { + private final DocCountProvider docCount; + private long count; + + Counter(DocCountProvider docCount) { + this.docCount = docCount; + } + + public long readAndReset(LeafReaderContext ctx) throws IOException { + long result = count; + count = 0; + docCount.setLeafReaderContext(ctx); + return result; + } + + @Override + public void collect(int doc) throws IOException { + count += docCount.getDocCount(doc); + } + + @Override + public void setScorer(Scorable scorer) throws IOException {} + } } /**