@@ -1338,3 +1338,46 @@ setup:
  - match: { aggregations.str_terms.buckets.0.key: cow }
  - match: { aggregations.str_terms.buckets.0.doc_count: 1 }
  - match: { aggregations.str_terms.buckets.0.filter.max_number.value: 7.0 }

---
precise size:
  - do:
      bulk:
        index: test_1
        refresh: true
        body: |
          { "index": {} }
          { "str": "a" }
          { "index": {} }
          { "str": "b" }
          { "index": {} }
          { "str": "c" }
          { "index": {} }
          { "str": "b" }
          { "index": {} }
          { "str": "c" }
          { "index": {} }
          { "str": "c" }

  - do:
      search:
        index: test_1
        body:
          size: 0
          query:
            terms:
              str:
                - b
                - c
          aggs:
            str_terms:
              terms:
                size: 2
                field: str
                order:
                  - _key : asc

  - length: { aggregations.str_terms.buckets: 2 }
  - match: { aggregations.str_terms.buckets.0.key: b }
  - match: { aggregations.str_terms.buckets.0.doc_count: 2 }
  - match: { aggregations.str_terms.buckets.1.key: c }
  - match: { aggregations.str_terms.buckets.1.doc_count: 3 }
@@ -154,6 +154,21 @@ protected InternalAggregation adapt(InternalAggregation delegateResult) throws IOException {
        List<StringTerms.Bucket> buckets;
        long otherDocsCount = 0;
        BucketOrder reduceOrder = isKeyOrder(order) ? order : InternalOrder.key(true);
        /*
         * We default to a shardMinDocCount of 0 which means we'd keep all
         * hits, even those that don't have live documents or those that
         * don't match any documents in the top level query. This is correct
         * if the minDocCount is also 0, but if it is larger than 0 then we
         * don't need to send those buckets back to the coordinating node.
         * GlobalOrdinalsStringTermsAggregator doesn't collect those
         * buckets either. It's a good thing, too, because if you take them
         * into account when you sort by, say, key, you might throw away
         * buckets with actual docs in them.
         */
        long minDocCount = bucketCountThresholds.getShardMinDocCount();
        if (minDocCount == 0 && bucketCountThresholds.getMinDocCount() > 0) {
            minDocCount = 1;
        }
        if (filters.getBuckets().size() > bucketCountThresholds.getShardSize()) {
            PriorityQueue<OrdBucket> queue = new PriorityQueue<OrdBucket>(bucketCountThresholds.getShardSize()) {
                private final Comparator<Bucket> comparator = order.comparator();
@@ -165,7 +180,7 @@ protected boolean lessThan(OrdBucket a, OrdBucket b) {
            };
            OrdBucket spare = null;
            for (InternalFilters.InternalBucket b : filters.getBuckets()) {
-               if (b.getDocCount() < bucketCountThresholds.getShardMinDocCount()) {
+               if (b.getDocCount() < minDocCount) {
                    continue;
                }
                if (spare == null) {
@@ -203,7 +218,7 @@ protected boolean lessThan(OrdBucket a, OrdBucket b) {
        } else {
            buckets = new ArrayList<>(filters.getBuckets().size());
            for (InternalFilters.InternalBucket b : filters.getBuckets()) {
-               if (b.getDocCount() < bucketCountThresholds.getShardMinDocCount()) {
+               if (b.getDocCount() < minDocCount) {
                    continue;
                }
                buckets.add(buildBucket(b));
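To make the reasoning in the comment above concrete, here is a minimal standalone sketch, not Elasticsearch code: the Bucket record and topByKey helper are hypothetical simplifications of the priority-queue selection in the adapter. It shows how, under ascending key order with a precise size, a zero-doc bucket can steal a slot from a bucket that has documents, and how clamping the effective shard minimum to 1 avoids that.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class MinDocCountClampSketch {
    // Hypothetical stand-in for a terms bucket: a key and its doc count.
    record Bucket(String key, long docCount) {}

    // Simplified stand-in for the shard-side selection: keep at most
    // shardSize buckets in ascending key order, skipping buckets below
    // the effective minimum doc count.
    static List<Bucket> topByKey(List<Bucket> all, int shardSize, long effectiveMinDocCount) {
        List<Bucket> kept = new ArrayList<>();
        for (Bucket b : all) {
            if (b.docCount() < effectiveMinDocCount) {
                continue; // the fix: empty buckets never enter the queue
            }
            kept.add(b);
        }
        kept.sort(Comparator.comparing(Bucket::key));
        return kept.subList(0, Math.min(shardSize, kept.size()));
    }

    public static void main(String[] args) {
        // "a" matched its filter but no documents in the top-level query.
        List<Bucket> buckets = List.of(new Bucket("a", 0), new Bucket("b", 2), new Bucket("c", 3));
        // Effective minimum 0: the empty "a" bucket takes a slot and "c",
        // which has real docs, is thrown away.
        System.out.println(topByKey(buckets, 2, 0)); // [a, b]
        // Effective minimum clamped to 1: we get [b, c], matching what
        // GlobalOrdinalsStringTermsAggregator would collect.
        System.out.println(topByKey(buckets, 2, 1));
    }
}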
@@ -94,6 +94,13 @@ public void ensureValidity() {
        }
    }

    /**
     * The minimum number of documents a bucket must have in order to
     * be returned from a shard.
     * <p>
     * Important: The default for this is 0, but we should only return
     * 0 document buckets if {@link #getMinDocCount()} is *also* 0.
     */
    public long getShardMinDocCount() {
        return shardMinDocCount;
    }
@@ -102,6 +109,10 @@ public void setShardMinDocCount(long shardMinDocCount) {
        this.shardMinDocCount = shardMinDocCount;
    }

    /**
     * The minimum number of documents a bucket must have in order to
     * survive the final reduction.
     */
    public long getMinDocCount() {
        return minDocCount;
    }
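As a rough illustration of how the two thresholds documented above divide the work, here is a hedged sketch with hypothetical types (plain maps standing in for per-shard terms results): shards prune with the shard-level threshold, and the coordinating node prunes the merged result with the final one. It also shows why the shard-level default is low: a term below min_doc_count on every individual shard can still survive once the per-shard counts are summed.

import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class TwoPhasePruneSketch {
    // Shard phase: drop buckets below the shard-level minimum before
    // sending anything to the coordinating node.
    static Map<String, Long> shardPhase(Map<String, Long> counts, long shardMinDocCount) {
        Map<String, Long> out = new TreeMap<>();
        counts.forEach((k, v) -> {
            if (v >= shardMinDocCount) {
                out.put(k, v);
            }
        });
        return out;
    }

    // Reduce phase: merge the shard results, then apply the final
    // min_doc_count to the summed counts.
    static Map<String, Long> reducePhase(List<Map<String, Long>> shards, long minDocCount) {
        Map<String, Long> merged = new TreeMap<>();
        for (Map<String, Long> shard : shards) {
            shard.forEach((k, v) -> merged.merge(k, v, Long::sum));
        }
        merged.values().removeIf(v -> v < minDocCount);
        return merged;
    }

    public static void main(String[] args) {
        // "a" has only one doc on each shard, below the final threshold of 2,
        // but it survives the reduction because 1 + 1 == 2. A high
        // shard_min_doc_count would have dropped it too early.
        Map<String, Long> shard1 = shardPhase(Map.of("a", 1L, "b", 2L), 1);
        Map<String, Long> shard2 = shardPhase(Map.of("a", 1L, "c", 3L), 1);
        System.out.println(reducePhase(List.of(shard1, shard2), 2)); // {a=2, b=2, c=3}
    }
}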
@@ -25,6 +25,8 @@
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -1699,6 +1701,51 @@ public void testAsSubAgg() throws IOException {
        }, dft, kft);
    }

    public void testWithFilterAndPreciseSize() throws IOException {
        KeywordFieldType kft = new KeywordFieldType("k", true, true, Collections.emptyMap());
        CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
            iw.addDocument(
                org.elasticsearch.common.collect.List.of(
                    new Field("k", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                    new SortedSetDocValuesField("k", new BytesRef("a"))
                )
            );
            iw.addDocument(
                org.elasticsearch.common.collect.List.of(
                    new Field("k", new BytesRef("b"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                    new SortedSetDocValuesField("k", new BytesRef("b"))
                )
            );
            iw.addDocument(
                org.elasticsearch.common.collect.List.of(
                    new Field("k", new BytesRef("c"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                    new SortedSetDocValuesField("k", new BytesRef("c"))
                )
            );
        };
        TermsAggregationBuilder builder = new TermsAggregationBuilder("k").field("k");
        /*
         * There was a bug where we would accidentally send buckets with 0
         * docs in them back to the coordinating node, which would take up a
         * slot that a bucket with docs in it deserves. The combination of
         * ordering by bucket key, the precise size, and the top level query
         * would trigger that bug.
         */
        builder.size(2).order(BucketOrder.key(true));
        Query topLevel = new TermInSetQuery("k", new BytesRef[] { new BytesRef("b"), new BytesRef("c") });
        testCase(builder, topLevel, buildIndex, (StringTerms terms) -> {
            assertThat(
                terms.getBuckets().stream().map(StringTerms.Bucket::getKey).collect(toList()),
                equalTo(org.elasticsearch.common.collect.List.of("b", "c"))
            );
        }, kft);
        withAggregator(builder, topLevel, buildIndex, (searcher, terms) -> {
            Map<String, Object> info = new HashMap<>();
            terms.collectDebugInfo(info::put);
            assertThat(info, hasEntry("delegate", "FiltersAggregator.FilterByFilter"));
        }, kft);
    }

    private final SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();

    private List<Document> generateDocsWithNested(String id, int value, int[] nestedValues) {
        List<Document> documents = new ArrayList<>();