Skip to content

Commit 8b075da

Browse files
authored
Fix a sneaky bug in rare_terms (backport of #51868) (#51960)
When the `rare_terms` aggregation contained another aggregation it'd break them. Most of the time. This happened because the process that it uses to remove buckets that turn out not to be rare was incorrectly merging results from multiple leaves. This'd cause array index out of bounds issues. We didn't catch it in the test because the issue doesn't happen on the very first bucket. And the tests generated data in such a way that the first bucket always contained the rare terms. Randomizing the order of the generated data fixed the test so it caught the issue. Closes #51020
1 parent 55d96f3 commit 8b075da

File tree

5 files changed

+54
-13
lines changed

5 files changed

+54
-13
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,4 +313,50 @@ setup:
313313
- match: { hits.total.value: 1 }
314314
- length: { aggregations.long_terms.buckets: 0 }
315315

316+
---
317+
"sub aggs":
318+
- skip:
319+
version: " - 7.6.0"
320+
reason: Sub aggs fixed in 7.6.1
321+
322+
- do:
323+
index:
324+
refresh: true
325+
index: test_1
326+
id: 1
327+
body: { "str" : "abc", "number": 1 }
316328

329+
- do:
330+
index:
331+
refresh: true
332+
index: test_1
333+
id: 2
334+
body: { "str": "abc", "number": 2 }
335+
336+
- do:
337+
index:
338+
refresh: true
339+
index: test_1
340+
id: 3
341+
body: { "str": "bcd", "number": 3 }
342+
343+
- do:
344+
search:
345+
body:
346+
size: 0
347+
aggs:
348+
str_terms:
349+
rare_terms:
350+
field: str
351+
max_doc_count: 1
352+
aggs:
353+
max_n:
354+
max:
355+
field: number
356+
357+
- match: { hits.total.value: 3 }
358+
- length: { aggregations.str_terms.buckets: 1 }
359+
- match: { aggregations.str_terms.buckets.0.key: "bcd" }
360+
- is_false: aggregations.str_terms.buckets.0.key_as_string
361+
- match: { aggregations.str_terms.buckets.0.doc_count: 1 }
362+
- match: { aggregations.str_terms.buckets.0.max_n.value: 3.0 }

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractRareTermsAggregator.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ public abstract class AbstractRareTermsAggregator<T extends ValuesSource,
5050
protected final U includeExclude;
5151

5252
MergingBucketsDeferringCollector deferringCollector;
53-
LeafBucketCollector subCollectors;
5453
final SetBackedScalingCuckooFilter filter;
5554

5655
AbstractRareTermsAggregator(String name, AggregatorFactories factories, SearchContext context,
@@ -115,14 +114,14 @@ private String descendsFromNestedAggregator(Aggregator parent) {
115114
return null;
116115
}
117116

118-
protected void doCollect(V val, int docId) throws IOException {
117+
protected void doCollect(LeafBucketCollector subCollector, V val, int docId) throws IOException {
119118
long bucketOrdinal = addValueToOrds(val);
120119

121120
if (bucketOrdinal < 0) { // already seen
122121
bucketOrdinal = -1 - bucketOrdinal;
123-
collectExistingBucket(subCollectors, docId, bucketOrdinal);
122+
collectExistingBucket(subCollector, docId, bucketOrdinal);
124123
} else {
125-
collectBucket(subCollectors, docId, bucketOrdinal);
124+
collectBucket(subCollector, docId, bucketOrdinal);
126125
}
127126
}
128127

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongRareTermsAggregator.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,6 @@ protected SortedNumericDocValues getValues(ValuesSource.Numeric valuesSource, Le
6464
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
6565
final LeafBucketCollector sub) throws IOException {
6666
final SortedNumericDocValues values = getValues(valuesSource, ctx);
67-
if (subCollectors == null) {
68-
subCollectors = sub;
69-
}
7067
return new LeafBucketCollectorBase(sub, values) {
7168

7269
@Override
@@ -78,7 +75,7 @@ public void collect(int docId, long owningBucketOrdinal) throws IOException {
7875
final long val = values.nextValue();
7976
if (previous != val || i == 0) {
8077
if ((includeExclude == null) || (includeExclude.accept(val))) {
81-
doCollect(val, docId);
78+
doCollect(sub, val, docId);
8279
}
8380
previous = val;
8481
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,6 @@ public class StringRareTermsAggregator extends AbstractRareTermsAggregator<Value
6060
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
6161
final LeafBucketCollector sub) throws IOException {
6262
final SortedBinaryDocValues values = valuesSource.bytesValues(ctx);
63-
if (subCollectors == null) {
64-
subCollectors = sub;
65-
}
6663
return new LeafBucketCollectorBase(sub, values) {
6764
final BytesRefBuilder previous = new BytesRefBuilder();
6865

@@ -84,7 +81,7 @@ public void collect(int docId, long bucket) throws IOException {
8481
continue;
8582
}
8683

87-
doCollect(bytes, docId);
84+
doCollect(sub, bytes, docId);
8885
previous.copyBytes(bytes);
8986
}
9087
}

server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,9 @@ private void executeTestCase(boolean reduced, Query query, List<Long> dataset,
562562
try (Directory directory = newDirectory()) {
563563
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
564564
Document document = new Document();
565-
for (Long value : dataset) {
565+
List<Long> shuffledDataset = new ArrayList<>(dataset);
566+
Collections.shuffle(shuffledDataset, random());
567+
for (Long value : shuffledDataset) {
566568
if (frequently()) {
567569
indexWriter.commit();
568570
}

0 commit comments

Comments
 (0)