Skip to content

Commit d01fcee

Browse files
authored
Fix illegal cast of the "low cardinality" optimization of the terms aggregation. (#27543)
The GlobalOrdinalsStringTermsAggregator.LowCardinality aggregator casts global values to `GlobalOrdinalMapping`, even though the implementation of global values is different when a `missing` value is configured. This commit adds a new API that gives access to the ordinal remapping in order to fix this problem.
1 parent 996990a commit d01fcee

File tree

9 files changed

+229
-70
lines changed

9 files changed

+229
-70
lines changed

core/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalMapping.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
/**
3030
* A {@link SortedSetDocValues} implementation that returns ordinals that are global.
3131
*/
32-
public class GlobalOrdinalMapping extends SortedSetDocValues {
32+
final class GlobalOrdinalMapping extends SortedSetDocValues {
3333

3434
private final SortedSetDocValues values;
3535
private final OrdinalMap ordinalMap;
@@ -49,7 +49,7 @@ public long getValueCount() {
4949
return ordinalMap.getValueCount();
5050
}
5151

52-
public final long getGlobalOrd(long segmentOrd) {
52+
public long getGlobalOrd(long segmentOrd) {
5353
return mapping.get(segmentOrd);
5454
}
5555

core/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesSource.java

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import org.apache.lucene.util.BytesRef;
2626
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
2727
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
28-
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalMapping;
2928
import org.elasticsearch.search.aggregations.support.ValuesSource;
3029
import org.elasticsearch.search.sort.SortOrder;
3130

@@ -179,16 +178,10 @@ Collector getLeafCollector(LeafReaderContext context, Collector next) throws IOE
179178
if (lookup == null) {
180179
lookup = dvs;
181180
if (topValue != null && topValueLong == null) {
182-
if (lookup instanceof GlobalOrdinalMapping) {
183-
// Find the global ordinal (or the insertion point) for the provided top value.
184-
topValueLong = lookupGlobalOrdinals((GlobalOrdinalMapping) lookup, topValue);
185-
} else {
186-
// Global ordinals are not needed, switch back to ordinals (single segment case).
187-
topValueLong = lookup.lookupTerm(topValue);
188-
if (topValueLong < 0) {
189-
// convert negative insert position
190-
topValueLong = -topValueLong - 2;
191-
}
181+
topValueLong = lookup.lookupTerm(topValue);
182+
if (topValueLong < 0) {
183+
// convert negative insert position
184+
topValueLong = -topValueLong - 2;
192185
}
193186
}
194187
}
@@ -202,25 +195,6 @@ Collector getLeafCollector(LeafReaderContext context, Collector next) throws IOE
202195
}
203196
};
204197
}
205-
206-
private static long lookupGlobalOrdinals(GlobalOrdinalMapping mapping, BytesRef key) throws IOException {
207-
long low = 0;
208-
long high = mapping.getValueCount();
209-
210-
while (low <= high) {
211-
long mid = (low + high) >>> 1;
212-
BytesRef midVal = mapping.lookupOrd(mid);
213-
int cmp = midVal.compareTo(key);
214-
if (cmp < 0) {
215-
low = mid + 1;
216-
} else if (cmp > 0) {
217-
high = mid - 1;
218-
} else {
219-
return mid;
220-
}
221-
}
222-
return low-1;
223-
}
224198
}
225199

226200
/**

core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
import org.elasticsearch.common.util.LongHash;
3434
import org.elasticsearch.common.xcontent.XContentBuilder;
3535
import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues;
36-
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalMapping;
3736
import org.elasticsearch.search.DocValueFormat;
3837
import org.elasticsearch.search.aggregations.Aggregator;
3938
import org.elasticsearch.search.aggregations.AggregatorFactories;
@@ -50,6 +49,7 @@
5049
import java.util.Arrays;
5150
import java.util.List;
5251
import java.util.Map;
52+
import java.util.function.LongUnaryOperator;
5353

5454
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
5555

@@ -295,9 +295,8 @@ protected void doClose() {
295295
*/
296296
static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
297297

298+
private LongUnaryOperator mapping;
298299
private IntArray segmentDocCounts;
299-
private SortedSetDocValues globalOrds;
300-
private SortedSetDocValues segmentOrds;
301300

302301
LowCardinality(String name,
303302
AggregatorFactories factories,
@@ -321,14 +320,14 @@ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
321320
@Override
322321
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
323322
final LeafBucketCollector sub) throws IOException {
324-
if (segmentOrds != null) {
325-
mapSegmentCountsToGlobalCounts();
323+
if (mapping != null) {
324+
mapSegmentCountsToGlobalCounts(mapping);
326325
}
327-
globalOrds = valuesSource.globalOrdinalsValues(ctx);
328-
segmentOrds = valuesSource.ordinalsValues(ctx);
326+
final SortedSetDocValues segmentOrds = valuesSource.ordinalsValues(ctx);
329327
segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount());
330328
assert sub == LeafBucketCollector.NO_OP_COLLECTOR;
331329
final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds);
330+
mapping = valuesSource.globalOrdinalsMapping(ctx);
332331
if (singleValues != null) {
333332
return new LeafBucketCollectorBase(sub, segmentOrds) {
334333
@Override
@@ -356,9 +355,10 @@ public void collect(int doc, long bucket) throws IOException {
356355
}
357356

358357
@Override
359-
protected void doPostCollection() {
360-
if (segmentOrds != null) {
361-
mapSegmentCountsToGlobalCounts();
358+
protected void doPostCollection() throws IOException {
359+
if (mapping != null) {
360+
mapSegmentCountsToGlobalCounts(mapping);
361+
mapping = null;
362362
}
363363
}
364364

@@ -367,16 +367,7 @@ protected void doClose() {
367367
Releasables.close(segmentDocCounts);
368368
}
369369

370-
private void mapSegmentCountsToGlobalCounts() {
371-
// There is no public method in Ordinals.Docs that allows for this mapping...
372-
// This is the cleanest way I can think of so far
373-
374-
GlobalOrdinalMapping mapping;
375-
if (globalOrds.getValueCount() == segmentOrds.getValueCount()) {
376-
mapping = null;
377-
} else {
378-
mapping = (GlobalOrdinalMapping) globalOrds;
379-
}
370+
private void mapSegmentCountsToGlobalCounts(LongUnaryOperator mapping) throws IOException {
380371
for (long i = 1; i < segmentDocCounts.size(); i++) {
381372
// We use set(...) here, because we need to reset the slow to 0.
382373
// segmentDocCounts get reused over the segments and otherwise counts would be too high.
@@ -385,7 +376,7 @@ private void mapSegmentCountsToGlobalCounts() {
385376
continue;
386377
}
387378
final long ord = i - 1; // remember we do +1 when counting
388-
final long globalOrd = mapping == null ? ord : mapping.getGlobalOrd(ord);
379+
final long globalOrd = mapping.applyAsLong(ord);
389380
long bucketOrd = getBucketOrd(globalOrd);
390381
incrementBucketDocCount(bucketOrd, inc);
391382
}

core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource, TermsAggregatorFactory> {
5050
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermsAggregatorFactory.class));
5151

52+
static Boolean REMAP_GLOBAL_ORDS, COLLECT_SEGMENT_ORDS;
53+
5254
private final BucketOrder order;
5355
private final IncludeExclude includeExclude;
5456
private final String executionHint;
@@ -257,11 +259,13 @@ Aggregator create(String name,
257259

258260
final long maxOrd = getMaxOrd(valuesSource, context.searcher());
259261
assert maxOrd != -1;
260-
final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());
262+
final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());
263+
261264
if (factories == AggregatorFactories.EMPTY &&
262265
includeExclude == null &&
263266
Aggregator.descendsFromBucketAggregator(parent) == false &&
264-
ratio <= 0.5 && maxOrd <= 2048) {
267+
// we use the static COLLECT_SEGMENT_ORDS to allow tests to force specific optimizations
268+
(COLLECT_SEGMENT_ORDS!= null ? COLLECT_SEGMENT_ORDS.booleanValue() : ratio <= 0.5 && maxOrd <= 2048)) {
265269
/**
266270
* We can use the low cardinality execution mode iff this aggregator:
267271
* - has no sub-aggregator AND
@@ -276,18 +280,24 @@ Aggregator create(String name,
276280

277281
}
278282
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
279-
boolean remapGlobalOrds = true;
280-
if (includeExclude == null &&
281-
Aggregator.descendsFromBucketAggregator(parent) == false &&
282-
(factories == AggregatorFactories.EMPTY ||
283-
(isAggregationSort(order) == false && subAggCollectMode == SubAggCollectionMode.BREADTH_FIRST))) {
284-
/**
285-
* We don't need to remap global ords iff this aggregator:
286-
* - has no include/exclude rules AND
287-
* - is not a child of a bucket aggregator AND
288-
* - has no sub-aggregator or only sub-aggregator that can be deferred ({@link SubAggCollectionMode#BREADTH_FIRST}).
289-
**/
290-
remapGlobalOrds = false;
283+
boolean remapGlobalOrds;
284+
if (REMAP_GLOBAL_ORDS != null) {
285+
// We use REMAP_GLOBAL_ORDS to allow tests to force specific optimizations
286+
remapGlobalOrds = REMAP_GLOBAL_ORDS.booleanValue();
287+
} else {
288+
remapGlobalOrds = true;
289+
if (includeExclude == null &&
290+
Aggregator.descendsFromBucketAggregator(parent) == false &&
291+
(factories == AggregatorFactories.EMPTY ||
292+
(isAggregationSort(order) == false && subAggCollectMode == SubAggCollectionMode.BREADTH_FIRST))) {
293+
/**
294+
* We don't need to remap global ords iff this aggregator:
295+
* - has no include/exclude rules AND
296+
* - is not a child of a bucket aggregator AND
297+
* - has no sub-aggregator or only sub-aggregator that can be deferred ({@link SubAggCollectionMode#BREADTH_FIRST}).
298+
**/
299+
remapGlobalOrds = false;
300+
}
291301
}
292302
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
293303
format, bucketCountThresholds, filter, context, parent, remapGlobalOrds, subAggCollectMode, showTermDocCountError,

core/src/main/java/org/elasticsearch/search/aggregations/support/MissingValues.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
3232

3333
import java.io.IOException;
34+
import java.util.function.LongUnaryOperator;
3435

3536
/**
3637
* Utility class that allows to return views of {@link ValuesSource}s that
@@ -201,6 +202,13 @@ public SortedSetDocValues globalOrdinalsValues(LeafReaderContext context)
201202
SortedSetDocValues values = valuesSource.globalOrdinalsValues(context);
202203
return replaceMissing(values, missing);
203204
}
205+
206+
@Override
207+
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
208+
return getGlobalMapping(valuesSource.ordinalsValues(context),
209+
valuesSource.globalOrdinalsValues(context),
210+
valuesSource.globalOrdinalsMapping(context), missing);
211+
}
204212
};
205213
}
206214

@@ -311,6 +319,43 @@ public boolean advanceExact(int doc) throws IOException {
311319
};
312320
}
313321

322+
static LongUnaryOperator getGlobalMapping(SortedSetDocValues values, SortedSetDocValues globalValues,
323+
LongUnaryOperator segmentToGlobalOrd, BytesRef missing) throws IOException {
324+
final long missingGlobalOrd = globalValues.lookupTerm(missing);
325+
final long missingSegmentOrd = values.lookupTerm(missing);
326+
327+
if (missingSegmentOrd >= 0) {
328+
// the missing value exists in the segment, nothing to do
329+
return segmentToGlobalOrd;
330+
} else if (missingGlobalOrd >= 0) {
331+
// the missing value exists in another segment, but not the current one
332+
final long insertedSegmentOrd = -1L - missingSegmentOrd;
333+
final long insertedGlobalOrd = missingGlobalOrd;
334+
return segmentOrd -> {
335+
if (insertedSegmentOrd == segmentOrd) {
336+
return insertedGlobalOrd;
337+
} else if (insertedSegmentOrd > segmentOrd) {
338+
return segmentToGlobalOrd.applyAsLong(segmentOrd);
339+
} else {
340+
return segmentToGlobalOrd.applyAsLong(segmentOrd - 1);
341+
}
342+
};
343+
} else {
344+
// the missing value exists neither in this segment nor in another segment
345+
final long insertedSegmentOrd = -1L - missingSegmentOrd;
346+
final long insertedGlobalOrd = -1L - missingGlobalOrd;
347+
return segmentOrd -> {
348+
if (insertedSegmentOrd == segmentOrd) {
349+
return insertedGlobalOrd;
350+
} else if (insertedSegmentOrd > segmentOrd) {
351+
return segmentToGlobalOrd.applyAsLong(segmentOrd);
352+
} else {
353+
return 1 + segmentToGlobalOrd.applyAsLong(segmentOrd - 1);
354+
}
355+
};
356+
}
357+
}
358+
314359
public static ValuesSource.GeoPoint replaceMissing(final ValuesSource.GeoPoint valuesSource, final GeoPoint missing) {
315360
return new ValuesSource.GeoPoint() {
316361

core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.lucene.index.DocValues;
2323
import org.apache.lucene.index.IndexReader;
2424
import org.apache.lucene.index.LeafReaderContext;
25+
import org.apache.lucene.index.OrdinalMap;
2526
import org.apache.lucene.index.SortedNumericDocValues;
2627
import org.apache.lucene.index.SortedSetDocValues;
2728
import org.apache.lucene.search.IndexSearcher;
@@ -47,6 +48,7 @@
4748
import org.elasticsearch.search.aggregations.support.values.ScriptLongValues;
4849

4950
import java.io.IOException;
51+
import java.util.function.LongUnaryOperator;
5052

5153
public abstract class ValuesSource {
5254

@@ -90,6 +92,11 @@ public SortedBinaryDocValues bytesValues(LeafReaderContext context) throws IOExc
9092
return org.elasticsearch.index.fielddata.FieldData.emptySortedBinary();
9193
}
9294

95+
@Override
96+
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
97+
return LongUnaryOperator.identity();
98+
}
99+
93100
};
94101

95102
@Override
@@ -105,6 +112,10 @@ public abstract SortedSetDocValues ordinalsValues(LeafReaderContext context)
105112
public abstract SortedSetDocValues globalOrdinalsValues(LeafReaderContext context)
106113
throws IOException;
107114

115+
/** Returns a mapping from segment ordinals to global ordinals. */
116+
public abstract LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context)
117+
throws IOException;
118+
108119
public long globalMaxOrd(IndexSearcher indexSearcher) throws IOException {
109120
IndexReader indexReader = indexSearcher.getIndexReader();
110121
if (indexReader.leaves().isEmpty()) {
@@ -142,6 +153,18 @@ public SortedSetDocValues globalOrdinalsValues(LeafReaderContext context) {
142153
final AtomicOrdinalsFieldData atomicFieldData = global.load(context);
143154
return atomicFieldData.getOrdinalsValues();
144155
}
156+
157+
@Override
158+
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
159+
final IndexOrdinalsFieldData global = indexFieldData.loadGlobal((DirectoryReader)context.parent.reader());
160+
final OrdinalMap map = global.getOrdinalMap();
161+
if (map == null) {
162+
// segments and global ordinals are the same
163+
return LongUnaryOperator.identity();
164+
}
165+
final org.apache.lucene.util.LongValues segmentToGlobalOrd = map.getGlobalOrds(context.ord);
166+
return segmentToGlobalOrd::get;
167+
}
145168
}
146169
}
147170

core/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsIT.java renamed to core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
* specific language governing permissions and limitations
1717
* under the License.
1818
*/
19-
package org.elasticsearch.search.aggregations.bucket;
19+
package org.elasticsearch.search.aggregations.bucket.terms;
2020

2121
import org.elasticsearch.ElasticsearchException;
2222
import org.elasticsearch.action.index.IndexRequestBuilder;
@@ -34,16 +34,20 @@
3434
import org.elasticsearch.search.aggregations.AggregationTestScriptsPlugin;
3535
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
3636
import org.elasticsearch.search.aggregations.BucketOrder;
37+
import org.elasticsearch.search.aggregations.bucket.AbstractTermsTestCase;
3738
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
3839
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
3940
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
41+
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
4042
import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket;
4143
import org.elasticsearch.search.aggregations.metrics.avg.Avg;
4244
import org.elasticsearch.search.aggregations.metrics.stats.Stats;
4345
import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStats;
4446
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
4547
import org.elasticsearch.test.ESIntegTestCase;
4648
import org.hamcrest.Matchers;
49+
import org.junit.After;
50+
import org.junit.Before;
4751

4852
import java.io.IOException;
4953
import java.util.ArrayList;
@@ -84,6 +88,18 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
8488
return Collections.singleton(CustomScriptPlugin.class);
8589
}
8690

91+
@Before
92+
public void randomizeOptimizations() {
93+
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = false;randomBoolean();
94+
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = randomBoolean();
95+
}
96+
97+
@After
98+
public void resetOptimizations() {
99+
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = null;
100+
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = null;
101+
}
102+
87103
public static class CustomScriptPlugin extends AggregationTestScriptsPlugin {
88104

89105
@Override

0 commit comments

Comments
 (0)