Skip to content

Commit 766e426

Browse files
authored
Avoid doing I/O when fetching min and max for keyword fields (#92026)
Whenever a search sorts on a date, numeric or keyword field as its primary sort, the can_match phase retrieves the min and max values of that field and sorts the shards accordingly (ascending or descending, depending on the sort order), so that they are queried in that order. This allows async search to expose incremental results in that same order, and enables optimizations built on top of this behaviour (#51852). For fields with points we call `getMinPackedValue` and `getMaxPackedValue`, while for keyword fields we call `Terms#getMin` and `Terms#getMax`. Elasticsearch uses `FilterTerms` implementations to cancel queries as well as to track field usage. These filter implementations should delegate their `getMin` and `getMax` calls to the wrapped `Terms` instance, which leverages the min and max values cached by the block tree. Otherwise, min and max are always retrieved from the index, which does I/O and slows the can_match phase down.
1 parent c53becb commit 766e426

File tree

5 files changed

+183
-4
lines changed

5 files changed

+183
-4
lines changed

docs/changelog/92026.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 92026
2+
summary: Avoid doing I/O when fetching min and max for keyword fields
3+
area: Search
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ static class ExitableTerms extends FilterLeafReader.FilterTerms {
173173

174174
private final QueryCancellation queryCancellation;
175175

176-
private ExitableTerms(Terms terms, QueryCancellation queryCancellation) {
176+
ExitableTerms(Terms terms, QueryCancellation queryCancellation) {
177177
super(terms);
178178
this.queryCancellation = queryCancellation;
179179
}
@@ -187,6 +187,16 @@ public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throw
187187
public TermsEnum iterator() throws IOException {
188188
return new ExitableTermsEnum(in.iterator(), queryCancellation);
189189
}
190+
191+
@Override
192+
public BytesRef getMin() throws IOException {
193+
return in.getMin();
194+
}
195+
196+
@Override
197+
public BytesRef getMax() throws IOException {
198+
return in.getMax();
199+
}
190200
}
191201

192202
/**

server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,11 @@ public interface FieldUsageNotifier {
9090
void onKnnVectorsUsed(String field);
9191
}
9292

93-
public static final class FieldUsageTrackingLeafReader extends SequentialStoredFieldsLeafReader {
93+
static final class FieldUsageTrackingLeafReader extends SequentialStoredFieldsLeafReader {
9494

9595
private final FieldUsageNotifier notifier;
9696

97-
public FieldUsageTrackingLeafReader(LeafReader in, FieldUsageNotifier notifier) {
97+
FieldUsageTrackingLeafReader(LeafReader in, FieldUsageNotifier notifier) {
9898
super(in);
9999
this.notifier = notifier;
100100
}
@@ -248,7 +248,7 @@ public void close() throws IOException {
248248
}
249249
}
250250

251-
private class FieldUsageTrackingTerms extends FilterTerms {
251+
class FieldUsageTrackingTerms extends FilterTerms {
252252

253253
private final String field;
254254

@@ -286,6 +286,16 @@ public long getSumTotalTermFreq() throws IOException {
286286
public long getSumDocFreq() throws IOException {
287287
return in.getSumDocFreq();
288288
}
289+
290+
@Override
291+
public BytesRef getMin() throws IOException {
292+
return in.getMin();
293+
}
294+
295+
@Override
296+
public BytesRef getMax() throws IOException {
297+
return in.getMax();
298+
}
289299
}
290300

291301
private class FieldUsageTrackingTermsEnum extends FilterTermsEnum {

server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.apache.lucene.document.StringField;
1717
import org.apache.lucene.index.DirectoryReader;
1818
import org.apache.lucene.index.FilterDirectoryReader;
19+
import org.apache.lucene.index.FilterLeafReader;
1920
import org.apache.lucene.index.IndexReader;
2021
import org.apache.lucene.index.IndexWriter;
2122
import org.apache.lucene.index.IndexWriterConfig;
@@ -24,6 +25,7 @@
2425
import org.apache.lucene.index.NoMergePolicy;
2526
import org.apache.lucene.index.PostingsEnum;
2627
import org.apache.lucene.index.Term;
28+
import org.apache.lucene.index.Terms;
2729
import org.apache.lucene.index.TermsEnum;
2830
import org.apache.lucene.search.BoostQuery;
2931
import org.apache.lucene.search.BulkScorer;
@@ -271,6 +273,43 @@ public void onRemoval(ShardId shardId, Accountable accountable) {
271273
IOUtils.close(reader, w, dir);
272274
}
273275

276+
public void testExitableTermsMinAndMax() throws IOException {
277+
Directory dir = newDirectory();
278+
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(null));
279+
Document doc = new Document();
280+
StringField fooField = new StringField("foo", "bar", Field.Store.NO);
281+
doc.add(fooField);
282+
w.addDocument(doc);
283+
w.flush();
284+
285+
DirectoryReader directoryReader = DirectoryReader.open(w);
286+
for (LeafReaderContext lfc : directoryReader.leaves()) {
287+
Terms terms = lfc.reader().terms("foo");
288+
FilterLeafReader.FilterTerms filterTerms = new ExitableTerms(terms, new ExitableDirectoryReader.QueryCancellation() {
289+
@Override
290+
public boolean isEnabled() {
291+
return false;
292+
}
293+
294+
@Override
295+
public void checkCancelled() {
296+
297+
}
298+
}) {
299+
@Override
300+
public TermsEnum iterator() {
301+
fail("Retrieving min and max should retrieve values from block tree instead of iterating");
302+
return null;
303+
}
304+
};
305+
assertEquals("bar", filterTerms.getMin().utf8ToString());
306+
assertEquals("bar", filterTerms.getMax().utf8ToString());
307+
}
308+
w.close();
309+
directoryReader.close();
310+
dir.close();
311+
}
312+
274313
private SparseFixedBitSet query(LeafReaderContext leaf, String field, String value) throws IOException {
275314
SparseFixedBitSet sparseFixedBitSet = new SparseFixedBitSet(leaf.reader().maxDoc());
276315
TermsEnum tenum = leaf.reader().terms(field).iterator();
server/src/test/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReaderTests.java

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.search.internal;
10+
11+
import org.apache.lucene.document.Document;
12+
import org.apache.lucene.document.Field;
13+
import org.apache.lucene.document.StringField;
14+
import org.apache.lucene.index.DirectoryReader;
15+
import org.apache.lucene.index.IndexWriter;
16+
import org.apache.lucene.index.LeafReaderContext;
17+
import org.apache.lucene.index.TermsEnum;
18+
import org.apache.lucene.store.Directory;
19+
import org.elasticsearch.test.ESTestCase;
20+
21+
import java.io.IOException;
22+
23+
public class FieldUsageTrackingDirectoryReaderTests extends ESTestCase {
24+
25+
public void testTermsMinAndMax() throws IOException {
26+
Directory dir = newDirectory();
27+
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(null));
28+
Document doc = new Document();
29+
StringField fooField = new StringField("foo", "bar", Field.Store.NO);
30+
doc.add(fooField);
31+
w.addDocument(doc);
32+
w.flush();
33+
34+
DirectoryReader directoryReader = DirectoryReader.open(w);
35+
for (LeafReaderContext lrc : directoryReader.leaves()) {
36+
FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader leafReader =
37+
new FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader(lrc.reader(), new TestFieldUsageNotifier());
38+
FieldUsageTrackingDirectoryReader.FieldUsageTrackingLeafReader.FieldUsageTrackingTerms terms =
39+
leafReader.new FieldUsageTrackingTerms("foo", lrc.reader().terms("foo")) {
40+
@Override
41+
public TermsEnum iterator() {
42+
fail("Retrieving min and max should retrieve values from block tree instead of iterating");
43+
return null;
44+
}
45+
};
46+
assertEquals("bar", terms.getMin().utf8ToString());
47+
assertEquals("bar", terms.getMax().utf8ToString());
48+
}
49+
w.close();
50+
directoryReader.close();
51+
dir.close();
52+
}
53+
54+
private static class TestFieldUsageNotifier implements FieldUsageTrackingDirectoryReader.FieldUsageNotifier {
55+
@Override
56+
public void onTermsUsed(String field) {
57+
58+
}
59+
60+
@Override
61+
public void onPostingsUsed(String field) {
62+
63+
}
64+
65+
@Override
66+
public void onTermFrequenciesUsed(String field) {
67+
68+
}
69+
70+
@Override
71+
public void onPositionsUsed(String field) {
72+
73+
}
74+
75+
@Override
76+
public void onOffsetsUsed(String field) {
77+
78+
}
79+
80+
@Override
81+
public void onDocValuesUsed(String field) {
82+
83+
}
84+
85+
@Override
86+
public void onStoredFieldsUsed(String field) {
87+
88+
}
89+
90+
@Override
91+
public void onNormsUsed(String field) {
92+
93+
}
94+
95+
@Override
96+
public void onPayloadsUsed(String field) {
97+
98+
}
99+
100+
@Override
101+
public void onPointsUsed(String field) {
102+
103+
}
104+
105+
@Override
106+
public void onTermVectorsUsed(String field) {
107+
108+
}
109+
110+
@Override
111+
public void onKnnVectorsUsed(String field) {
112+
113+
}
114+
}
115+
}

0 commit comments

Comments
 (0)