Skip to content

Commit e9160fc

Browse files
committed
percolator: also extract match_all queries
I've seen several cases where match_all queries were being used inside percolator queries, because these queries were generated by other systems. Extracting these queries will allow the percolator, at query time in a filter context, to skip over these queries without parsing them or validating that they actually match the document being percolated.
1 parent af8bd8b commit e9160fc

File tree

4 files changed

+170
-21
lines changed

4 files changed

+170
-21
lines changed

modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -458,20 +458,27 @@ void processQuery(Query query, ParseContext context) {
458458
doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
459459
return;
460460
}
461-
for (QueryAnalyzer.QueryExtraction term : result.extractions) {
462-
if (term.term != null) {
461+
for (QueryAnalyzer.QueryExtraction extraction : result.extractions) {
462+
if (extraction.term != null) {
463463
BytesRefBuilder builder = new BytesRefBuilder();
464-
builder.append(new BytesRef(term.field()));
464+
builder.append(new BytesRef(extraction.field()));
465465
builder.append(FIELD_VALUE_SEPARATOR);
466-
builder.append(term.bytes());
466+
builder.append(extraction.bytes());
467467
doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType()));
468-
} else if (term.range != null) {
469-
byte[] min = term.range.lowerPoint;
470-
byte[] max = term.range.upperPoint;
471-
doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(term.range.fieldName, min, max)));
468+
} else if (extraction.range != null) {
469+
byte[] min = extraction.range.lowerPoint;
470+
byte[] max = extraction.range.upperPoint;
471+
doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(extraction.range.fieldName, min, max)));
472472
}
473473
}
474-
if (result.verified) {
474+
475+
Version indexVersionCreated = context.mapperService().getIndexSettings().getIndexVersionCreated();
476+
if (result.matchAllDocs) {
477+
doc.add(new Field(extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
478+
if (result.verified) {
479+
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
480+
}
481+
} else if (result.verified) {
475482
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
476483
} else {
477484
doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType()));
@@ -481,7 +488,7 @@ void processQuery(Query query, ParseContext context) {
481488
for (IndexableField field : fields) {
482489
context.doc().add(field);
483490
}
484-
if (context.mapperService().getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_1_0)) {
491+
if (indexVersionCreated.onOrAfter(Version.V_6_1_0)) {
485492
doc.add(new NumericDocValuesField(minimumShouldMatchFieldMapper.name(), result.minimumShouldMatch));
486493
}
487494
}

modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.apache.lucene.search.ConstantScoreQuery;
3030
import org.apache.lucene.search.DisjunctionMaxQuery;
3131
import org.apache.lucene.search.IndexOrDocValuesQuery;
32+
import org.apache.lucene.search.MatchAllDocsQuery;
3233
import org.apache.lucene.search.MatchNoDocsQuery;
3334
import org.apache.lucene.search.MultiPhraseQuery;
3435
import org.apache.lucene.search.PhraseQuery;
@@ -70,6 +71,7 @@ final class QueryAnalyzer {
7071
static {
7172
Map<Class<? extends Query>, BiFunction<Query, Version, Result>> map = new HashMap<>();
7273
map.put(MatchNoDocsQuery.class, matchNoDocsQuery());
74+
map.put(MatchAllDocsQuery.class, matchAllDocsQuery());
7375
map.put(ConstantScoreQuery.class, constantScoreQuery());
7476
map.put(BoostQuery.class, boostQuery());
7577
map.put(TermQuery.class, termQuery());
@@ -142,6 +144,10 @@ private static BiFunction<Query, Version, Result> matchNoDocsQuery() {
142144
return (query, version) -> new Result(true, Collections.emptySet(), 1);
143145
}
144146

147+
private static BiFunction<Query, Version, Result> matchAllDocsQuery() {
148+
return (query, version) -> new Result(true, true);
149+
}
150+
145151
private static BiFunction<Query, Version, Result> constantScoreQuery() {
146152
return (query, boosts) -> {
147153
Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
@@ -356,6 +362,7 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
356362
int msm = 0;
357363
boolean requiredShouldClauses = minimumShouldMatch > 0 && numOptionalClauses > 0;
358364
boolean verified = uqe == null && numProhibitedClauses == 0 && requiredShouldClauses == false;
365+
boolean matchAllDocs = true;
359366
Set<QueryExtraction> extractions = new HashSet<>();
360367
Set<String> seenRangeFields = new HashSet<>();
361368
for (Result result : results) {
@@ -376,9 +383,14 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
376383
msm += result.minimumShouldMatch;
377384
}
378385
verified &= result.verified;
386+
matchAllDocs &= result.matchAllDocs;
379387
extractions.addAll(result.extractions);
380388
}
381-
return new Result(verified, extractions, msm);
389+
if (matchAllDocs) {
390+
return new Result(matchAllDocs, verified);
391+
} else {
392+
return new Result(verified, extractions, msm);
393+
}
382394
}
383395
} else {
384396
Set<QueryExtraction> bestClause = null;
@@ -498,12 +510,15 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
498510
if (version.before(Version.V_6_1_0)) {
499511
verified &= requiredShouldClauses <= 1;
500512
}
501-
513+
int numMatchAllClauses = 0;
502514
Set<QueryExtraction> terms = new HashSet<>();
503515
for (int i = 0; i < disjunctions.size(); i++) {
504516
Query disjunct = disjunctions.get(i);
505517
Result subResult = analyze(disjunct, version);
506518
verified &= subResult.verified;
519+
if (subResult.matchAllDocs) {
520+
numMatchAllClauses++;
521+
}
507522
terms.addAll(subResult.extractions);
508523

509524
QueryExtraction[] t = subResult.extractions.toArray(new QueryExtraction[1]);
@@ -512,6 +527,7 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
512527
rangeFieldNames[i] = t[0].range.fieldName;
513528
}
514529
}
530+
boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses;
515531

516532
int msm = 0;
517533
if (version.onOrAfter(Version.V_6_1_0)) {
@@ -532,7 +548,11 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
532548
} else {
533549
msm = 1;
534550
}
535-
return new Result(verified, terms, msm);
551+
if (matchAllDocs) {
552+
return new Result(matchAllDocs, verified);
553+
} else {
554+
return new Result(verified, terms, msm);
555+
}
536556
}
537557

538558
static Set<QueryExtraction> selectBestExtraction(Set<QueryExtraction> extractions1, Set<QueryExtraction> extractions2) {
@@ -619,11 +639,20 @@ static class Result {
619639
final Set<QueryExtraction> extractions;
620640
final boolean verified;
621641
final int minimumShouldMatch;
642+
final boolean matchAllDocs;
622643

623644
Result(boolean verified, Set<QueryExtraction> extractions, int minimumShouldMatch) {
624645
this.extractions = extractions;
625646
this.verified = verified;
626647
this.minimumShouldMatch = minimumShouldMatch;
648+
this.matchAllDocs = false;
649+
}
650+
651+
Result(boolean matchAllDocs, boolean verified) {
652+
this.extractions = Collections.emptySet();
653+
this.verified = verified;
654+
this.minimumShouldMatch = 0;
655+
this.matchAllDocs = matchAllDocs;
627656
}
628657

629658
}

modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
import org.apache.lucene.search.PrefixQuery;
5757
import org.apache.lucene.search.Query;
5858
import org.apache.lucene.search.Scorer;
59+
import org.apache.lucene.search.Sort;
60+
import org.apache.lucene.search.SortField;
5961
import org.apache.lucene.search.TermInSetQuery;
6062
import org.apache.lucene.search.TermQuery;
6163
import org.apache.lucene.search.TopDocs;
@@ -193,6 +195,8 @@ public void testDuel() throws Exception {
193195
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
194196
if (randomBoolean()) {
195197
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
198+
} else if (randomBoolean()) {
199+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
196200
}
197201
return builder.build();
198202
});
@@ -202,6 +206,20 @@ public void testDuel() throws Exception {
202206
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
203207
if (randomBoolean()) {
204208
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
209+
} else if (randomBoolean()) {
210+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
211+
}
212+
return builder.build();
213+
});
214+
queryFunctions.add((id) -> {
215+
BooleanQuery.Builder builder = new BooleanQuery.Builder();
216+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
217+
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
218+
if (randomBoolean()) {
219+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
220+
}
221+
if (randomBoolean()) {
222+
builder.setMinimumNumberShouldMatch(2);
205223
}
206224
return builder.build();
207225
});
@@ -467,6 +485,52 @@ public void testDuelRangeQueries() throws Exception {
467485
duelRun(queryStore, memoryIndex, shardSearcher);
468486
}
469487

488+
public void testPercolateMatchAll() throws Exception {
489+
List<ParseContext.Document> docs = new ArrayList<>();
490+
addQuery(new MatchAllDocsQuery(), docs);
491+
BooleanQuery.Builder builder = new BooleanQuery.Builder();
492+
builder.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST);
493+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
494+
addQuery(builder.build(), docs);
495+
builder = new BooleanQuery.Builder();
496+
builder.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
497+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
498+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
499+
addQuery(builder.build(), docs);
500+
builder = new BooleanQuery.Builder();
501+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
502+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
503+
addQuery(builder.build(), docs);
504+
builder = new BooleanQuery.Builder();
505+
builder.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.SHOULD);
506+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
507+
addQuery(builder.build(), docs);
508+
indexWriter.addDocuments(docs);
509+
indexWriter.close();
510+
directoryReader = DirectoryReader.open(directory);
511+
IndexSearcher shardSearcher = newSearcher(directoryReader);
512+
shardSearcher.setQueryCache(null);
513+
514+
MemoryIndex memoryIndex = new MemoryIndex();
515+
memoryIndex.addField("field", "value1", new WhitespaceAnalyzer());
516+
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
517+
PercolateQuery query = (PercolateQuery) fieldType.percolateQuery("_name", queryStore,
518+
Collections.singletonList(new BytesArray("{}")), percolateSearcher, Version.CURRENT);
519+
TopDocs topDocs = shardSearcher.search(query, 10, new Sort(SortField.FIELD_DOC), true, true);
520+
assertEquals(3L, topDocs.totalHits);
521+
assertEquals(3, topDocs.scoreDocs.length);
522+
assertEquals(0, topDocs.scoreDocs[0].doc);
523+
assertEquals(1, topDocs.scoreDocs[1].doc);
524+
assertEquals(4, topDocs.scoreDocs[2].doc);
525+
526+
topDocs = shardSearcher.search(new ConstantScoreQuery(query), 10);
527+
assertEquals(3L, topDocs.totalHits);
528+
assertEquals(3, topDocs.scoreDocs.length);
529+
assertEquals(0, topDocs.scoreDocs[0].doc);
530+
assertEquals(1, topDocs.scoreDocs[1].doc);
531+
assertEquals(4, topDocs.scoreDocs[2].doc);
532+
}
533+
470534
public void testPercolateSmallAndLargeDocument() throws Exception {
471535
List<ParseContext.Document> docs = new ArrayList<>();
472536
BooleanQuery.Builder builder = new BooleanQuery.Builder();

modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -594,13 +594,18 @@ public void testExtractQueryMetadata_matchNoDocsQuery() {
594594
}
595595

596596
public void testExtractQueryMetadata_matchAllDocsQuery() {
597-
expectThrows(UnsupportedQueryException.class, () -> analyze(new MatchAllDocsQuery(), Version.CURRENT));
597+
Result result = analyze(new MatchAllDocsQuery(), Version.CURRENT);
598+
assertThat(result.verified, is(true));
599+
assertThat(result.matchAllDocs, is(true));
600+
assertThat(result.minimumShouldMatch, equalTo(0));
601+
assertThat(result.extractions.size(), equalTo(0));
598602

599603
BooleanQuery.Builder builder = new BooleanQuery.Builder();
600604
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
601605
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
602-
Result result = analyze(builder.build(), Version.CURRENT);
603-
assertThat(result.verified, is(false));
606+
result = analyze(builder.build(), Version.CURRENT);
607+
assertThat(result.verified, is(true));
608+
assertThat(result.matchAllDocs, is(false));
604609
assertThat(result.minimumShouldMatch, equalTo(1));
605610
assertTermsEqual(result.extractions, new Term("field", "value"));
606611

@@ -609,34 +614,78 @@ public void testExtractQueryMetadata_matchAllDocsQuery() {
609614
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
610615
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
611616
BooleanQuery bq1 = builder.build();
612-
expectThrows(UnsupportedQueryException.class, () -> analyze(bq1, Version.CURRENT));
617+
result = analyze(bq1, Version.CURRENT);
618+
assertThat(result.verified, is(true));
619+
assertThat(result.matchAllDocs, is(true));
620+
assertThat(result.minimumShouldMatch, equalTo(0));
621+
assertThat(result.extractions.size(), equalTo(0));
613622

614623
builder = new BooleanQuery.Builder();
615624
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
616625
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
617626
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
618627
BooleanQuery bq2 = builder.build();
619-
expectThrows(UnsupportedQueryException.class, () -> analyze(bq2, Version.CURRENT));
628+
result = analyze(bq2, Version.CURRENT);
629+
assertThat(result.verified, is(false));
630+
assertThat(result.matchAllDocs, is(true));
631+
assertThat(result.minimumShouldMatch, equalTo(0));
632+
assertThat(result.extractions.size(), equalTo(0));
620633

621634
builder = new BooleanQuery.Builder();
622635
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
623636
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
624637
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
625638
BooleanQuery bq3 = builder.build();
626-
expectThrows(UnsupportedQueryException.class, () -> analyze(bq3, Version.CURRENT));
639+
result = analyze(bq3, Version.CURRENT);
640+
assertThat(result.verified, is(true));
641+
assertThat(result.matchAllDocs, is(true));
642+
assertThat(result.minimumShouldMatch, equalTo(0));
643+
assertThat(result.extractions.size(), equalTo(0));
627644

628645
builder = new BooleanQuery.Builder();
629646
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
630647
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
631648
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
632649
BooleanQuery bq4 = builder.build();
633-
expectThrows(UnsupportedQueryException.class, () -> analyze(bq4, Version.CURRENT));
650+
result = analyze(bq4, Version.CURRENT);
651+
assertThat(result.verified, is(false));
652+
assertThat(result.matchAllDocs, is(true));
653+
assertThat(result.minimumShouldMatch, equalTo(0));
654+
assertThat(result.extractions.size(), equalTo(0));
634655

635656
builder = new BooleanQuery.Builder();
636657
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
637658
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
638659
BooleanQuery bq5 = builder.build();
639-
expectThrows(UnsupportedQueryException.class, () -> analyze(bq5, Version.CURRENT));
660+
result = analyze(bq5, Version.CURRENT);
661+
assertThat(result.verified, is(true));
662+
assertThat(result.matchAllDocs, is(true));
663+
assertThat(result.minimumShouldMatch, equalTo(0));
664+
assertThat(result.extractions.size(), equalTo(0));
665+
666+
builder = new BooleanQuery.Builder();
667+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
668+
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
669+
builder.setMinimumNumberShouldMatch(2);
670+
BooleanQuery bq6 = builder.build();
671+
result = analyze(bq6, Version.CURRENT);
672+
assertThat(result.verified, is(true));
673+
assertThat(result.matchAllDocs, is(false));
674+
assertThat(result.minimumShouldMatch, equalTo(1));
675+
assertThat(result.extractions.size(), equalTo(1));
676+
assertTermsEqual(result.extractions, new Term("field", "value"));
677+
678+
builder = new BooleanQuery.Builder();
679+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
680+
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
681+
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
682+
builder.setMinimumNumberShouldMatch(2);
683+
BooleanQuery bq7 = builder.build();
684+
result = analyze(bq7, Version.CURRENT);
685+
assertThat(result.verified, is(true));
686+
assertThat(result.matchAllDocs, is(true));
687+
assertThat(result.minimumShouldMatch, equalTo(0));
688+
assertThat(result.extractions.size(), equalTo(0));
640689
}
641690

642691
public void testExtractQueryMetadata_unsupportedQuery() {

0 commit comments

Comments
 (0)