Skip to content

Commit 5f5bbcd

Browse files
authored
Remove old version constants from QueryAnalyzer (#41996)
1 parent ad8591a commit 5f5bbcd

File tree

2 files changed

+67
-458
lines changed

2 files changed

+67
-458
lines changed

modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java

Lines changed: 66 additions & 198 deletions
Original file line numberDiff line numberDiff line change
@@ -206,20 +206,8 @@ private static BiFunction<Query, Version, Result> phraseQuery() {
206206
return new Result(true, Collections.emptySet(), 0);
207207
}
208208

209-
if (version.onOrAfter(Version.V_6_1_0)) {
210-
Set<QueryExtraction> extractions = Arrays.stream(terms).map(QueryExtraction::new).collect(toSet());
211-
return new Result(false, extractions, extractions.size());
212-
} else {
213-
// the longest term is likely to be the rarest,
214-
// so from a performance perspective it makes sense to extract that
215-
Term longestTerm = terms[0];
216-
for (Term term : terms) {
217-
if (longestTerm.bytes().length < term.bytes().length) {
218-
longestTerm = term;
219-
}
220-
}
221-
return new Result(false, Collections.singleton(new QueryExtraction(longestTerm)), 1);
222-
}
209+
Set<QueryExtraction> extractions = Arrays.stream(terms).map(QueryExtraction::new).collect(toSet());
210+
return new Result(false, extractions, extractions.size());
223211
};
224212
}
225213

@@ -255,23 +243,14 @@ private static BiFunction<Query, Version, Result> spanTermQuery() {
255243
private static BiFunction<Query, Version, Result> spanNearQuery() {
256244
return (query, version) -> {
257245
SpanNearQuery spanNearQuery = (SpanNearQuery) query;
258-
if (version.onOrAfter(Version.V_6_1_0)) {
259-
// This has the same problem as boolean queries when it comes to duplicated clauses
260-
// so we rewrite to a boolean query to keep things simple.
261-
BooleanQuery.Builder builder = new BooleanQuery.Builder();
262-
for (SpanQuery clause : spanNearQuery.getClauses()) {
263-
builder.add(clause, Occur.FILTER);
264-
}
265-
// make sure to unverify the result
266-
return booleanQuery().apply(builder.build(), version).unverify();
267-
} else {
268-
Result bestClause = null;
269-
for (SpanQuery clause : spanNearQuery.getClauses()) {
270-
Result temp = analyze(clause, version);
271-
bestClause = selectBestResult(temp, bestClause);
272-
}
273-
return bestClause;
246+
// This has the same problem as boolean queries when it comes to duplicated clauses
247+
// so we rewrite to a boolean query to keep things simple.
248+
BooleanQuery.Builder builder = new BooleanQuery.Builder();
249+
for (SpanQuery clause : spanNearQuery.getClauses()) {
250+
builder.add(clause, Occur.FILTER);
274251
}
252+
// make sure to unverify the result
253+
return booleanQuery().apply(builder.build(), version).unverify();
275254
};
276255
}
277256

@@ -478,77 +457,69 @@ private static Result handleConjunction(List<Result> conjunctions, Version versi
478457
if (conjunctions.isEmpty()) {
479458
throw new IllegalArgumentException("Must have at least on conjunction sub result");
480459
}
481-
if (version.onOrAfter(Version.V_6_1_0)) {
482-
for (Result subResult : conjunctions) {
483-
if (subResult.isMatchNoDocs()) {
484-
return subResult;
485-
}
460+
for (Result subResult : conjunctions) {
461+
if (subResult.isMatchNoDocs()) {
462+
return subResult;
486463
}
487-
int msm = 0;
488-
boolean verified = true;
489-
boolean matchAllDocs = true;
490-
boolean hasDuplicateTerms = false;
491-
Set<QueryExtraction> extractions = new HashSet<>();
492-
Set<String> seenRangeFields = new HashSet<>();
493-
for (Result result : conjunctions) {
494-
// In case that there are duplicate query extractions we need to be careful with
495-
// incrementing msm,
496-
// because that could lead to valid matches not becoming candidate matches:
497-
// query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
498-
// doc: field: val1 val2 val3
499-
// So lets be protective and decrease the msm:
500-
int resultMsm = result.minimumShouldMatch;
501-
for (QueryExtraction queryExtraction : result.extractions) {
502-
if (queryExtraction.range != null) {
503-
// In case of range queries each extraction does not simply increment the
504-
// minimum_should_match
505-
// for that percolator query like for a term based extraction, so that can lead
506-
// to more false
507-
// positives for percolator queries with range queries than term based queries.
508-
// The is because the way number fields are extracted from the document to be
509-
// percolated.
510-
// Per field a single range is extracted and if a percolator query has two or
511-
// more range queries
512-
// on the same field, then the minimum should match can be higher than clauses
513-
// in the CoveringQuery.
514-
// Therefore right now the minimum should match is incremented once per number
515-
// field when processing
516-
// the percolator query at index time.
517-
if (seenRangeFields.add(queryExtraction.range.fieldName)) {
518-
resultMsm = 1;
519-
} else {
520-
resultMsm = 0;
521-
}
522-
}
523-
524-
if (extractions.contains(queryExtraction)) {
525-
464+
}
465+
int msm = 0;
466+
boolean verified = true;
467+
boolean matchAllDocs = true;
468+
boolean hasDuplicateTerms = false;
469+
Set<QueryExtraction> extractions = new HashSet<>();
470+
Set<String> seenRangeFields = new HashSet<>();
471+
for (Result result : conjunctions) {
472+
// In case that there are duplicate query extractions we need to be careful with
473+
// incrementing msm,
474+
// because that could lead to valid matches not becoming candidate matches:
475+
// query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
476+
// doc: field: val1 val2 val3
477+
// So lets be protective and decrease the msm:
478+
int resultMsm = result.minimumShouldMatch;
479+
for (QueryExtraction queryExtraction : result.extractions) {
480+
if (queryExtraction.range != null) {
481+
// In case of range queries each extraction does not simply increment the
482+
// minimum_should_match
483+
// for that percolator query like for a term based extraction, so that can lead
484+
// to more false
485+
// positives for percolator queries with range queries than term based queries.
486+
// The is because the way number fields are extracted from the document to be
487+
// percolated.
488+
// Per field a single range is extracted and if a percolator query has two or
489+
// more range queries
490+
// on the same field, then the minimum should match can be higher than clauses
491+
// in the CoveringQuery.
492+
// Therefore right now the minimum should match is incremented once per number
493+
// field when processing
494+
// the percolator query at index time.
495+
if (seenRangeFields.add(queryExtraction.range.fieldName)) {
496+
resultMsm = 1;
497+
} else {
526498
resultMsm = 0;
527-
verified = false;
528-
break;
529499
}
530500
}
531-
msm += resultMsm;
532501

533-
if (result.verified == false
534-
// If some inner extractions are optional, the result can't be verified
535-
|| result.minimumShouldMatch < result.extractions.size()) {
502+
if (extractions.contains(queryExtraction)) {
503+
504+
resultMsm = 0;
536505
verified = false;
506+
break;
537507
}
538-
matchAllDocs &= result.matchAllDocs;
539-
extractions.addAll(result.extractions);
540508
}
541-
if (matchAllDocs) {
542-
return new Result(matchAllDocs, verified);
543-
} else {
544-
return new Result(verified, extractions, hasDuplicateTerms ? 1 : msm);
509+
msm += resultMsm;
510+
511+
if (result.verified == false
512+
// If some inner extractions are optional, the result can't be verified
513+
|| result.minimumShouldMatch < result.extractions.size()) {
514+
verified = false;
545515
}
516+
matchAllDocs &= result.matchAllDocs;
517+
extractions.addAll(result.extractions);
518+
}
519+
if (matchAllDocs) {
520+
return new Result(matchAllDocs, verified);
546521
} else {
547-
Result bestClause = null;
548-
for (Result result : conjunctions) {
549-
bestClause = selectBestResult(result, bestClause);
550-
}
551-
return bestClause;
522+
return new Result(verified, extractions, hasDuplicateTerms ? 1 : msm);
552523
}
553524
}
554525

@@ -565,12 +536,7 @@ private static Result handleDisjunctionQuery(List<Query> disjunctions, int requi
565536
private static Result handleDisjunction(List<Result> disjunctions, int requiredShouldClauses, Version version) {
566537
// Keep track of the msm for each clause:
567538
List<Integer> clauses = new ArrayList<>(disjunctions.size());
568-
boolean verified;
569-
if (version.before(Version.V_6_1_0)) {
570-
verified = requiredShouldClauses <= 1;
571-
} else {
572-
verified = true;
573-
}
539+
boolean verified = true;
574540
int numMatchAllClauses = 0;
575541
boolean hasRangeExtractions = false;
576542

@@ -617,10 +583,9 @@ private static Result handleDisjunction(List<Result> disjunctions, int requiredS
617583
boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses;
618584

619585
int msm = 0;
620-
if (version.onOrAfter(Version.V_6_1_0) &&
621-
// Having ranges would mean we need to juggle with the msm and that complicates this logic a lot,
622-
// so for now lets not do it.
623-
hasRangeExtractions == false) {
586+
// Having ranges would mean we need to juggle with the msm and that complicates this logic a lot,
587+
// so for now lets not do it.
588+
if (hasRangeExtractions == false) {
624589
// Figure out what the combined msm is for this disjunction:
625590
// (sum the lowest required clauses, otherwise we're too strict and queries may not match)
626591
clauses = clauses.stream()
@@ -648,103 +613,6 @@ private static Result handleDisjunction(List<Result> disjunctions, int requiredS
648613
}
649614
}
650615

651-
/**
652-
* Return an extraction for the conjunction of {@code result1} and {@code result2}
653-
* by picking up clauses that look most restrictive and making it unverified if
654-
* the other clause is not null and doesn't match all documents. This is used by
655-
* 6.0.0 indices which didn't use the terms_set query.
656-
*/
657-
static Result selectBestResult(Result result1, Result result2) {
658-
assert result1 != null || result2 != null;
659-
if (result1 == null) {
660-
return result2;
661-
} else if (result2 == null) {
662-
return result1;
663-
} else if (result1.matchAllDocs) { // conjunction with match_all
664-
Result result = result2;
665-
if (result1.verified == false) {
666-
result = result.unverify();
667-
}
668-
return result;
669-
} else if (result2.matchAllDocs) { // conjunction with match_all
670-
Result result = result1;
671-
if (result2.verified == false) {
672-
result = result.unverify();
673-
}
674-
return result;
675-
} else {
676-
// Prefer term based extractions over range based extractions:
677-
boolean onlyRangeBasedExtractions = true;
678-
for (QueryExtraction clause : result1.extractions) {
679-
if (clause.term != null) {
680-
onlyRangeBasedExtractions = false;
681-
break;
682-
}
683-
}
684-
for (QueryExtraction clause : result2.extractions) {
685-
if (clause.term != null) {
686-
onlyRangeBasedExtractions = false;
687-
break;
688-
}
689-
}
690-
691-
if (onlyRangeBasedExtractions) {
692-
BytesRef extraction1SmallestRange = smallestRange(result1.extractions);
693-
BytesRef extraction2SmallestRange = smallestRange(result2.extractions);
694-
if (extraction1SmallestRange == null) {
695-
return result2.unverify();
696-
} else if (extraction2SmallestRange == null) {
697-
return result1.unverify();
698-
}
699-
700-
// Keep the clause with smallest range, this is likely to be the rarest.
701-
if (extraction1SmallestRange.compareTo(extraction2SmallestRange) <= 0) {
702-
return result1.unverify();
703-
} else {
704-
return result2.unverify();
705-
}
706-
} else {
707-
int extraction1ShortestTerm = minTermLength(result1.extractions);
708-
int extraction2ShortestTerm = minTermLength(result2.extractions);
709-
// keep the clause with longest terms, this likely to be rarest.
710-
if (extraction1ShortestTerm >= extraction2ShortestTerm) {
711-
return result1.unverify();
712-
} else {
713-
return result2.unverify();
714-
}
715-
}
716-
}
717-
}
718-
719-
private static int minTermLength(Set<QueryExtraction> extractions) {
720-
// In case there are only range extractions, then we return Integer.MIN_VALUE,
721-
// so that selectBestExtraction(...) we are likely to prefer the extractions that contains at least a single extraction
722-
if (extractions.stream().filter(queryExtraction -> queryExtraction.term != null).count() == 0 &&
723-
extractions.stream().filter(queryExtraction -> queryExtraction.range != null).count() > 0) {
724-
return Integer.MIN_VALUE;
725-
}
726-
727-
int min = Integer.MAX_VALUE;
728-
for (QueryExtraction qt : extractions) {
729-
if (qt.term != null) {
730-
min = Math.min(min, qt.bytes().length);
731-
}
732-
}
733-
return min;
734-
}
735-
736-
private static BytesRef smallestRange(Set<QueryExtraction> terms) {
737-
BytesRef min = null;
738-
for (QueryExtraction qt : terms) {
739-
if (qt.range != null) {
740-
if (min == null || qt.range.interval.compareTo(min) < 0) {
741-
min = qt.range.interval;
742-
}
743-
}
744-
}
745-
return min;
746-
}
747-
748616
/**
749617
* Query extraction result. A result is a candidate for a given document either if:
750618
* - `matchAllDocs` is true

0 commit comments

Comments
 (0)