@@ -206,20 +206,8 @@ private static BiFunction<Query, Version, Result> phraseQuery() {
206206 return new Result (true , Collections .emptySet (), 0 );
207207 }
208208
209- if (version .onOrAfter (Version .V_6_1_0 )) {
210- Set <QueryExtraction > extractions = Arrays .stream (terms ).map (QueryExtraction ::new ).collect (toSet ());
211- return new Result (false , extractions , extractions .size ());
212- } else {
213- // the longest term is likely to be the rarest,
214- // so from a performance perspective it makes sense to extract that
215- Term longestTerm = terms [0 ];
216- for (Term term : terms ) {
217- if (longestTerm .bytes ().length < term .bytes ().length ) {
218- longestTerm = term ;
219- }
220- }
221- return new Result (false , Collections .singleton (new QueryExtraction (longestTerm )), 1 );
222- }
209+ Set <QueryExtraction > extractions = Arrays .stream (terms ).map (QueryExtraction ::new ).collect (toSet ());
210+ return new Result (false , extractions , extractions .size ());
223211 };
224212 }
225213
@@ -255,23 +243,14 @@ private static BiFunction<Query, Version, Result> spanTermQuery() {
255243 private static BiFunction <Query , Version , Result > spanNearQuery () {
256244 return (query , version ) -> {
257245 SpanNearQuery spanNearQuery = (SpanNearQuery ) query ;
258- if (version .onOrAfter (Version .V_6_1_0 )) {
259- // This has the same problem as boolean queries when it comes to duplicated clauses
260- // so we rewrite to a boolean query to keep things simple.
261- BooleanQuery .Builder builder = new BooleanQuery .Builder ();
262- for (SpanQuery clause : spanNearQuery .getClauses ()) {
263- builder .add (clause , Occur .FILTER );
264- }
265- // make sure to unverify the result
266- return booleanQuery ().apply (builder .build (), version ).unverify ();
267- } else {
268- Result bestClause = null ;
269- for (SpanQuery clause : spanNearQuery .getClauses ()) {
270- Result temp = analyze (clause , version );
271- bestClause = selectBestResult (temp , bestClause );
272- }
273- return bestClause ;
246+ // This has the same problem as boolean queries when it comes to duplicated clauses
247+ // so we rewrite to a boolean query to keep things simple.
248+ BooleanQuery .Builder builder = new BooleanQuery .Builder ();
249+ for (SpanQuery clause : spanNearQuery .getClauses ()) {
250+ builder .add (clause , Occur .FILTER );
274251 }
252+ // make sure to unverify the result
253+ return booleanQuery ().apply (builder .build (), version ).unverify ();
275254 };
276255 }
277256
@@ -478,77 +457,69 @@ private static Result handleConjunction(List<Result> conjunctions, Version versi
478457 if (conjunctions .isEmpty ()) {
479458 throw new IllegalArgumentException ("Must have at least on conjunction sub result" );
480459 }
481- if (version .onOrAfter (Version .V_6_1_0 )) {
482- for (Result subResult : conjunctions ) {
483- if (subResult .isMatchNoDocs ()) {
484- return subResult ;
485- }
460+ for (Result subResult : conjunctions ) {
461+ if (subResult .isMatchNoDocs ()) {
462+ return subResult ;
486463 }
487- int msm = 0 ;
488- boolean verified = true ;
489- boolean matchAllDocs = true ;
490- boolean hasDuplicateTerms = false ;
491- Set <QueryExtraction > extractions = new HashSet <>();
492- Set <String > seenRangeFields = new HashSet <>();
493- for (Result result : conjunctions ) {
494- // In case that there are duplicate query extractions we need to be careful with
495- // incrementing msm,
496- // because that could lead to valid matches not becoming candidate matches:
497- // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
498- // doc: field: val1 val2 val3
499- // So lets be protective and decrease the msm:
500- int resultMsm = result .minimumShouldMatch ;
501- for (QueryExtraction queryExtraction : result .extractions ) {
502- if (queryExtraction .range != null ) {
503- // In case of range queries each extraction does not simply increment the
504- // minimum_should_match
505- // for that percolator query like for a term based extraction, so that can lead
506- // to more false
507- // positives for percolator queries with range queries than term based queries.
508- // The is because the way number fields are extracted from the document to be
509- // percolated.
510- // Per field a single range is extracted and if a percolator query has two or
511- // more range queries
512- // on the same field, then the minimum should match can be higher than clauses
513- // in the CoveringQuery.
514- // Therefore right now the minimum should match is incremented once per number
515- // field when processing
516- // the percolator query at index time.
517- if (seenRangeFields .add (queryExtraction .range .fieldName )) {
518- resultMsm = 1 ;
519- } else {
520- resultMsm = 0 ;
521- }
522- }
523-
524- if (extractions .contains (queryExtraction )) {
525-
464+ }
465+ int msm = 0 ;
466+ boolean verified = true ;
467+ boolean matchAllDocs = true ;
468+ boolean hasDuplicateTerms = false ;
469+ Set <QueryExtraction > extractions = new HashSet <>();
470+ Set <String > seenRangeFields = new HashSet <>();
471+ for (Result result : conjunctions ) {
472+ // In case that there are duplicate query extractions we need to be careful with
473+ // incrementing msm,
474+ // because that could lead to valid matches not becoming candidate matches:
475+ // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
476+ // doc: field: val1 val2 val3
477+ // So lets be protective and decrease the msm:
478+ int resultMsm = result .minimumShouldMatch ;
479+ for (QueryExtraction queryExtraction : result .extractions ) {
480+ if (queryExtraction .range != null ) {
481+ // In case of range queries each extraction does not simply increment the
482+ // minimum_should_match
483+ // for that percolator query like for a term based extraction, so that can lead
484+ // to more false
485+ // positives for percolator queries with range queries than term based queries.
486+ // The is because the way number fields are extracted from the document to be
487+ // percolated.
488+ // Per field a single range is extracted and if a percolator query has two or
489+ // more range queries
490+ // on the same field, then the minimum should match can be higher than clauses
491+ // in the CoveringQuery.
492+ // Therefore right now the minimum should match is incremented once per number
493+ // field when processing
494+ // the percolator query at index time.
495+ if (seenRangeFields .add (queryExtraction .range .fieldName )) {
496+ resultMsm = 1 ;
497+ } else {
526498 resultMsm = 0 ;
527- verified = false ;
528- break ;
529499 }
530500 }
531- msm += resultMsm ;
532501
533- if (result . verified == false
534- // If some inner extractions are optional, the result can't be verified
535- || result . minimumShouldMatch < result . extractions . size ()) {
502+ if (extractions . contains ( queryExtraction )) {
503+
504+ resultMsm = 0 ;
536505 verified = false ;
506+ break ;
537507 }
538- matchAllDocs &= result .matchAllDocs ;
539- extractions .addAll (result .extractions );
540508 }
541- if (matchAllDocs ) {
542- return new Result (matchAllDocs , verified );
543- } else {
544- return new Result (verified , extractions , hasDuplicateTerms ? 1 : msm );
509+ msm += resultMsm ;
510+
511+ if (result .verified == false
512+ // If some inner extractions are optional, the result can't be verified
513+ || result .minimumShouldMatch < result .extractions .size ()) {
514+ verified = false ;
545515 }
516+ matchAllDocs &= result .matchAllDocs ;
517+ extractions .addAll (result .extractions );
518+ }
519+ if (matchAllDocs ) {
520+ return new Result (matchAllDocs , verified );
546521 } else {
547- Result bestClause = null ;
548- for (Result result : conjunctions ) {
549- bestClause = selectBestResult (result , bestClause );
550- }
551- return bestClause ;
522+ return new Result (verified , extractions , hasDuplicateTerms ? 1 : msm );
552523 }
553524 }
554525
@@ -565,12 +536,7 @@ private static Result handleDisjunctionQuery(List<Query> disjunctions, int requi
565536 private static Result handleDisjunction (List <Result > disjunctions , int requiredShouldClauses , Version version ) {
566537 // Keep track of the msm for each clause:
567538 List <Integer > clauses = new ArrayList <>(disjunctions .size ());
568- boolean verified ;
569- if (version .before (Version .V_6_1_0 )) {
570- verified = requiredShouldClauses <= 1 ;
571- } else {
572- verified = true ;
573- }
539+ boolean verified = true ;
574540 int numMatchAllClauses = 0 ;
575541 boolean hasRangeExtractions = false ;
576542
@@ -617,10 +583,9 @@ private static Result handleDisjunction(List<Result> disjunctions, int requiredS
617583 boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses ;
618584
619585 int msm = 0 ;
620- if (version .onOrAfter (Version .V_6_1_0 ) &&
621- // Having ranges would mean we need to juggle with the msm and that complicates this logic a lot,
622- // so for now lets not do it.
623- hasRangeExtractions == false ) {
586+ // Having ranges would mean we need to juggle with the msm and that complicates this logic a lot,
587+ // so for now lets not do it.
588+ if (hasRangeExtractions == false ) {
624589 // Figure out what the combined msm is for this disjunction:
625590 // (sum the lowest required clauses, otherwise we're too strict and queries may not match)
626591 clauses = clauses .stream ()
@@ -648,103 +613,6 @@ private static Result handleDisjunction(List<Result> disjunctions, int requiredS
648613 }
649614 }
650615
651- /**
652- * Return an extraction for the conjunction of {@code result1} and {@code result2}
653- * by picking up clauses that look most restrictive and making it unverified if
654- * the other clause is not null and doesn't match all documents. This is used by
655- * 6.0.0 indices which didn't use the terms_set query.
656- */
657- static Result selectBestResult (Result result1 , Result result2 ) {
658- assert result1 != null || result2 != null ;
659- if (result1 == null ) {
660- return result2 ;
661- } else if (result2 == null ) {
662- return result1 ;
663- } else if (result1 .matchAllDocs ) { // conjunction with match_all
664- Result result = result2 ;
665- if (result1 .verified == false ) {
666- result = result .unverify ();
667- }
668- return result ;
669- } else if (result2 .matchAllDocs ) { // conjunction with match_all
670- Result result = result1 ;
671- if (result2 .verified == false ) {
672- result = result .unverify ();
673- }
674- return result ;
675- } else {
676- // Prefer term based extractions over range based extractions:
677- boolean onlyRangeBasedExtractions = true ;
678- for (QueryExtraction clause : result1 .extractions ) {
679- if (clause .term != null ) {
680- onlyRangeBasedExtractions = false ;
681- break ;
682- }
683- }
684- for (QueryExtraction clause : result2 .extractions ) {
685- if (clause .term != null ) {
686- onlyRangeBasedExtractions = false ;
687- break ;
688- }
689- }
690-
691- if (onlyRangeBasedExtractions ) {
692- BytesRef extraction1SmallestRange = smallestRange (result1 .extractions );
693- BytesRef extraction2SmallestRange = smallestRange (result2 .extractions );
694- if (extraction1SmallestRange == null ) {
695- return result2 .unverify ();
696- } else if (extraction2SmallestRange == null ) {
697- return result1 .unverify ();
698- }
699-
700- // Keep the clause with smallest range, this is likely to be the rarest.
701- if (extraction1SmallestRange .compareTo (extraction2SmallestRange ) <= 0 ) {
702- return result1 .unverify ();
703- } else {
704- return result2 .unverify ();
705- }
706- } else {
707- int extraction1ShortestTerm = minTermLength (result1 .extractions );
708- int extraction2ShortestTerm = minTermLength (result2 .extractions );
709- // keep the clause with longest terms, this likely to be rarest.
710- if (extraction1ShortestTerm >= extraction2ShortestTerm ) {
711- return result1 .unverify ();
712- } else {
713- return result2 .unverify ();
714- }
715- }
716- }
717- }
718-
719- private static int minTermLength (Set <QueryExtraction > extractions ) {
720- // In case there are only range extractions, then we return Integer.MIN_VALUE,
721- // so that selectBestExtraction(...) we are likely to prefer the extractions that contains at least a single extraction
722- if (extractions .stream ().filter (queryExtraction -> queryExtraction .term != null ).count () == 0 &&
723- extractions .stream ().filter (queryExtraction -> queryExtraction .range != null ).count () > 0 ) {
724- return Integer .MIN_VALUE ;
725- }
726-
727- int min = Integer .MAX_VALUE ;
728- for (QueryExtraction qt : extractions ) {
729- if (qt .term != null ) {
730- min = Math .min (min , qt .bytes ().length );
731- }
732- }
733- return min ;
734- }
735-
736- private static BytesRef smallestRange (Set <QueryExtraction > terms ) {
737- BytesRef min = null ;
738- for (QueryExtraction qt : terms ) {
739- if (qt .range != null ) {
740- if (min == null || qt .range .interval .compareTo (min ) < 0 ) {
741- min = qt .range .interval ;
742- }
743- }
744- }
745- return min ;
746- }
747-
748616 /**
749617 * Query extraction result. A result is a candidate for a given document either if:
750618 * - `matchAllDocs` is true
0 commit comments