# Reviewer notes for the patch below (free text ahead of the first "diff --git" header; not part of any hunk).
#
# Purpose: make IntervalBuilder carry token position gaps (position increment > 1) into the
# interval sources it builds, and add tests covering that behaviour.
#
# IntervalBuilder.java
#   * analyzeTerms: now also reads PositionIncrementAttribute; each term source is passed through
#     extend(...) with precedingSpaces = positionIncrement - 1.
#   * extend(source, precedingSpaces): new public static helper; returns the source unchanged when
#     precedingSpaces == 0, otherwise returns Intervals.extend(source, precedingSpaces, 0).
#   * analyzeSynonyms: a new position group now starts on any increment > 0 (previously only == 1);
#     the increment minus one is remembered in `spaces` and applied via extend(...) when the group
#     (a single term, or Intervals.or of the synonyms) is flushed, including the flush after the loop.
#
# IntervalBuilderTests.java
#   * adds testPhraseWithStopword, testSimpleSynonymsWithGap, testGraphSynonymsWithGaps and
#     testGraphTerminatesOnGap.
#   * testGraphSynonyms now builds its graph token with the five-argument Token constructor instead
#     of calling setPositionLength(2) on a separate variable.
#
# NOTE(review): this copy looks flattened -- headers and hunk lines are run together and generic type
#   arguments appear to be missing (e.g. "List terms") -- so it probably cannot be applied as-is;
#   confirm against the upstream commit.
# NOTE(review): extend(...) only special-cases 0. A position increment of 0 reaching analyzeTerms
#   would pass -1 to Intervals.extend -- presumably callers never route such streams there; verify.
diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java index b39f2ab5a91e6..e174b3fd49eee 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -143,38 +143,50 @@ protected static IntervalsSource combineSources(List sources, i protected List analyzeTerms(TokenStream ts) throws IOException { List terms = new ArrayList<>(); TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class); ts.reset(); while (ts.incrementToken()) { BytesRef term = bytesAtt.getBytesRef(); - terms.add(Intervals.term(BytesRef.deepCopyOf(term))); + int precedingSpaces = posAtt.getPositionIncrement() - 1; + terms.add(extend(Intervals.term(BytesRef.deepCopyOf(term)), precedingSpaces)); } ts.end(); return terms; } + public static IntervalsSource extend(IntervalsSource source, int precedingSpaces) { + if (precedingSpaces == 0) { + return source; + } + return Intervals.extend(source, precedingSpaces, 0); + } + protected IntervalsSource analyzeSynonyms(TokenStream ts, int maxGaps, boolean ordered) throws IOException { List terms = new ArrayList<>(); List synonyms = new ArrayList<>(); TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class); ts.reset(); + int spaces = 0; while (ts.incrementToken()) { - if (posAtt.getPositionIncrement() == 1) { + int posInc = posAtt.getPositionIncrement(); + if (posInc > 0) { if (synonyms.size() == 1) { - terms.add(synonyms.get(0)); + terms.add(extend(synonyms.get(0), spaces)); } else if (synonyms.size() > 1) { - terms.add(Intervals.or(synonyms.toArray(new IntervalsSource[0]))); + terms.add(extend(Intervals.or(synonyms.toArray(new 
IntervalsSource[0])), spaces)); } synonyms.clear(); + spaces = posInc - 1; } synonyms.add(Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef()))); } if (synonyms.size() == 1) { - terms.add(synonyms.get(0)); + terms.add(extend(synonyms.get(0), spaces)); } else { - terms.add(Intervals.or(synonyms.toArray(new IntervalsSource[0]))); + terms.add(extend(Intervals.or(synonyms.toArray(new IntervalsSource[0])), spaces)); } return combineSources(terms, maxGaps, ordered); } diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java index a565db41516a9..15ec8af0af2c5 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java @@ -94,6 +94,22 @@ public void testPhrase() throws IOException { } + public void testPhraseWithStopword() throws IOException { + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 1, 2), + new Token("term3", 2, 5, 6) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), 0, true); + IntervalsSource expected = Intervals.phrase( + Intervals.term("term1"), Intervals.extend(Intervals.term("term3"), 1, 0) + ); + + assertEquals(expected, source); + + } + public void testSimpleSynonyms() throws IOException { CannedTokenStream ts = new CannedTokenStream( @@ -112,16 +128,32 @@ public void testSimpleSynonyms() throws IOException { } - public void testGraphSynonyms() throws IOException { + public void testSimpleSynonymsWithGap() throws IOException { + // term1 [] term2/term3/term4 term5 + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + new Token("term2", 2, 3, 4), + new Token("term3", 0, 3, 4), + new Token("term4", 0, 3, 4), + new Token("term5", 5, 6) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); + IntervalsSource 
expected = Intervals.ordered( + Intervals.term("term1"), + Intervals.extend(Intervals.or(Intervals.term("term2"), Intervals.term("term3"), Intervals.term("term4")), 1, 0), + Intervals.term("term5") + ); + assertEquals(expected, source); + } - // term1 term2/term3:2 term4 term5 + public void testGraphSynonyms() throws IOException { - Token graphToken = new Token("term2", 3, 4); - graphToken.setPositionLength(2); + // term1 term2:2/term3 term4 term5 CannedTokenStream ts = new CannedTokenStream( new Token("term1", 1, 2), - graphToken, + new Token("term2", 1, 3, 4, 2), new Token("term3", 0, 3, 4), new Token("term4", 5, 6), new Token("term5", 6, 7) @@ -138,4 +170,50 @@ public void testGraphSynonyms() throws IOException { } + public void testGraphSynonymsWithGaps() throws IOException { + + // term1 [] term2:4/term3 [] [] term4 term5 + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + new Token("term2", 2, 3, 4, 4), + new Token("term3", 0, 3, 4), + new Token("term4", 3, 5, 6), + new Token("term5", 6, 7) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); + IntervalsSource expected = Intervals.ordered( + Intervals.term("term1"), + Intervals.or( + Intervals.extend(Intervals.term("term2"), 1, 0), + Intervals.phrase( + Intervals.extend(Intervals.term("term3"), 1, 0), + Intervals.extend(Intervals.term("term4"), 2, 0))), + Intervals.term("term5") + ); + + assertEquals(expected, source); + + } + + public void testGraphTerminatesOnGap() throws IOException { + // term1 term2:2/term3 term4 [] term5 + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + new Token("term2", 1, 2, 3, 2), + new Token("term3", 0, 2, 3), + new Token("term4", 2, 3), + new Token("term5", 2, 6, 7) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); + IntervalsSource expected = Intervals.ordered( + Intervals.term("term1"), + Intervals.or(Intervals.term("term2"), 
Intervals.phrase("term3", "term4")), + Intervals.extend(Intervals.term("term5"), 1, 0) + ); + assertEquals(expected, source); + } + }