elastic · romseygeek · Mar 5, 2019 · Feb 18, 2019 · Mar 4, 2019 · Mar 4, 2019
diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java
@@ -143,38 +143,50 @@ protected static IntervalsSource combineSources(List<IntervalsSource> sources, i
     protected List<IntervalsSource> analyzeTerms(TokenStream ts) throws IOException {
         List<IntervalsSource> terms = new ArrayList<>();
         TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+        PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class);
         ts.reset();
         while (ts.incrementToken()) {
             BytesRef term = bytesAtt.getBytesRef();
-            terms.add(Intervals.term(BytesRef.deepCopyOf(term)));
+            int precedingSpaces = posAtt.getPositionIncrement() - 1; // an increment of 1 means no gap before this token
+            terms.add(extend(Intervals.term(BytesRef.deepCopyOf(term)), precedingSpaces)); // term source, wrapped if a gap precedes it
         }
         ts.end();
         return terms;
     }
 
+    public static IntervalsSource extend(IntervalsSource source, int precedingSpaces) {
+        // Only wrap when there is a gap to cover; otherwise hand the source back untouched.
+        return precedingSpaces == 0
+            ? source
+            : Intervals.extend(source, precedingSpaces, 0);
+    }
+
     protected IntervalsSource analyzeSynonyms(TokenStream ts, int maxGaps, boolean ordered) throws IOException {
         List<IntervalsSource> terms = new ArrayList<>();
         List<IntervalsSource> synonyms = new ArrayList<>();
         TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
         PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class);
         ts.reset();
+        int spaces = 0; // gap (in positions) in front of the synonym group currently being collected
         while (ts.incrementToken()) {
-            if (posAtt.getPositionIncrement() == 1) {
+            int posInc = posAtt.getPositionIncrement();
+            if (posInc > 0) { // a non-zero increment starts a new position: flush the group collected so far
                 if (synonyms.size() == 1) {
-                    terms.add(synonyms.get(0));
+                    terms.add(extend(synonyms.get(0), spaces));
                 }
                 else if (synonyms.size() > 1) {
-                    terms.add(Intervals.or(synonyms.toArray(new IntervalsSource[0])));
+                    terms.add(extend(Intervals.or(synonyms.toArray(new IntervalsSource[0])), spaces));
                 }
                 synonyms.clear();
+                spaces = posInc - 1; // remember the gap for the group that begins with the current token
             }
             synonyms.add(Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef())));
         }
         if (synonyms.size() == 1) {
-            terms.add(synonyms.get(0));
+            terms.add(extend(synonyms.get(0), spaces));
         }
         else {
-            terms.add(Intervals.or(synonyms.toArray(new IntervalsSource[0])));
+            terms.add(extend(Intervals.or(synonyms.toArray(new IntervalsSource[0])), spaces));
         }
         return combineSources(terms, maxGaps, ordered);
     }

diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java
@@ -94,6 +94,22 @@ public void testPhrase() throws IOException {
 
     }
 
+    public void testPhraseWithStopword() throws IOException {
+        // term1 [] term3 -- a position increment of 2 leaves one empty position before term3
+        Token first = new Token("term1", 1, 1, 2);
+        Token afterGap = new Token("term3", 2, 5, 6);
+        CannedTokenStream tokens = new CannedTokenStream(first, afterGap);
+
+        // The gap is expected to be folded into term3 as a one-position extension in front of it.
+        IntervalsSource gapped = Intervals.extend(Intervals.term("term3"), 1, 0);
+        IntervalsSource expected = Intervals.phrase(Intervals.term("term1"), gapped);
+
+        IntervalsSource actual = BUILDER.analyzeText(new CachingTokenFilter(tokens), 0, true);
+
+        assertEquals(expected, actual);
+
+    }
+
     public void testSimpleSynonyms() throws IOException {
 
         CannedTokenStream ts = new CannedTokenStream(
@@ -112,16 +128,32 @@ public void testSimpleSynonyms() throws IOException {
 
     }
 
-    public void testGraphSynonyms() throws IOException {
+    public void testSimpleSynonymsWithGap() throws IOException {
+        // Token layout: term1 [] term2/term3/term4 term5   ([] is an empty position, / separates synonyms)
+        CannedTokenStream tokens = new CannedTokenStream(
+            new Token("term1", 1, 2),
+            new Token("term2", 2, 3, 4),
+            new Token("term3", 0, 3, 4),
+            new Token("term4", 0, 3, 4),
+            new Token("term5", 5, 6)
+        );
+
+        // The gap is attached once to the whole synonym disjunction, not to each alternative.
+        IntervalsSource synonymGroup = Intervals.or(Intervals.term("term2"), Intervals.term("term3"), Intervals.term("term4"));
+        IntervalsSource expected = Intervals.ordered(
+            Intervals.term("term1"), Intervals.extend(synonymGroup, 1, 0), Intervals.term("term5")
+        );
+        IntervalsSource actual = BUILDER.analyzeText(new CachingTokenFilter(tokens), -1, true);
+        assertEquals(expected, actual);
+    }
 
-        // term1 term2/term3:2 term4 term5
+    public void testGraphSynonyms() throws IOException {
 
-        Token graphToken = new Token("term2", 3, 4);
-        graphToken.setPositionLength(2);
+        // term1 term2:2/term3 term4 term5
 
         CannedTokenStream ts = new CannedTokenStream(
             new Token("term1", 1, 2),
-            graphToken,
+            new Token("term2", 1, 3, 4, 2),
             new Token("term3", 0, 3, 4),
             new Token("term4", 5, 6),
             new Token("term5", 6, 7)
@@ -138,4 +170,50 @@ public void testGraphSynonyms() throws IOException {
 
     }
 
+    public void testGraphSynonymsWithGaps() throws IOException {
+
+        // term1 [] term2:4/term3 [] [] term4 term5
+        // ([] is an empty position, :4 is a position length of 4)
+        CannedTokenStream tokens = new CannedTokenStream(
+            new Token("term1", 1, 2),
+            new Token("term2", 2, 3, 4, 4),
+            new Token("term3", 0, 3, 4),
+            new Token("term4", 3, 5, 6),
+            new Token("term5", 6, 7)
+        );
+
+        // First path through the graph: term2 alone, carrying the gap that precedes it.
+        IntervalsSource viaTerm2 = Intervals.extend(Intervals.term("term2"), 1, 0);
+        // Second path: term3 then term4, each carrying the gap in front of it.
+        IntervalsSource viaTerm3AndTerm4 = Intervals.phrase(
+            Intervals.extend(Intervals.term("term3"), 1, 0),
+            Intervals.extend(Intervals.term("term4"), 2, 0));
+        IntervalsSource expected = Intervals.ordered(
+            Intervals.term("term1"), Intervals.or(viaTerm2, viaTerm3AndTerm4), Intervals.term("term5"));
+
+        IntervalsSource actual = BUILDER.analyzeText(new CachingTokenFilter(tokens), -1, true);
+
+        assertEquals(expected, actual);
+
+    }
+
+    public void testGraphTerminatesOnGap() throws IOException {
+        // Token layout: term1 term2:2/term3 term4 [] term5   (the gap falls after the graph section)
+        CannedTokenStream tokens = new CannedTokenStream(
+            new Token("term1", 1, 2),
+            new Token("term2", 1, 2, 3, 2),
+            new Token("term3", 0, 2, 3),
+            new Token("term4", 2, 3),
+            new Token("term5", 2, 6, 7)
+        );
+
+        // The graph alternatives stay unextended; only term5 absorbs the gap in front of it.
+        IntervalsSource graph = Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4"));
+        IntervalsSource afterGap = Intervals.extend(Intervals.term("term5"), 1, 0);
+        IntervalsSource expected = Intervals.ordered(Intervals.term("term1"), graph, afterGap);
+
+        IntervalsSource actual = BUILDER.analyzeText(new CachingTokenFilter(tokens), -1, true);
+        assertEquals(expected, actual);
+    }
+
 }