Skip to content
Merged
5 changes: 5 additions & 0 deletions docs/changelog/114234.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 114234
summary: Prevent flattening of ordered and unordered interval sources
area: Search
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import static java.util.Collections.singletonMap;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;

public class IntervalQueriesIT extends ESIntegTestCase {

Expand Down Expand Up @@ -64,6 +65,58 @@ public void testEmptyIntervalsWithNestedMappings() throws InterruptedException {
);
}

public void testPreserveInnerGap() {
assertAcked(prepareCreate("index").setMapping("""
{
"_doc" : {
"properties" : {
"text" : { "type" : "text" }
}
}
}
"""));

indexRandom(true, prepareIndex("index").setId("1").setSource("text", "w1 w2 w3 w4 w5"));

// ordered
{
var res = prepareSearch("index").setQuery(
new IntervalQueryBuilder(
"text",
new IntervalsSourceProvider.Combine(
Arrays.asList(
new IntervalsSourceProvider.Match("w1 w4", -1, true, null, null, null),
new IntervalsSourceProvider.Match("w5", -1, true, null, null, null)
),
true,
1,
null
)
)
);
assertSearchHits(res, "1");
}

// unordered
{
var res = prepareSearch("index").setQuery(
new IntervalQueryBuilder(
"text",
new IntervalsSourceProvider.Combine(
Arrays.asList(
new IntervalsSourceProvider.Match("w3", 0, false, null, null, null),
new IntervalsSourceProvider.Match("w4 w1", -1, false, null, null, null)
),
false,
0,
null
)
)
);
assertSearchHits(res, "1");
}
}

private static class EmptyAnalyzer extends Analyzer {

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ protected static IntervalsSource combineSources(List<IntervalsSource> sources, i
if (maxGaps == 0 && ordered) {
return Intervals.phrase(sourcesArray);
}
IntervalsSource inner = ordered ? Intervals.ordered(sourcesArray) : Intervals.unordered(sourcesArray);
IntervalsSource inner = ordered ? XIntervals.ordered(sourcesArray) : XIntervals.unordered(sourcesArray);
if (maxGaps == -1) {
return inner;
}
Expand Down
106 changes: 106 additions & 0 deletions server/src/main/java/org/elasticsearch/index/query/XIntervals.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.query;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.intervals.IntervalIterator;
import org.apache.lucene.queries.intervals.IntervalMatchesIterator;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.QueryVisitor;

import java.io.IOException;
import java.util.Collection;
import java.util.Objects;

/**
* Copy of {@link Intervals} that exposes versions of {@link Intervals#ordered} and {@link Intervals#unordered}
* that preserve their inner gaps.
* NOTE: Remove this hack when a version of Lucene with https://github.com/apache/lucene/pull/13819 is used (10.1.0).
*/
public final class XIntervals {

/**
* Create an ordered {@link IntervalsSource}
*
* <p>Returns intervals in which the subsources all appear in the given order
*
* @param subSources an ordered set of {@link IntervalsSource} objects
*/
public static IntervalsSource ordered(IntervalsSource... subSources) {
return new DelegateIntervalsSource(Intervals.ordered(subSources));
}

/**
* Create an ordered {@link IntervalsSource}
*
* <p>Returns intervals in which the subsources all appear in the given order
*
* @param subSources an ordered set of {@link IntervalsSource} objects
*/
public static IntervalsSource unordered(IntervalsSource... subSources) {
return new DelegateIntervalsSource(Intervals.unordered(subSources));
}

/**
* Wraps a source to avoid aggressive flattening of the ordered and unordered sources.
* The flattening modifies the final gap and is removed in the latest unreleased version of Lucene (10.1).
*/
private static class DelegateIntervalsSource extends IntervalsSource {
private final IntervalsSource delegate;

private DelegateIntervalsSource(IntervalsSource delegate) {
this.delegate = delegate;
}

@Override
public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
return delegate.intervals(field, ctx);
}

@Override
public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
return delegate.matches(field, ctx, doc);
}

@Override
public void visit(String field, QueryVisitor visitor) {
delegate.visit(field, visitor);
}

@Override
public int minExtent() {
return delegate.minExtent();
}

@Override
public Collection<IntervalsSource> pullUpDisjunctions() {
return delegate.pullUpDisjunctions();
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
DelegateIntervalsSource that = (DelegateIntervalsSource) o;
return Objects.equals(delegate, that.delegate);
}

@Override
public int hashCode() {
return Objects.hash(delegate);
}

@Override
public String toString() {
return delegate.toString();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public void testOrdered() throws IOException {
CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2), new Token("term2", 3, 4), new Token("term3", 5, 6));

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3"));
IntervalsSource expected = XIntervals.ordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3"));

assertEquals(expected, source);

Expand All @@ -57,7 +57,7 @@ public void testUnordered() throws IOException {
CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2), new Token("term2", 3, 4), new Token("term3", 5, 6));

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, false);
IntervalsSource expected = Intervals.unordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3"));
IntervalsSource expected = XIntervals.unordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3"));

assertEquals(expected, source);

Expand Down Expand Up @@ -101,7 +101,7 @@ public void testSimpleSynonyms() throws IOException {
);

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(
IntervalsSource expected = XIntervals.ordered(
Intervals.term("term1"),
Intervals.or(Intervals.term("term2"), Intervals.term("term4")),
Intervals.term("term3")
Expand All @@ -122,7 +122,7 @@ public void testSimpleSynonymsWithGap() throws IOException {
);

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(
IntervalsSource expected = XIntervals.ordered(
Intervals.term("term1"),
Intervals.extend(Intervals.or(Intervals.term("term2"), Intervals.term("term3"), Intervals.term("term4")), 1, 0),
Intervals.term("term5")
Expand All @@ -143,7 +143,7 @@ public void testGraphSynonyms() throws IOException {
);

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(
IntervalsSource expected = XIntervals.ordered(
Intervals.term("term1"),
Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4")),
Intervals.term("term5")
Expand All @@ -166,7 +166,7 @@ public void testGraphSynonymsWithGaps() throws IOException {
);

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(
IntervalsSource expected = XIntervals.ordered(
Intervals.term("term1"),
Intervals.or(
Intervals.extend(Intervals.term("term2"), 1, 0),
Expand All @@ -190,7 +190,7 @@ public void testGraphTerminatesOnGap() throws IOException {
);

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(
IntervalsSource expected = XIntervals.ordered(
Intervals.term("term1"),
Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4")),
Intervals.extend(Intervals.term("term5"), 1, 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ public void testMatchInterval() throws IOException {
}""", TEXT_FIELD_NAME);

IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.unordered(Intervals.term("hello"), Intervals.term("world")));
Query expected = new IntervalQuery(TEXT_FIELD_NAME, XIntervals.unordered(Intervals.term("hello"), Intervals.term("world")));

assertEquals(expected, builder.toQuery(createSearchExecutionContext()));

Expand All @@ -222,7 +222,7 @@ public void testMatchInterval() throws IOException {
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.maxgaps(40, Intervals.unordered(Intervals.term("hello"), Intervals.term("world")))
Intervals.maxgaps(40, XIntervals.unordered(Intervals.term("hello"), Intervals.term("world")))
);
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));

Expand All @@ -241,7 +241,7 @@ public void testMatchInterval() throws IOException {

builder = (IntervalQueryBuilder) parseQuery(json);
expected = new BoostQuery(
new IntervalQuery(TEXT_FIELD_NAME, Intervals.ordered(Intervals.term("hello"), Intervals.term("world"))),
new IntervalQuery(TEXT_FIELD_NAME, XIntervals.ordered(Intervals.term("hello"), Intervals.term("world"))),
2
);
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
Expand All @@ -263,7 +263,7 @@ public void testMatchInterval() throws IOException {
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))
Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world")))
);
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));

Expand All @@ -285,7 +285,7 @@ public void testMatchInterval() throws IOException {
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.fixField(MASKED_FIELD, Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))))
Intervals.fixField(MASKED_FIELD, Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world"))))
);
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));

Expand Down Expand Up @@ -314,7 +314,7 @@ public void testMatchInterval() throws IOException {
expected = new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.containing(
Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))),
Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world"))),
Intervals.term("blah")
)
);
Expand Down Expand Up @@ -426,7 +426,7 @@ public void testCombineInterval() throws IOException {
Intervals.containedBy(
Intervals.maxgaps(
30,
Intervals.ordered(Intervals.term("one"), Intervals.unordered(Intervals.term("two"), Intervals.term("three")))
XIntervals.ordered(Intervals.term("one"), XIntervals.unordered(Intervals.term("two"), Intervals.term("three")))
),
Intervals.term("SENTENCE")
)
Expand Down Expand Up @@ -486,7 +486,7 @@ public void testCombineDisjunctionInterval() throws IOException {
Intervals.notContainedBy(
Intervals.maxgaps(
30,
Intervals.ordered(Intervals.term("atmosphere"), Intervals.or(Intervals.term("cold"), Intervals.term("outside")))
XIntervals.ordered(Intervals.term("atmosphere"), Intervals.or(Intervals.term("cold"), Intervals.term("outside")))
),
Intervals.term("freeze")
)
Expand Down