diff --git a/docs/changelog/114234.yaml b/docs/changelog/114234.yaml new file mode 100644 index 0000000000000..0f77ada794bee --- /dev/null +++ b/docs/changelog/114234.yaml @@ -0,0 +1,5 @@ +pr: 114234 +summary: Prevent flattening of ordered and unordered interval sources +area: Search +type: bug +issues: [] diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/IntervalQueriesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/IntervalQueriesIT.java index 7fc93debc65c9..def137f9a9ec8 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/query/IntervalQueriesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/IntervalQueriesIT.java @@ -31,6 +31,7 @@ import static java.util.Collections.singletonMap; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; public class IntervalQueriesIT extends ESIntegTestCase { @@ -64,6 +65,58 @@ public void testEmptyIntervalsWithNestedMappings() throws InterruptedException { ); } + public void testPreserveInnerGap() { + assertAcked(prepareCreate("index").setMapping(""" + { + "_doc" : { + "properties" : { + "text" : { "type" : "text" } + } + } + } + """)); + + indexRandom(true, prepareIndex("index").setId("1").setSource("text", "w1 w2 w3 w4 w5")); + + // ordered + { + var res = prepareSearch("index").setQuery( + new IntervalQueryBuilder( + "text", + new IntervalsSourceProvider.Combine( + Arrays.asList( + new IntervalsSourceProvider.Match("w1 w4", -1, true, null, null, null), + new IntervalsSourceProvider.Match("w5", -1, true, null, null, null) + ), + true, + 1, + null + ) + ) + ); + assertSearchHits(res, "1"); + } + + // unordered + { + var res = prepareSearch("index").setQuery( + new IntervalQueryBuilder( + "text", + new IntervalsSourceProvider.Combine( + Arrays.asList( + new IntervalsSourceProvider.Match("w3", 0, false, null, null, null), + new IntervalsSourceProvider.Match("w4 w1", -1, false, null, null, null) + ), + false, + 0, + null + ) + ) + ); + assertSearchHits(res, "1"); + } + } + private static class EmptyAnalyzer extends Analyzer { @Override diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java index 46d7fec641943..f21edaeb94f22 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -126,7 +126,7 @@ protected static IntervalsSource combineSources(List sources, i if (maxGaps == 0 && ordered) { return Intervals.phrase(sourcesArray); } - IntervalsSource inner = ordered ? Intervals.ordered(sourcesArray) : Intervals.unordered(sourcesArray); + IntervalsSource inner = ordered ? XIntervals.ordered(sourcesArray) : XIntervals.unordered(sourcesArray); if (maxGaps == -1) { return inner; } diff --git a/server/src/main/java/org/elasticsearch/index/query/XIntervals.java b/server/src/main/java/org/elasticsearch/index/query/XIntervals.java new file mode 100644 index 0000000000000..7d8552e18f790 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/XIntervals.java @@ -0,0 +1,106 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.intervals.IntervalIterator; +import org.apache.lucene.queries.intervals.IntervalMatchesIterator; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.QueryVisitor; + +import java.io.IOException; +import java.util.Collection; +import java.util.Objects; + +/** + * Copy of {@link Intervals} that exposes versions of {@link Intervals#ordered} and {@link Intervals#unordered} + * that preserve their inner gaps. + * NOTE: Remove this hack when a version of Lucene with https://github.com/apache/lucene/pull/13819 is used (10.1.0). + */ +public final class XIntervals { + + /** + * Create an ordered {@link IntervalsSource} + * + *

Returns intervals in which the subsources all appear in the given order + * + * @param subSources an ordered set of {@link IntervalsSource} objects + */ + public static IntervalsSource ordered(IntervalsSource... subSources) { + return new DelegateIntervalsSource(Intervals.ordered(subSources)); + } + + /** + * Create an ordered {@link IntervalsSource} + * + *

Returns intervals in which the subsources all appear in the given order + * + * @param subSources an ordered set of {@link IntervalsSource} objects + */ + public static IntervalsSource unordered(IntervalsSource... subSources) { + return new DelegateIntervalsSource(Intervals.unordered(subSources)); + } + + /** + * Wraps a source to avoid aggressive flattening of the ordered and unordered sources. + * The flattening modifies the final gap and is removed in the latest unreleased version of Lucene (10.1). + */ + private static class DelegateIntervalsSource extends IntervalsSource { + private final IntervalsSource delegate; + + private DelegateIntervalsSource(IntervalsSource delegate) { + this.delegate = delegate; + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + return delegate.intervals(field, ctx); + } + + @Override + public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException { + return delegate.matches(field, ctx, doc); + } + + @Override + public void visit(String field, QueryVisitor visitor) { + delegate.visit(field, visitor); + } + + @Override + public int minExtent() { + return delegate.minExtent(); + } + + @Override + public Collection pullUpDisjunctions() { + return delegate.pullUpDisjunctions(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DelegateIntervalsSource that = (DelegateIntervalsSource) o; + return Objects.equals(delegate, that.delegate); + } + + @Override + public int hashCode() { + return Objects.hash(delegate); + } + + @Override + public String toString() { + return delegate.toString(); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java index 3476655c705ae..7005f17663d0d 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java @@ -46,7 +46,7 @@ public void testOrdered() throws IOException { CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2), new Token("term2", 3, 4), new Token("term3", 5, 6)); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3")); + IntervalsSource expected = XIntervals.ordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3")); assertEquals(expected, source); @@ -57,7 +57,7 @@ public void testUnordered() throws IOException { CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2), new Token("term2", 3, 4), new Token("term3", 5, 6)); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, false); - IntervalsSource expected = Intervals.unordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3")); + IntervalsSource expected = XIntervals.unordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3")); assertEquals(expected, source); @@ -101,7 +101,7 @@ public void testSimpleSynonyms() throws IOException { ); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered( + IntervalsSource expected = XIntervals.ordered( Intervals.term("term1"), Intervals.or(Intervals.term("term2"), Intervals.term("term4")), Intervals.term("term3") @@ -122,7 +122,7 @@ public void testSimpleSynonymsWithGap() throws IOException { ); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered( + IntervalsSource expected = XIntervals.ordered( Intervals.term("term1"), Intervals.extend(Intervals.or(Intervals.term("term2"), Intervals.term("term3"), Intervals.term("term4")), 1, 0), Intervals.term("term5") @@ -143,7 +143,7 @@ public void testGraphSynonyms() throws IOException { ); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered( + IntervalsSource expected = XIntervals.ordered( Intervals.term("term1"), Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4")), Intervals.term("term5") @@ -166,7 +166,7 @@ public void testGraphSynonymsWithGaps() throws IOException { ); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered( + IntervalsSource expected = XIntervals.ordered( Intervals.term("term1"), Intervals.or( Intervals.extend(Intervals.term("term2"), 1, 0), @@ -190,7 +190,7 @@ public void testGraphTerminatesOnGap() throws IOException { ); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered( + IntervalsSource expected = XIntervals.ordered( Intervals.term("term1"), Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4")), Intervals.extend(Intervals.term("term5"), 1, 0) diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index aad8275f4749d..f0084f4f24e98 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -203,7 +203,7 @@ public void testMatchInterval() throws IOException { }""", TEXT_FIELD_NAME); IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); - Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.unordered(Intervals.term("hello"), Intervals.term("world"))); + Query expected = new IntervalQuery(TEXT_FIELD_NAME, XIntervals.unordered(Intervals.term("hello"), Intervals.term("world"))); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); @@ -222,7 +222,7 @@ public void testMatchInterval() throws IOException { builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery( TEXT_FIELD_NAME, - Intervals.maxgaps(40, Intervals.unordered(Intervals.term("hello"), Intervals.term("world"))) + Intervals.maxgaps(40, XIntervals.unordered(Intervals.term("hello"), Intervals.term("world"))) ); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); @@ -241,7 +241,7 @@ public void testMatchInterval() throws IOException { builder = (IntervalQueryBuilder) parseQuery(json); expected = new BoostQuery( - new IntervalQuery(TEXT_FIELD_NAME, Intervals.ordered(Intervals.term("hello"), Intervals.term("world"))), + new IntervalQuery(TEXT_FIELD_NAME, XIntervals.ordered(Intervals.term("hello"), Intervals.term("world"))), 2 ); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); @@ -263,7 +263,7 @@ public void testMatchInterval() throws IOException { builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery( TEXT_FIELD_NAME, - Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))) + Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world"))) ); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); @@ -285,7 +285,7 @@ public void testMatchInterval() throws IOException { builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery( TEXT_FIELD_NAME, - Intervals.fixField(MASKED_FIELD, Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))) + Intervals.fixField(MASKED_FIELD, Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world")))) ); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); @@ -314,7 +314,7 @@ public void testMatchInterval() throws IOException { expected = new IntervalQuery( TEXT_FIELD_NAME, Intervals.containing( - Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))), + Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world"))), Intervals.term("blah") ) ); @@ -426,7 +426,7 @@ public void testCombineInterval() throws IOException { Intervals.containedBy( Intervals.maxgaps( 30, - Intervals.ordered(Intervals.term("one"), Intervals.unordered(Intervals.term("two"), Intervals.term("three"))) + XIntervals.ordered(Intervals.term("one"), XIntervals.unordered(Intervals.term("two"), Intervals.term("three"))) ), Intervals.term("SENTENCE") ) @@ -486,7 +486,7 @@ public void testCombineDisjunctionInterval() throws IOException { Intervals.notContainedBy( Intervals.maxgaps( 30, - Intervals.ordered(Intervals.term("atmosphere"), Intervals.or(Intervals.term("cold"), Intervals.term("outside"))) + XIntervals.ordered(Intervals.term("atmosphere"), Intervals.or(Intervals.term("cold"), Intervals.term("outside"))) ), Intervals.term("freeze") )