From fd1553c004b571bee612455cb002e03705b32feb Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 11 Jul 2017 21:19:14 +0200 Subject: [PATCH 1/2] Ensure we rewrite common queries to `match_none` if possible (#25650) In certain situations we can early terminate and just skip the entire query phase or make the lucene level rewrite very cheap if we can already tell that a query won't match any documents. For instance if there is a single `match_none` ie. due to some range rewrite in a filter or must clause of a boolean query it can just drop all it's other queries since it will never match. --- .../index/query/BoolQueryBuilder.java | 9 ++++++++- .../query/ConstantScoreQueryBuilder.java | 3 +++ .../index/query/BoolQueryBuilderTests.java | 19 +++++++++++++++++++ .../query/ConstantScoreQueryBuilderTests.java | 5 +++++ 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/elasticsearch/index/query/BoolQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/BoolQueryBuilder.java index 4db909be87e1f..8f887e5704b6a 100644 --- a/core/src/main/java/org/elasticsearch/index/query/BoolQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/BoolQueryBuilder.java @@ -39,6 +39,8 @@ import java.util.Objects; import java.util.Optional; import java.util.function.Consumer; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; import static org.elasticsearch.common.lucene.search.Queries.fixNegativeQueryIfNeeded; @@ -479,7 +481,12 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws changed |= rewriteClauses(queryRewriteContext, mustNotClauses, newBuilder::mustNot); changed |= rewriteClauses(queryRewriteContext, filterClauses, newBuilder::filter); changed |= rewriteClauses(queryRewriteContext, shouldClauses, newBuilder::should); - + // lets do some early termination and prevent any kind of rewriting if we have a mandatory query that is a MatchNoneQueryBuilder + Optional any = Stream.concat(newBuilder.mustClauses.stream(), newBuilder.filterClauses.stream()) + .filter(b -> b instanceof MatchNoneQueryBuilder).findAny(); + if (any.isPresent()) { + return any.get(); + } if (changed) { newBuilder.adjustPureNegative = adjustPureNegative; newBuilder.disableCoord = disableCoord; diff --git a/core/src/main/java/org/elasticsearch/index/query/ConstantScoreQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/ConstantScoreQueryBuilder.java index 6c36acfd9f1dc..e9c54fee3273d 100644 --- a/core/src/main/java/org/elasticsearch/index/query/ConstantScoreQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/ConstantScoreQueryBuilder.java @@ -165,6 +165,9 @@ protected boolean doEquals(ConstantScoreQueryBuilder other) { @Override protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException { QueryBuilder rewrite = filterBuilder.rewrite(queryRewriteContext); + if (rewrite instanceof MatchNoneQueryBuilder) { + return rewrite; // we won't match anyway + } if (rewrite != filterBuilder) { return new ConstantScoreQueryBuilder(rewrite); } diff --git a/core/src/test/java/org/elasticsearch/index/query/BoolQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/BoolQueryBuilderTests.java index 66805ca89fb0f..93014a14098cd 100644 --- a/core/src/test/java/org/elasticsearch/index/query/BoolQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/BoolQueryBuilderTests.java @@ -470,4 +470,23 @@ public void testRewriteMultipleTimes() throws IOException { assertEquals(rewrittenAgain, expected); assertEquals(QueryBuilder.rewriteQuery(boolQueryBuilder, createShardContext()), expected); } + + public void testRewriteWithMatchNone() throws IOException { + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); + boolQueryBuilder.must(new WrapperQueryBuilder(new WrapperQueryBuilder(new MatchNoneQueryBuilder().toString()).toString())); + QueryBuilder rewritten = boolQueryBuilder.rewrite(createShardContext()); + assertEquals(new MatchNoneQueryBuilder(), rewritten); + + boolQueryBuilder = new BoolQueryBuilder(); + boolQueryBuilder.must(new TermQueryBuilder("foo","bar")); + boolQueryBuilder.filter(new WrapperQueryBuilder(new WrapperQueryBuilder(new MatchNoneQueryBuilder().toString()).toString())); + rewritten = boolQueryBuilder.rewrite(createShardContext()); + assertEquals(new MatchNoneQueryBuilder(), rewritten); + + boolQueryBuilder = new BoolQueryBuilder(); + boolQueryBuilder.must(new TermQueryBuilder("foo","bar")); + boolQueryBuilder.filter(new BoolQueryBuilder().should(new TermQueryBuilder("foo","bar")).filter(new MatchNoneQueryBuilder())); + rewritten = QueryBuilder.rewriteQuery(boolQueryBuilder, createShardContext()); + assertEquals(new MatchNoneQueryBuilder(), rewritten); + } } diff --git a/core/src/test/java/org/elasticsearch/index/query/ConstantScoreQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/ConstantScoreQueryBuilderTests.java index b7508dd0bc85d..ccc2b188abdf4 100644 --- a/core/src/test/java/org/elasticsearch/index/query/ConstantScoreQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/ConstantScoreQueryBuilderTests.java @@ -134,4 +134,9 @@ public void testFromJsonEmptyQueryBody() throws IOException { assertWarnings("query malformed, empty clause found at [1:40]"); } + public void testRewriteToMatchNone() throws IOException { + ConstantScoreQueryBuilder constantScoreQueryBuilder = new ConstantScoreQueryBuilder(new MatchNoneQueryBuilder()); + QueryBuilder rewrite = constantScoreQueryBuilder.rewrite(createShardContext()); + assertEquals(rewrite, new MatchNoneQueryBuilder()); + } } From 9050c9e02410bd081c198ba658624623ae6c28ee Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 13 Jul 2017 11:57:01 +0200 Subject: [PATCH 2/2] Backport `can_match` endpoint to 5.6 to allow 6.0 to use the optimization in mixed version 6.0 applies some optimization to query rewriting if the number of shards is large. In oder to make use of this optimization this commit adds the internal endpoint to 5.6 such that a 6.0 coordinator node can make use of the feature even in a mixed cluster or via cross cluster search. Relates to #25658 --- .../action/search/SearchTransportService.java | 38 ++++++++++++++ .../elasticsearch/search/SearchService.java | 35 +++++++++++++ .../aggregations/AggregatorFactories.java | 16 +++++- .../bucket/terms/TermsAggregationBuilder.java | 7 +++ .../search/SearchServiceTests.java | 50 +++++++++++++++++++ 5 files changed, 144 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/action/search/SearchTransportService.java b/core/src/main/java/org/elasticsearch/action/search/SearchTransportService.java index 6f6600c8120a6..25765a3c2fe02 100644 --- a/core/src/main/java/org/elasticsearch/action/search/SearchTransportService.java +++ b/core/src/main/java/org/elasticsearch/action/search/SearchTransportService.java @@ -76,6 +76,7 @@ public class SearchTransportService extends AbstractComponent { public static final String QUERY_FETCH_SCROLL_ACTION_NAME = "indices:data/read/search[phase/query+fetch/scroll]"; public static final String FETCH_ID_SCROLL_ACTION_NAME = "indices:data/read/search[phase/fetch/id/scroll]"; public static final String FETCH_ID_ACTION_NAME = "indices:data/read/search[phase/fetch/id]"; + public static final String QUERY_CAN_MATCH_NAME = "indices:data/read/search[can_match]"; private final TransportService transportService; @@ -395,8 +396,45 @@ public void messageReceived(ShardFetchSearchRequest request, TransportChannel ch } }); TransportActionProxy.registerProxyAction(transportService, FETCH_ID_ACTION_NAME, FetchSearchResult::new); + + // this is super cheap and should not hit thread-pool rejections + transportService.registerRequestHandler(QUERY_CAN_MATCH_NAME, ShardSearchTransportRequest::new, ThreadPool.Names.SEARCH, + false, true, new TaskAwareTransportRequestHandler() { + @Override + public void messageReceived(ShardSearchTransportRequest request, TransportChannel channel, Task task) throws Exception { + boolean canMatch = searchService.canMatch(request); + channel.sendResponse(new CanMatchResponse(canMatch)); + } + }); + TransportActionProxy.registerProxyAction(transportService, QUERY_CAN_MATCH_NAME, CanMatchResponse::new); } + // this feature is only really used in 6.0 but we added the endpoints to 5.6 to ensure if a user is on 5.6 and they desperately + // need it they can use cross cluster search with a 6.0 CCS Node or can use a 6.0 node as a coordinator to at least test if + // if would help their usecase. it also makes the feature in 6.x BWC with the latest 5.x release. + private static final class CanMatchResponse extends SearchPhaseResult { + private boolean canMatch; + + private CanMatchResponse() {} + + private CanMatchResponse(boolean canMatch) { + this.canMatch = canMatch; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + canMatch = in.readBoolean(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeBoolean(canMatch); + } + } + + /** * Returns a connection to the given node on the provided cluster. If the cluster alias is null the node will be resolved * against the local cluster. diff --git a/core/src/main/java/org/elasticsearch/search/SearchService.java b/core/src/main/java/org/elasticsearch/search/SearchService.java index 8f087a1cf1845..bde1d8e674afa 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchService.java +++ b/core/src/main/java/org/elasticsearch/search/SearchService.java @@ -26,6 +26,7 @@ import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.OriginalIndices; import org.elasticsearch.action.search.SearchTask; +import org.elasticsearch.action.search.SearchType; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Nullable; @@ -42,6 +43,9 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.query.InnerHitContextBuilder; +import org.elasticsearch.index.query.MatchAllQueryBuilder; +import org.elasticsearch.index.query.MatchNoneQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.shard.IndexEventListener; import org.elasticsearch.index.shard.IndexShard; @@ -828,4 +832,35 @@ public void run() { public AliasFilter buildAliasFilter(ClusterState state, String index, String... expressions) { return indicesService.buildAliasFilter(state, index, expressions); } + + /** + * This method does a very quick rewrite of the query and returns true if the query can potentially match any documents. + * This method can have false positives while if it returns false the query won't match any documents on the current + * shard. + */ + public boolean canMatch(ShardSearchRequest request) throws IOException { + assert request.searchType() == SearchType.QUERY_THEN_FETCH : "unexpected search type: " + request.searchType(); + try (DefaultSearchContext context = createSearchContext(request, defaultSearchTimeout, null)) { + SearchSourceBuilder source = context.request().source(); + if (canRewriteToMatchNone(source)) { + QueryBuilder queryBuilder = source.query(); + return queryBuilder instanceof MatchNoneQueryBuilder == false; + } + return true; // null query means match_all + } + } + + static boolean canRewriteToMatchNone(SearchSourceBuilder source) { + if (source == null || source.query() == null || source.query() instanceof MatchAllQueryBuilder) { + return false; + } else { + AggregatorFactories.Builder aggregations = source.aggregations(); + if (aggregations != null) { + if (aggregations.mustVisitAllDocs()) { + return false; + } + } + } + return true; + } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java b/core/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java index 4e7173946936f..2b19a483c25f2 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java @@ -26,6 +26,8 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.AggregationPath; import org.elasticsearch.search.aggregations.support.AggregationPath.PathElement; @@ -293,8 +295,18 @@ public void writeTo(StreamOutput out) throws IOException { } } - public Builder addAggregators(AggregatorFactories factories) { - throw new UnsupportedOperationException("This needs to be removed"); + public boolean mustVisitAllDocs() { + for (AggregationBuilder builder : aggregationBuilders) { + if (builder instanceof GlobalAggregationBuilder) { + return true; + } else if (builder instanceof TermsAggregationBuilder) { + if (((TermsAggregationBuilder) builder).minDocCount() == 0) { + return true; + } + } + + } + return false; } public Builder addAggregator(AggregationBuilder factory) { diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregationBuilder.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregationBuilder.java index 4ecc664dd421f..210f45be3179c 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregationBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregationBuilder.java @@ -177,6 +177,13 @@ public TermsAggregationBuilder minDocCount(long minDocCount) { return this; } + /** + * Returns the minimum document count required per term + */ + public long minDocCount() { + return bucketCountThresholds.getMinDocCount(); + } + /** * Set the minimum document count terms should have on the shard in order to * appear in the response. diff --git a/core/src/test/java/org/elasticsearch/search/SearchServiceTests.java b/core/src/test/java/org/elasticsearch/search/SearchServiceTests.java index 31edc3ac808ea..4b7db074f40c4 100644 --- a/core/src/test/java/org/elasticsearch/search/SearchServiceTests.java +++ b/core/src/test/java/org/elasticsearch/search/SearchServiceTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.search; import com.carrotsearch.hppc.IntArrayList; + import org.apache.lucene.search.Query; import org.apache.lucene.store.AlreadyClosedException; import org.elasticsearch.action.ActionListener; @@ -36,13 +37,19 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.query.AbstractQueryBuilder; +import org.elasticsearch.index.query.MatchAllQueryBuilder; +import org.elasticsearch.index.query.MatchNoneQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.indices.IndicesService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.SearchPlugin; +import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.elasticsearch.search.aggregations.support.ValueType; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.ShardFetchRequest; import org.elasticsearch.search.internal.AliasFilter; @@ -304,4 +311,47 @@ public String getWriteableName() { return null; } } + + public void testCanMatch() throws IOException { + createIndex("index"); + final SearchService service = getInstanceFromNode(SearchService.class); + final IndicesService indicesService = getInstanceFromNode(IndicesService.class); + final IndexService indexService = indicesService.indexServiceSafe(resolveIndex("index")); + final IndexShard indexShard = indexService.getShard(0); + assertTrue(service.canMatch(new ShardSearchLocalRequest(indexShard.shardId(), 1, SearchType.QUERY_THEN_FETCH, null, + Strings.EMPTY_ARRAY, false, new AliasFilter(null, Strings.EMPTY_ARRAY), 1f))); + + assertTrue(service.canMatch(new ShardSearchLocalRequest(indexShard.shardId(), 1, SearchType.QUERY_THEN_FETCH, + new SearchSourceBuilder(), Strings.EMPTY_ARRAY, false, new AliasFilter(null, Strings.EMPTY_ARRAY), 1f))); + + assertTrue(service.canMatch(new ShardSearchLocalRequest(indexShard.shardId(), 1, SearchType.QUERY_THEN_FETCH, + new SearchSourceBuilder().query(new MatchAllQueryBuilder()), Strings.EMPTY_ARRAY, false, + new AliasFilter(null, Strings.EMPTY_ARRAY), 1f))); + + assertTrue(service.canMatch(new ShardSearchLocalRequest(indexShard.shardId(), 1, SearchType.QUERY_THEN_FETCH, + new SearchSourceBuilder().query(new MatchNoneQueryBuilder()) + .aggregation(new TermsAggregationBuilder("test", ValueType.STRING).minDocCount(0)), Strings.EMPTY_ARRAY, false, + new AliasFilter(null, Strings.EMPTY_ARRAY), 1f))); + assertTrue(service.canMatch(new ShardSearchLocalRequest(indexShard.shardId(), 1, SearchType.QUERY_THEN_FETCH, + new SearchSourceBuilder().query(new MatchNoneQueryBuilder()) + .aggregation(new GlobalAggregationBuilder("test")), Strings.EMPTY_ARRAY, false, + new AliasFilter(null, Strings.EMPTY_ARRAY), 1f))); + + assertFalse(service.canMatch(new ShardSearchLocalRequest(indexShard.shardId(), 1, SearchType.QUERY_THEN_FETCH, + new SearchSourceBuilder().query(new MatchNoneQueryBuilder()), Strings.EMPTY_ARRAY, false, + new AliasFilter(null, Strings.EMPTY_ARRAY), 1f))); + + } + + public void testCanRewriteToMatchNone() { + assertFalse(SearchService.canRewriteToMatchNone(new SearchSourceBuilder().query(new MatchNoneQueryBuilder()) + .aggregation(new GlobalAggregationBuilder("test")))); + assertFalse(SearchService.canRewriteToMatchNone(new SearchSourceBuilder())); + assertFalse(SearchService.canRewriteToMatchNone(null)); + assertFalse(SearchService.canRewriteToMatchNone(new SearchSourceBuilder().query(new MatchNoneQueryBuilder()) + .aggregation(new TermsAggregationBuilder("test", ValueType.STRING).minDocCount(0)))); + assertTrue(SearchService.canRewriteToMatchNone(new SearchSourceBuilder().query(new TermQueryBuilder("foo", "bar")))); + assertTrue(SearchService.canRewriteToMatchNone(new SearchSourceBuilder().query(new MatchNoneQueryBuilder()) + .aggregation(new TermsAggregationBuilder("test", ValueType.STRING).minDocCount(1)))); + } }