Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Added
- Expand fetch phase profiling to support inner hits and top hits aggregation phases ([#18936](https://github.com/opensearch-project/OpenSearch/pull/18936))
- Add temporal routing processors for time-based document routing ([#18920](https://github.com/opensearch-project/OpenSearch/issues/18920))
- Implement Query Rewriting Infrastructure ([#19060](https://github.com/opensearch-project/OpenSearch/pull/19060))
- The dynamic mapping parameter supports false_allow_templates ([#19065](https://github.com/opensearch-project/OpenSearch/pull/19065))
- Add a toBuilder method in EngineConfig to support easy modification of configs ([#19054](https://github.com/opensearch-project/OpenSearch/pull/19054))
- Add StoreFactory plugin interface for custom Store implementations ([#19091](https://github.com/opensearch-project/OpenSearch/pull/19091))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,8 @@ public void apply(Settings value, Settings current, Settings previous) {
BlobStoreRepository.SNAPSHOT_REPOSITORY_DATA_CACHE_THRESHOLD,

SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING,
SearchService.QUERY_REWRITING_ENABLED_SETTING,
SearchService.QUERY_REWRITING_TERMS_THRESHOLD_SETTING,

// Composite index settings
CompositeIndexSettings.STAR_TREE_INDEX_ENABLED_SETTING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@
import org.opensearch.core.xcontent.ObjectParser;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.NumberFieldMapper;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -402,9 +400,6 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws
return any.get();
}

changed |= rewriteMustNotRangeClausesToShould(newBuilder, queryRewriteContext);
changed |= rewriteMustClausesToFilter(newBuilder, queryRewriteContext);

if (changed) {
newBuilder.adjustPureNegative = adjustPureNegative;
if (minimumShouldMatch != null) {
Expand Down Expand Up @@ -559,53 +554,4 @@ private boolean checkAllDocsHaveOneValue(List<LeafReaderContext> contexts, Strin
}
return true;
}

/**
 * Moves must clauses whose score is constant across all matching documents (as decided by
 * {@link #isClauseIrrelevantToScoring}) from the must list of {@code newBuilder} to its filter list.
 * Doing so can let Lucene use MaxScoreCache to skip non-competitive docs.
 *
 * @param newBuilder          the builder being assembled by the rewrite; its must/filter lists are modified
 * @param queryRewriteContext the rewrite context, may be null
 * @return true if at least one clause was selected for moving
 */
private boolean rewriteMustClausesToFilter(BoolQueryBuilder newBuilder, QueryRewriteContext queryRewriteContext) {
    // The shard context is optional: a null rewrite context yields null, and the conversion itself can still return null.
    final QueryShardContext shardContext = queryRewriteContext != null ? queryRewriteContext.convertToShardContext() : null;

    final Set<QueryBuilder> scoreNeutralClauses = new HashSet<>();
    for (QueryBuilder candidate : mustClauses) {
        if (isClauseIrrelevantToScoring(candidate, shardContext)) {
            scoreNeutralClauses.add(candidate);
        }
    }

    newBuilder.mustClauses.removeAll(scoreNeutralClauses);
    newBuilder.filterClauses.addAll(scoreNeutralClauses);

    // Something changed exactly when at least one clause was picked.
    return !scoreNeutralClauses.isEmpty();
}

/**
 * Decides whether a clause contributes the same score to every document it matches, so that it
 * can be treated as a filter. The set of recognized clauses is intentionally incomplete and can
 * be extended later.
 *
 * @param clause  the clause to inspect
 * @param context the shard context used to look up field types, may be null
 * @return true if the clause is known not to influence relative scoring
 */
private boolean isClauseIrrelevantToScoring(QueryBuilder clause, QueryShardContext context) {
    // Purely numeric/geometric clauses (e.g. a date range) score identically for all returned docs.
    if (clause instanceof RangeQueryBuilder || clause instanceof GeoBoundingBoxQueryBuilder) {
        return true;
    }

    // Everything below needs to know whether the target field is numeric. doRewrite() is called
    // several times in the search flow and the shard context is only available on the last call
    // (from SearchService.executeQueryPhase()), so without it we leave the clause alone.
    if (context == null || !(clause instanceof WithFieldName)) {
        return false;
    }

    final String targetField = ((WithFieldName) clause).fieldName();
    final MappedFieldType mappedType = context.fieldMapper(targetField);
    final boolean numericField = mappedType instanceof NumberFieldMapper.NumberFieldType;

    return numericField
        && (clause instanceof MatchQueryBuilder || clause instanceof TermQueryBuilder || clause instanceof TermsQueryBuilder);
}
}
35 changes: 33 additions & 2 deletions server/src/main/java/org/opensearch/search/SearchService.java
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@
import org.opensearch.search.profile.Profilers;
import org.opensearch.search.profile.SearchProfileShardResults;
import org.opensearch.search.query.QueryPhase;
import org.opensearch.search.query.QueryRewriterRegistry;
import org.opensearch.search.query.QuerySearchRequest;
import org.opensearch.search.query.QuerySearchResult;
import org.opensearch.search.query.ScrollQuerySearchResult;
Expand Down Expand Up @@ -276,6 +277,27 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
Property.Deprecated
);

/**
* Toggles the query rewriting infrastructure. Defaults to {@code true}; the setting is dynamic
* and node-scoped. QueryRewriterRegistry reads the initial value and registers an update
* consumer for it, and skips all rewriters while the value is {@code false}.
*/
public static final Setting<Boolean> QUERY_REWRITING_ENABLED_SETTING = Setting.boolSetting(
"search.query_rewriting.enabled",
true,
Property.Dynamic,
Property.NodeScope
);

/**
* Controls the threshold for the number of term queries on the same field that triggers
* the TermsMergingRewriter to combine them into a single terms query. For example,
* if set to 16 (default), when 16 or more term queries target the same field within
* a boolean clause, they will be merged into a single terms query for better performance.
* The setting is dynamic and node-scoped, and its minimum allowed value is 2.
*/
public static final Setting<Integer> QUERY_REWRITING_TERMS_THRESHOLD_SETTING = Setting.intSetting(
"search.query_rewriting.terms_threshold",
16,
2, // minimum value
Property.Dynamic,
Property.NodeScope
);

// Allow concurrent segment search for all requests
public static final String CONCURRENT_SEGMENT_SEARCH_MODE_ALL = "all";

Expand Down Expand Up @@ -507,6 +529,10 @@ public SearchService(
this.concurrentSearchDeciderFactories = concurrentSearchDeciderFactories;

this.pluginProfilers = pluginProfilers;

// Initialize QueryRewriterRegistry with cluster settings so TermsMergingRewriter
// can register its settings update consumer
QueryRewriterRegistry.INSTANCE.initialize(settings, clusterService.getClusterSettings());
}

private void validateKeepAlives(TimeValue defaultKeepAlive, TimeValue maxKeepAlive) {
Expand Down Expand Up @@ -1488,8 +1514,13 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc
context.size(source.size());
Map<String, InnerHitContextBuilder> innerHitBuilders = new HashMap<>();
if (source.query() != null) {
InnerHitContextBuilder.extractInnerHits(source.query(), innerHitBuilders);
context.parsedQuery(queryShardContext.toQuery(source.query()));
QueryBuilder query = source.query();

// Apply query rewriting optimizations
query = QueryRewriterRegistry.INSTANCE.rewrite(query, queryShardContext);

InnerHitContextBuilder.extractInnerHits(query, innerHitBuilders);
context.parsedQuery(queryShardContext.toQuery(query));
}
if (source.postFilter() != null) {
InnerHitContextBuilder.extractInnerHits(source.postFilter(), innerHitBuilders);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.query;

import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryShardContext;

/**
 * Contract for a single query optimization step. Implementations restructure a
 * {@link QueryBuilder} tree before it is converted into a Lucene query.
 *
 * @opensearch.experimental
 */
@ExperimentalApi
public interface QueryRewriter {

    /**
     * Identifies this rewriter in logs, debugging output and profiling.
     *
     * @return the rewriter name
     */
    String name();

    /**
     * Ordering hint used when several rewriters are applied in sequence: rewriters with lower
     * values execute first. This gives control over ordering when rewriters may interact.
     * Implementations that do not override this method get a priority of {@code 1000}.
     *
     * @return the priority value
     */
    default int priority() {
        return 1000;
    }

    /**
     * Produces a more optimal form of the given query builder.
     *
     * @param query   the query to rewrite
     * @param context the search execution context
     * @return the rewritten query; this may be the very same instance when nothing needed rewriting
     */
    QueryBuilder rewrite(QueryBuilder query, QueryShardContext context);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.query;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.SearchService;
import org.opensearch.search.query.rewriters.BooleanFlatteningRewriter;
import org.opensearch.search.query.rewriters.MatchAllRemovalRewriter;
import org.opensearch.search.query.rewriters.MustNotToShouldRewriter;
import org.opensearch.search.query.rewriters.MustToFilterRewriter;
import org.opensearch.search.query.rewriters.TermsMergingRewriter;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

/**
 * Registry for query rewriters.
 * <p>
 * A single shared {@link #INSTANCE} holds the registered {@link QueryRewriter}s and applies them,
 * ordered by {@link QueryRewriter#priority()}, to a query. Rewriting stays disabled until
 * {@link #initialize(Settings, ClusterSettings)} has been called, because the {@code enabled}
 * flag is only populated there.
 *
 * @opensearch.internal
 */
public final class QueryRewriterRegistry {

    // Declared before INSTANCE on purpose: the constructor logs through it during static initialization.
    private static final Logger logger = LogManager.getLogger(QueryRewriterRegistry.class);

    public static final QueryRewriterRegistry INSTANCE = new QueryRewriterRegistry();

    /**
     * Registered rewriters, in registration order.
     * CopyOnWriteArrayList is used for thread-safety during registration.
     */
    private final CopyOnWriteArrayList<QueryRewriter> rewriters;

    /**
     * Whether query rewriting is enabled. Starts out {@code false} and is set from
     * {@link SearchService#QUERY_REWRITING_ENABLED_SETTING} in {@link #initialize}.
     */
    private volatile boolean enabled;

    private QueryRewriterRegistry() {
        this.rewriters = new CopyOnWriteArrayList<>();

        // Register default rewriters using singletons
        registerRewriter(BooleanFlatteningRewriter.INSTANCE);
        registerRewriter(MustToFilterRewriter.INSTANCE);
        registerRewriter(MustNotToShouldRewriter.INSTANCE);
        registerRewriter(MatchAllRemovalRewriter.INSTANCE);
        registerRewriter(TermsMergingRewriter.INSTANCE);
    }

    /**
     * Register a custom query rewriter. A {@code null} argument is ignored.
     *
     * @param rewriter The rewriter to register
     */
    public void registerRewriter(QueryRewriter rewriter) {
        if (rewriter != null) {
            rewriters.add(rewriter);
            logger.info("Registered query rewriter: {}", rewriter.name());
        }
    }

    /**
     * Initialize the registry with cluster settings.
     * This must be called once during system startup to properly configure
     * the TermsMergingRewriter with settings and update consumers. It also reads the initial
     * value of the enabled flag and subscribes to later updates of it.
     *
     * @param settings Initial cluster settings
     * @param clusterSettings Cluster settings for registering update consumers
     */
    public void initialize(Settings settings, ClusterSettings clusterSettings) {
        TermsMergingRewriter.INSTANCE.initialize(settings, clusterSettings);
        this.enabled = SearchService.QUERY_REWRITING_ENABLED_SETTING.get(settings);
        clusterSettings.addSettingsUpdateConsumer(
            SearchService.QUERY_REWRITING_ENABLED_SETTING,
            (Boolean enabled) -> this.enabled = enabled
        );
    }

    /**
     * Applies every registered rewriter to the query, lowest {@link QueryRewriter#priority()} first.
     * Rewriters with equal priority run in registration order (the sort is stable).
     * <p>
     * Rewriting is best-effort: a rewriter that throws, or that returns {@code null}, is logged and
     * skipped, and the query produced by the previous step is carried forward unchanged.
     *
     * @param query   the query to optimize, may be {@code null}
     * @param context the shard context handed to each rewriter
     * @return the rewritten query, or {@code query} itself when rewriting is disabled or {@code query} is {@code null}
     */
    public QueryBuilder rewrite(QueryBuilder query, QueryShardContext context) {
        if (!enabled || query == null) {
            return query;
        }

        // Sort a snapshot so concurrent registrations cannot disturb this pass.
        List<QueryRewriter> sortedRewriters = new ArrayList<>(rewriters);
        sortedRewriters.sort(Comparator.comparingInt(QueryRewriter::priority));

        QueryBuilder current = query;
        for (QueryRewriter rewriter : sortedRewriters) {
            try {
                QueryBuilder rewritten = rewriter.rewrite(current, context);
                if (rewritten == null) {
                    // Never let a misbehaving rewriter replace the query with null; callers dereference the result.
                    logger.warn("Query rewriter {} returned null; keeping the previous query", rewriter.name());
                } else {
                    current = rewritten;
                }
            } catch (Exception e) {
                // Trailing throwable is not consumed by a placeholder, so Log4j attaches it with its stack trace.
                logger.warn("Query rewriter {} failed: {}", rewriter.name(), e.getMessage(), e);
            }
        }

        return current;
    }
}
Loading
Loading