diff --git a/CHANGELOG.md b/CHANGELOG.md index 55d0002e1aa09..0952424854f54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Flaky Test] Fix flaky test IngestFromKinesisIT.testAllActiveIngestion ([#19380](https://github.com/opensearch-project/OpenSearch/pull/19380)) - Fix lag metric for pull-based ingestion when streaming source is empty ([#19393](https://github.com/opensearch-project/OpenSearch/pull/19393)) - Fix ingestion state xcontent serialization in IndexMetadata and fail fast on mapping errors([#19320](https://github.com/opensearch-project/OpenSearch/pull/19320)) +- Fix updated keyword field params leading to stale responses from request cache ([#19385](https://github.com/opensearch-project/OpenSearch/pull/19385)) ### Dependencies - Update to Gradle 9.1.0 ([#19329](https://github.com/opensearch-project/OpenSearch/pull/19329)) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java index dc72291e95184..069bdd3fa7a64 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java @@ -47,6 +47,7 @@ import org.opensearch.action.admin.indices.alias.Alias; import org.opensearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest; import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse; +import org.opensearch.action.admin.indices.mapping.put.PutMappingRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.action.search.SearchType; import org.opensearch.cluster.ClusterState; @@ -861,6 +862,115 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo assertEquals(0, requestCacheStats.getMemorySizeInBytes()); } + public void testKeywordFieldUseSimilarityCacheability() throws Exception { + testKeywordFieldParameterCacheabilityCase("use_similarity"); + } + + public void testKeywordFieldSplitWhitespaceCacheability() throws Exception { + testKeywordFieldParameterCacheabilityCase("split_queries_on_whitespace"); + } + + private void testKeywordFieldParameterCacheabilityCase(String paramName) throws Exception { + // When keyword fields have non-default values for "use_similarity" or "split_queries_on_whitespace", + // queries on those fields shouldn't be cacheable. Default value is false for both parameters. + String node = internalCluster().startNode( + Settings.builder().put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, TimeValue.timeValueMillis(50)) + ); + Client client = client(node); + String index = "index"; + String field1 = "field1"; + String field2 = "field2"; + assertAcked( + client.admin() + .indices() + .prepareCreate(index) + .setMapping(field1, "type=keyword," + paramName + "=false", field2, "type=keyword," + paramName + "=true") + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + // Disable index refreshing to avoid cache being invalidated mid-test + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), TimeValue.timeValueMillis(-1)) + ) + .get() + ); + + indexRandom(true, client.prepareIndex(index).setSource(field1, "foo", field2, "foo")); + indexRandom(true, client.prepareIndex(index).setSource(field1, "foo2", field2, "foo2")); + ensureSearchable(index); + // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache + forceMerge(client, index); + + // Show this behavior works for a nested query where the termQuery built by KeywordFieldMapper is not the outermost query + SearchResponse resp = client.prepareSearch(index) + .setRequestCache(true) + .setQuery( + QueryBuilders.boolQuery().should(QueryBuilders.termQuery(field1, "foo")).should(QueryBuilders.termQuery(field1, "foo2")) + ) + .get(); + assertSearchResponse(resp); + // This query should be cached + RequestCacheStats stats = getNodeCacheStats(client); + assertEquals(1, stats.getMissCount()); + long cacheSize = stats.getMemorySizeInBytes(); + assertTrue(cacheSize > 0); + + // The same query but involving field2 at all should not be cacheable even if setRequestCache is true: + // no extra misses and no extra values in cache + assertQueryCausesCacheState( + client, + index, + QueryBuilders.boolQuery().should(QueryBuilders.termQuery(field1, "foo")).should(QueryBuilders.termQuery(field2, "foo2")), + 0, + 1, + cacheSize + ); + + // Now change the parameter value to true for field1, and check the same query again does NOT come from the cache (hits == 0) + // and also shouldn't add any extra entries to the cache + PutMappingRequest mappingRequest = new PutMappingRequest().indices(index).source(field1, "type=keyword," + paramName + "=true"); + client.admin().indices().putMapping(mappingRequest).actionGet(); + assertQueryCausesCacheState( + client, + index, + QueryBuilders.boolQuery().should(QueryBuilders.termQuery(field1, "foo")).should(QueryBuilders.termQuery(field1, "foo2")), + 0, + 1, + cacheSize + ); + + // Now test all query building methods on field1 and ensure none are cacheable + + assertQueryCausesCacheState(client, index, QueryBuilders.termsQuery(field1, "foo", "foo2"), 0, 1, cacheSize); + + assertQueryCausesCacheState(client, index, QueryBuilders.prefixQuery(field1, "fo"), 0, 1, cacheSize); + + assertQueryCausesCacheState(client, index, QueryBuilders.regexpQuery(field1, "f*o"), 0, 1, cacheSize); + + assertQueryCausesCacheState(client, index, QueryBuilders.rangeQuery(field1).gte("f").lte("g"), 0, 1, cacheSize); + + assertQueryCausesCacheState(client, index, QueryBuilders.fuzzyQuery(field1, "foe"), 0, 1, cacheSize); + + assertQueryCausesCacheState(client, index, QueryBuilders.wildcardQuery(field1, "f*"), 0, 1, cacheSize); + } + + private void assertQueryCausesCacheState( + Client client, + String index, + QueryBuilder builder, + long expectedHits, + long expectedMisses, + long expectedMemory + ) { + SearchResponse resp = client.prepareSearch(index).setRequestCache(true).setQuery(builder).get(); + assertSearchResponse(resp); + RequestCacheStats stats = getNodeCacheStats(client); + assertEquals(expectedHits, stats.getHitCount()); + assertEquals(expectedMisses, stats.getMissCount()); + assertEquals(expectedMemory, stats.getMemorySizeInBytes()); + } + private Path[] shardDirectory(String server, Index index, int shard) { NodeEnvironment env = internalCluster().getInstance(NodeEnvironment.class, server); final Path[] paths = env.availableShardPaths(new ShardId(index, shard)); diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 1c534153f2629..7ace516459763 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -312,6 +312,7 @@ public static class KeywordFieldType extends StringFieldType { private final int ignoreAbove; private final String nullValue; private final boolean useSimilarity; + private final boolean splitQueriesOnWhitespace; public KeywordFieldType(String name, FieldType fieldType, NamedAnalyzer normalizer, NamedAnalyzer searchAnalyzer, Builder builder) { super( @@ -328,6 +329,7 @@ public KeywordFieldType(String name, FieldType fieldType, NamedAnalyzer normaliz this.ignoreAbove = builder.ignoreAbove.getValue(); this.nullValue = builder.nullValue.getValue(); this.useSimilarity = builder.useSimilarity.getValue(); + this.splitQueriesOnWhitespace = builder.splitQueriesOnWhitespace.getValue(); } public KeywordFieldType(String name, boolean isSearchable, boolean hasDocValues, Map meta) { @@ -335,11 +337,23 @@ public KeywordFieldType(String name, boolean isSearchable, boolean hasDocValues, } public KeywordFieldType(String name, boolean isSearchable, boolean hasDocValues, boolean useSimilarity, Map meta) { + this(name, isSearchable, hasDocValues, useSimilarity, false, meta); + } + + public KeywordFieldType( + String name, + boolean isSearchable, + boolean hasDocValues, + boolean useSimilarity, + boolean splitQueriesOnWhitespace, + Map meta + ) { super(name, isSearchable, false, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); this.ignoreAbove = Integer.MAX_VALUE; this.nullValue = null; this.useSimilarity = useSimilarity; + this.splitQueriesOnWhitespace = splitQueriesOnWhitespace; } public KeywordFieldType(String name) { @@ -358,6 +372,7 @@ public KeywordFieldType(String name, FieldType fieldType) { this.ignoreAbove = Integer.MAX_VALUE; this.nullValue = null; this.useSimilarity = false; + this.splitQueriesOnWhitespace = false; } public KeywordFieldType(String name, NamedAnalyzer analyzer) { @@ -365,6 +380,7 @@ public KeywordFieldType(String name, NamedAnalyzer analyzer) { this.ignoreAbove = Integer.MAX_VALUE; this.nullValue = null; this.useSimilarity = false; + this.splitQueriesOnWhitespace = false; } @Override @@ -447,6 +463,7 @@ protected Object rewriteForDocValue(Object value) { @Override public Query termQueryCaseInsensitive(Object value, QueryShardContext context) { failIfNotIndexedAndNoDocValues(); + checkToDisableCaching(context); if (isSearchable()) { return super.termQueryCaseInsensitive(value, context); } else { @@ -467,6 +484,7 @@ public Query termQueryCaseInsensitive(Object value, QueryShardContext context) { @Override public Query termQuery(Object value, QueryShardContext context) { failIfNotIndexedAndNoDocValues(); + checkToDisableCaching(context); if (isSearchable()) { Query query = super.termQuery(value, context); if (!this.useSimilarity) { @@ -494,6 +512,7 @@ public Query termQuery(Object value, QueryShardContext context) { @Override public Query termsQuery(List values, QueryShardContext context) { failIfNotIndexedAndNoDocValues(); + checkToDisableCaching(context); // has index and doc_values enabled if (isSearchable() && hasDocValues()) { if (!context.keywordFieldIndexOrDocValuesEnabled()) { @@ -540,6 +559,7 @@ public Query prefixQuery( ); } failIfNotIndexedAndNoDocValues(); + checkToDisableCaching(context); if (isSearchable() && hasDocValues()) { if (!context.keywordFieldIndexOrDocValuesEnabled()) { return super.prefixQuery(value, method, caseInsensitive, context); @@ -583,6 +603,7 @@ public Query regexpQuery( ); } failIfNotIndexedAndNoDocValues(); + checkToDisableCaching(context); if (isSearchable() && hasDocValues()) { if (!context.keywordFieldIndexOrDocValuesEnabled()) { return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); @@ -621,6 +642,7 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower ); } failIfNotIndexedAndNoDocValues(); + checkToDisableCaching(context); if (isSearchable() && hasDocValues()) { Query indexQuery = new TermRangeQuery( name(), @@ -669,6 +691,7 @@ public Query fuzzyQuery( QueryShardContext context ) { failIfNotIndexedAndNoDocValues(); + checkToDisableCaching(context); if (context.allowExpensiveQueries() == false) { throw new OpenSearchException( "[fuzzy] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false." @@ -716,6 +739,7 @@ public Query wildcardQuery( ); } failIfNotIndexedAndNoDocValues(); + checkToDisableCaching(context); // keyword field types are always normalized, so ignore case sensitivity and force normalize the // wildcard // query text @@ -745,6 +769,13 @@ public Query wildcardQuery( return super.wildcardQuery(value, method, caseInsensitive, true, context); } + private void checkToDisableCaching(QueryShardContext context) { + // Mark the query as non-cacheable if the defaults for useSimilarity or splitQueriesOnWhitespace are not used. + if (useSimilarity || splitQueriesOnWhitespace) { + context.setIsCacheable(false); + } + } + } private final boolean indexed; diff --git a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java index 8005d36ff017e..f2c278f04b021 100644 --- a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java +++ b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java @@ -652,6 +652,10 @@ public final boolean isCacheable() { return cacheable; } + public final void setIsCacheable(boolean isCacheable) { + this.cacheable = isCacheable; + } + /** * Returns the shard ID this context was created for. */