diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 7fde4359742e3..524a1eb79f133 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -204,6 +204,7 @@ protected KeywordFieldType(KeywordFieldType ref) { this.splitQueriesOnWhitespace = ref.splitQueriesOnWhitespace; } + @Override public KeywordFieldType clone() { return new KeywordFieldType(this); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java index 4ddda3df0af2d..05bf6b61d1de1 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java @@ -21,8 +21,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; @@ -31,6 +29,7 @@ import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.unit.Fuzziness; @@ -38,6 +37,8 @@ import org.elasticsearch.index.query.support.QueryParsers; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import static org.elasticsearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES; @@ -47,6 +48,8 @@ * can be implemented. */ public abstract class StringFieldType extends TermBasedFieldType { + private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\\\.)|([?*]+)"); + public StringFieldType() {} protected StringFieldType(MappedFieldType ref) { @@ -92,16 +95,41 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer @Override public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) { - Query termQuery = termQuery(value, context); - if (termQuery instanceof MatchNoDocsQuery || termQuery instanceof MatchAllDocsQuery) { - return termQuery; - } - + failIfNotIndexed(); if (context.allowExpensiveQueries() == false) { throw new ElasticsearchException("[wildcard] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false."); } - Term term = MappedFieldType.extractTerm(termQuery); + + Term term; + if (searchAnalyzer() != null) { + // we want to normalize everything except wildcard characters, e.g. F?o Ba* to f?o ba*, even if e.g there + // is a char_filter that would otherwise remove them + Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(value); + BytesRefBuilder sb = new BytesRefBuilder(); + int last = 0; + + while (wildcardMatcher.find()) { + if (wildcardMatcher.start() > 0) { + String chunk = value.substring(last, wildcardMatcher.start()); + + BytesRef normalized = searchAnalyzer().normalize(name(), chunk); + sb.append(normalized); + } + // append the matched group - without normalizing + sb.append(new BytesRef(wildcardMatcher.group())); + + last = wildcardMatcher.end(); + } + if (last < value.length()) { + String chunk = value.substring(last); + BytesRef normalized = searchAnalyzer().normalize(name(), chunk); + sb.append(normalized); + } + term = new Term(name(), sb.toBytesRef()); + } else { + term = new Term(name(), indexedValueForSearch(value)); + } WildcardQuery query = new WildcardQuery(term); QueryParsers.setRewriteMethod(query, method); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java index e5c33b83f24e5..d32d4fa9d3f11 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java @@ -30,13 +30,11 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lucene.Lucene; -import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.fielddata.IndexFieldData; @@ -90,7 +88,7 @@ public MetadataFieldMapper getDefault(ParserContext context) { } } - public static final class TypeFieldType extends StringFieldType { + public static final class TypeFieldType extends ConstantFieldType { TypeFieldType() { } @@ -121,61 +119,8 @@ public ValuesSourceType getValuesSourceType() { } @Override - public boolean isSearchable() { - return true; - } - - @Override - public Query existsQuery(QueryShardContext context) { - return new MatchAllDocsQuery(); - } - - @Override - public Query termQuery(Object value, QueryShardContext context) { - return termsQuery(Arrays.asList(value), context); - } - - @Override - public Query termsQuery(List values, QueryShardContext context) { - DocumentMapper mapper = context.getMapperService().documentMapper(); - if (mapper == null) { - return new MatchNoDocsQuery("No types"); - } - BytesRef indexType = indexedValueForSearch(mapper.type()); - if (values.stream() - .map(this::indexedValueForSearch) - .anyMatch(indexType::equals)) { - if (context.getMapperService().hasNested()) { - // type filters are expected not to match nested docs - return Queries.newNonNestedFilter(); - } else { - return new MatchAllDocsQuery(); - } - } else { - return new MatchNoDocsQuery("Type list does not contain the index type"); - } - } - - @Override - public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { - Query result = new MatchAllDocsQuery(); - String type = context.getMapperService().documentMapper().type(); - if (type != null) { - BytesRef typeBytes = new BytesRef(type); - if (lowerTerm != null) { - int comp = indexedValueForSearch(lowerTerm).compareTo(typeBytes); - if (comp > 0 || (comp == 0 && includeLower == false)) { - result = new MatchNoDocsQuery("[_type] was lexicographically smaller than lower bound of range"); - } - } - if (upperTerm != null) { - int comp = indexedValueForSearch(upperTerm).compareTo(typeBytes); - if (comp < 0 || (comp == 0 && includeUpper == false)) { - result = new MatchNoDocsQuery("[_type] was lexicographically greater than upper bound of range"); - } - } - } - return result; + protected boolean matches(String pattern, QueryShardContext context) { + return pattern.equals(MapperService.SINGLE_MAPPING_NAME); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TypeFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TypeFieldTypeTests.java index 79a93c04faa1f..0e00bf6677c8f 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TypeFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TypeFieldTypeTests.java @@ -21,14 +21,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; -import org.elasticsearch.Version; -import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.common.UUIDs; -import org.elasticsearch.common.lucene.search.Queries; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.query.QueryShardContext; -import org.elasticsearch.test.VersionUtils; import org.mockito.Mockito; public class TypeFieldTypeTests extends FieldTypeTestCase { @@ -39,40 +32,14 @@ protected MappedFieldType createDefaultFieldType() { public void testTermsQuery() throws Exception { QueryShardContext context = Mockito.mock(QueryShardContext.class); - Version indexVersionCreated = VersionUtils.randomIndexCompatibleVersion(random()); - Settings indexSettings = Settings.builder() - .put(IndexMetaData.SETTING_VERSION_CREATED, indexVersionCreated) - .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) - .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()).build(); - IndexMetaData indexMetaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(indexSettings).build(); - IndexSettings mockSettings = new IndexSettings(indexMetaData, Settings.EMPTY); - Mockito.when(context.getIndexSettings()).thenReturn(mockSettings); - Mockito.when(context.indexVersionCreated()).thenReturn(indexVersionCreated); - - MapperService mapperService = Mockito.mock(MapperService.class); - Mockito.when(mapperService.documentMapper()).thenReturn(null); - Mockito.when(context.getMapperService()).thenReturn(mapperService); TypeFieldMapper.TypeFieldType ft = new TypeFieldMapper.TypeFieldType(); ft.setName(TypeFieldMapper.NAME); - Query query = ft.termQuery("my_type", context); - assertEquals(new MatchNoDocsQuery(), query); - DocumentMapper mapper = Mockito.mock(DocumentMapper.class); - Mockito.when(mapper.type()).thenReturn("my_type"); - Mockito.when(mapperService.documentMapper()).thenReturn(mapper); - query = ft.termQuery("my_type", context); + Query query = ft.termQuery("_doc", context); assertEquals(new MatchAllDocsQuery(), query); - Mockito.when(mapperService.hasNested()).thenReturn(true); - query = ft.termQuery("my_type", context); - assertEquals(Queries.newNonNestedFilter(), query); - - mapper = Mockito.mock(DocumentMapper.class); - Mockito.when(mapper.type()).thenReturn("other_type"); - Mockito.when(mapperService.documentMapper()).thenReturn(mapper); - query = ft.termQuery("my_type", context); + query = ft.termQuery("other_type", context); assertEquals(new MatchNoDocsQuery(), query); } } diff --git a/server/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java index c9c43a1e90186..852c64fd6fdfd 100644 --- a/server/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java @@ -556,13 +556,6 @@ public void testParseRelation() { assertEquals(ShapeRelation.INTERSECTS, builder.relation()); } - public void testTypeField() throws IOException { - RangeQueryBuilder builder = QueryBuilders.rangeQuery("_type") - .from("value1"); - builder.doToQuery(createShardContext()); - assertWarnings(QueryShardContext.TYPES_DEPRECATION_MESSAGE); - } - /** * Range queries should generally be cacheable, at least the ones we create randomly. * This test makes sure we also test the non-cacheable cases regularly. diff --git a/server/src/test/java/org/elasticsearch/index/query/WildcardQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/WildcardQueryBuilderTests.java index 9482e0bb87d49..3eff0d1f819a5 100644 --- a/server/src/test/java/org/elasticsearch/index/query/WildcardQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/WildcardQueryBuilderTests.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.util.HashMap; +import java.util.Locale; import java.util.Map; import static org.hamcrest.Matchers.equalTo; @@ -75,7 +76,9 @@ protected void doAssertLuceneQuery(WildcardQueryBuilder queryBuilder, Query quer assertThat(wildcardQuery.getField(), equalTo(expectedFieldName)); assertThat(wildcardQuery.getTerm().field(), equalTo(expectedFieldName)); - assertThat(wildcardQuery.getTerm().text(), equalTo(queryBuilder.value())); + // wildcard queries get normalized + String text = wildcardQuery.getTerm().text().toLowerCase(Locale.ROOT); + assertThat(text, equalTo(text)); } else { Query expected = new MatchNoDocsQuery("unknown field [" + expectedFieldName + "]"); assertEquals(expected, query); @@ -138,14 +141,14 @@ public void testTypeField() throws IOException { builder.doToQuery(createShardContext()); assertWarnings(QueryShardContext.TYPES_DEPRECATION_MESSAGE); } - + public void testRewriteIndexQueryToMatchNone() throws IOException { WildcardQueryBuilder query = new WildcardQueryBuilder("_index", "does_not_exist"); QueryShardContext queryShardContext = createShardContext(); QueryBuilder rewritten = query.rewrite(queryShardContext); assertThat(rewritten, instanceOf(MatchNoneQueryBuilder.class)); - } - + } + public void testRewriteIndexQueryNotMatchNone() throws IOException { String fullIndexName = getIndex().getName(); String firstHalfOfIndexName = fullIndexName.substring(0,fullIndexName.length()/2); diff --git a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java index 51bb41f238950..404e8f92ec8f4 100644 --- a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -19,6 +19,8 @@ package org.elasticsearch.search.query; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.join.ScoreMode; @@ -31,6 +33,7 @@ import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.bootstrap.JavaVersion; import org.elasticsearch.common.document.DocumentField; +import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.time.DateFormatter; @@ -38,6 +41,9 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.analysis.CharFilterFactory; +import org.elasticsearch.index.analysis.NormalizingCharFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; @@ -46,11 +52,14 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.RangeQueryBuilder; import org.elasticsearch.index.query.TermQueryBuilder; +import org.elasticsearch.index.query.WildcardQueryBuilder; import org.elasticsearch.index.query.WrapperQueryBuilder; import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.TermsLookup; +import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; +import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHit; @@ -62,16 +71,20 @@ import org.hamcrest.Matcher; import java.io.IOException; +import java.io.Reader; import java.time.Instant; import java.time.ZoneId; import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; +import java.util.Arrays; import java.util.Collection; -import java.util.Collections; +import java.util.Map; import java.util.Random; import java.util.concurrent.ExecutionException; +import java.util.regex.Pattern; +import static java.util.Collections.singletonMap; import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; @@ -121,7 +134,7 @@ public class SearchQueryIT extends ESIntegTestCase { @Override protected Collection> nodePlugins() { - return Collections.singleton(InternalSettingsPlugin.class); + return Arrays.asList(InternalSettingsPlugin.class, MockAnalysisPlugin.class); } @Override @@ -1763,6 +1776,107 @@ public void testFieldAliasesForMetaFields() throws Exception { } + /** + * Test that wildcard queries on keyword fields get normalized + */ + public void testWildcardQueryNormalizationOnKeywordField() { + assertAcked(prepareCreate("test") + .setSettings(Settings.builder() + .put("index.analysis.normalizer.lowercase_normalizer.type", "custom") + .putList("index.analysis.normalizer.lowercase_normalizer.filter", "lowercase") + .build()) + .setMapping("field1", "type=keyword,normalizer=lowercase_normalizer")); + client().prepareIndex("test").setId("1").setSource("field1", "Bbb Aaa").get(); + refresh(); + + { + WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*"); + SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get(); + assertHitCount(searchResponse, 1L); + + wildCardQuery = wildcardQuery("field1", "bb*"); + searchResponse = client().prepareSearch().setQuery(wildCardQuery).get(); + assertHitCount(searchResponse, 1L); + } + } + + /** + * Test that wildcard queries on text fields get normalized + */ + public void testWildcardQueryNormalizationOnTextField() { + assertAcked(prepareCreate("test") + .setSettings(Settings.builder() + .put("index.analysis.analyzer.lowercase_analyzer.type", "custom") + .put("index.analysis.analyzer.lowercase_analyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.lowercase_analyzer.filter", "lowercase") + .build()) + .setMapping("field1", "type=text,analyzer=lowercase_analyzer")); + client().prepareIndex("test").setId("1").setSource("field1", "Bbb Aaa").get(); + refresh(); + + { + WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*"); + SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get(); + assertHitCount(searchResponse, 1L); + + wildCardQuery = wildcardQuery("field1", "bb*"); + searchResponse = client().prepareSearch().setQuery(wildCardQuery).get(); + assertHitCount(searchResponse, 1L); + } + } + + /** + * Reserved characters should be excluded when the normalization is applied for keyword fields. + * See https://github.com/elastic/elasticsearch/issues/46300 for details. + */ + public void testWildcardQueryNormalizationKeywordSpecialCharacters() { + assertAcked(prepareCreate("test") + .setSettings(Settings.builder().put("index.analysis.char_filter.no_wildcard.type", "mock_pattern_replace") + .put("index.analysis.normalizer.no_wildcard.type", "custom") + .put("index.analysis.normalizer.no_wildcard.char_filter", "no_wildcard").build()) + .setMapping("field", "type=keyword,normalizer=no_wildcard")); + client().prepareIndex("test").setId("1").setSource("field", "label-1").get(); + refresh(); + + WildcardQueryBuilder wildCardQuery = wildcardQuery("field", "la*"); + SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get(); + assertHitCount(searchResponse, 1L); + + wildCardQuery = wildcardQuery("field", "la*el-?"); + searchResponse = client().prepareSearch().setQuery(wildCardQuery).get(); + assertHitCount(searchResponse, 1L); + } + + public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin { + + @Override + public Map> getCharFilters() { + return singletonMap("mock_pattern_replace", (indexSettings, env, name, settings) -> { + class Factory implements NormalizingCharFilterFactory { + + private final Pattern pattern = Regex.compile("[\\*\\?]", null); + + @Override + public String name() { + return name; + } + + @Override + public Reader create(Reader reader) { + return new PatternReplaceCharFilter(pattern, "", reader); + } + } + return new Factory(); + }); + } + + @Override + public Map> getTokenizers() { + return singletonMap("keyword", (indexSettings, environment, name, settings) -> TokenizerFactory.newFactory(name, + () -> new MockTokenizer(MockTokenizer.KEYWORD, false))); + } + } + /** * Test correct handling {@link SpanBooleanQueryRewriteWithMaxClause#rewrite(IndexReader, MultiTermQuery)}. That rewrite method is e.g. * set for fuzzy queries with "constant_score" rewrite nested inside a `span_multi` query and would cause NPEs due to an unset