elastic · cbuescher · Apr 26, 2021 · Apr 12, 2021 · Apr 15, 2021 · Apr 15, 2021
diff --git a/...ysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java b/...ysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java
@@ -12,6 +12,7 @@
 import com.ibm.icu.text.RawCollationKey;
 import com.ibm.icu.text.RuleBasedCollator;
 import com.ibm.icu.util.ULocale;
+
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.SortedSetDocValuesField;

diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java
@@ -1806,7 +1806,7 @@ public void testWildcardQueryNormalizationOnTextField() {
          {
              WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*");
              SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
-             assertHitCount(searchResponse, 1L);
+             assertHitCount(searchResponse, 0L);
 
              wildCardQuery = wildcardQuery("field1", "bb*");
              searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -15,6 +15,8 @@
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.xcontent.XContentParser;
@@ -322,6 +324,19 @@ protected BytesRef indexedValueForSearch(Object value) {
             return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString());
         }
 
+        /**
+         * Wildcard queries on keyword fields use the normalizer of the underlying field, regardless of their case sensitivity option
+         */
+        @Override
+        public Query wildcardQuery(
+            String value,
+            MultiTermQuery.RewriteMethod method,
+            boolean caseInsensitive,
+            SearchExecutionContext context
+        ) {
+            return super.wildcardQuery(value, method, caseInsensitive, true, context);
+        }
+
         @Override
         public CollapseType collapseType() {
             return CollapseType.KEYWORD;
@@ -332,6 +347,7 @@ public CollapseType collapseType() {
         public int ignoreAbove() {
             return ignoreAbove;
         }
+
     }
 
     private final boolean indexed;

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
@@ -36,8 +36,8 @@
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.query.DistanceFeatureQueryBuilder;
 import org.elasticsearch.index.query.QueryRewriteContext;
-import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.query.QueryShardException;
+import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.fetch.subphase.FetchFieldsPhase;
 import org.elasticsearch.search.lookup.SearchLookup;
@@ -237,6 +237,11 @@ public Query wildcardQuery(String value,
             + "] which is of type [" + typeName() + "]");
     }
 
+    public Query normalizedWildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
+        throw new QueryShardException(context, "Can only use wildcard queries on keyword, text and wildcard fields - not on [" + name
+            + "] which is of type [" + typeName() + "]");
+    }
+
     public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
         @Nullable MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
         throw new QueryShardException(context, "Can only use regexp queries on keyword and text fields - not on [" + name

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java
@@ -113,14 +113,30 @@ public static final String normalizeWildcardPattern(String fieldname, String val
 
     @Override
     public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, SearchExecutionContext context) {
+        return wildcardQuery(value, method, caseInsensitive, false, context);
+    }
+
+
+    @Override
+    public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
+        return wildcardQuery(value, method, false, true, context);
+    }
+
+    protected Query wildcardQuery(
+        String value,
+        MultiTermQuery.RewriteMethod method,
+        boolean caseInsensitive,
+        boolean shouldNormalize,
+        SearchExecutionContext context
+    ) {
         failIfNotIndexed();
         if (context.allowExpensiveQueries() == false) {
             throw new ElasticsearchException("[wildcard] queries cannot be executed when '" +
                     ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false.");
         }
 
         Term term;
-        if (getTextSearchInfo().getSearchAnalyzer() != null) {
+        if (getTextSearchInfo().getSearchAnalyzer() != null && shouldNormalize) {
             value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
             term = new Term(name(), value);
         } else {

diff --git a/server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java b/server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java
@@ -682,7 +682,7 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
             if (getAllowLeadingWildcard() == false && (termStr.startsWith("*") || termStr.startsWith("?"))) {
                 throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
             }
-            return currentFieldType.wildcardQuery(termStr, getMultiTermRewriteMethod(), context);
+            return currentFieldType.normalizedWildcardQuery(termStr, getMultiTermRewriteMethod(), context);
         } catch (RuntimeException e) {
             if (lenient) {
                 return newLenientFieldQuery(field, e);

diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IndexFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IndexFieldTypeTests.java
@@ -14,8 +14,8 @@
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.query.QueryShardException;
+import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.test.ESTestCase;
 
 import java.util.function.Predicate;

diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java
@@ -19,9 +19,11 @@
 import org.apache.lucene.search.TermInSetQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.Operations;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.common.lucene.BytesRefs;
@@ -158,4 +160,44 @@ public void testFetchSourceValue() throws IOException {
         assertEquals(List.of("42"), fetchSourceValue(fieldType, 42L));
         assertEquals(List.of("true"), fetchSourceValue(fieldType, true));
     }
+
+    public void testWildcardQuery() {
+        TextFieldType ft = createFieldType();
+
+        // case sensitive
+        AutomatonQuery actual = (AutomatonQuery) ft.wildcardQuery("*Butterflies*", null, false, MOCK_CONTEXT);
+        AutomatonQuery expected = new WildcardQuery(new Term("field", new BytesRef("*Butterflies*")));
+        assertEquals(expected, actual);
+        assertFalse(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
+
+        // case insensitive
+        actual = (AutomatonQuery) ft.wildcardQuery("*Butterflies*", null, true, MOCK_CONTEXT);
+        expected = AutomatonQueries.caseInsensitiveWildcardQuery(new Term("field", new BytesRef("*Butterflies*")));
+        assertEquals(expected, actual);
+        assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
+        assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some Butterflies somewhere"));
+
+        ElasticsearchException ee = expectThrows(ElasticsearchException.class,
+                () -> ft.wildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE));
+        assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
+                ee.getMessage());
+    }
+
+    /**
+     * we use this e.g. in query string query parser to normalize terms on text fields
+     */
+    public void testNormalizedWildcardQuery() {
+        TextFieldType ft = createFieldType();
+
+        AutomatonQuery actual = (AutomatonQuery) ft.normalizedWildcardQuery("*Butterflies*", null, MOCK_CONTEXT);
+        AutomatonQuery expected = new WildcardQuery(new Term("field", new BytesRef("*butterflies*")));
+        assertEquals(expected, actual);
+        assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
+        assertFalse(new CharacterRunAutomaton(actual.getAutomaton()).run("some Butterflies somewhere"));
+
+        ElasticsearchException ee = expectThrows(ElasticsearchException.class,
+                () -> ft.wildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE));
+        assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
+                ee.getMessage());
+    }
 }