Skip to content

Commit aba9113

Browse files
author
Christoph Büscher
authored
Fix case sensitivity rules for wildcard queries on text fields (#71751) (#72224)
Wildcard queries on text fields should not apply the field's analyzer to the search query. However, we accidentally enabled this in #53127 by moving the query normalization to the StringFieldType super type. This change fixes that by separating the notions of normalization and case insensitivity (as implemented by the `case_insensitive` flag). This is necessary because we still need to normalize the query string when the wildcard query method on the field type is requested from the `query_string` query parser. Wildcard queries on keyword fields should also continue to apply the field's normalizer, regardless of whether the `case_insensitive` flag is set, because normalization could involve something other than lowercasing (e.g. substituting umlauts, as in the GermanNormalizationFilter). Closes #71403
1 parent 66e8490 commit aba9113

File tree

9 files changed

+96
-5
lines changed

9 files changed

+96
-5
lines changed

plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import com.ibm.icu.text.RawCollationKey;
1313
import com.ibm.icu.text.RuleBasedCollator;
1414
import com.ibm.icu.util.ULocale;
15+
1516
import org.apache.lucene.document.Field;
1617
import org.apache.lucene.document.FieldType;
1718
import org.apache.lucene.document.SortedSetDocValuesField;

server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1924,7 +1924,7 @@ public void testWildcardQueryNormalizationOnKeywordField() {
19241924
}
19251925

19261926
/**
1927-
* Test that wildcard queries on text fields get normalized
1927+
* Test that wildcard queries on text fields don't get normalized
19281928
*/
19291929
public void testWildcardQueryNormalizationOnTextField() {
19301930
assertAcked(prepareCreate("test")
@@ -1940,6 +1940,11 @@ public void testWildcardQueryNormalizationOnTextField() {
19401940
{
19411941
WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*");
19421942
SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
1943+
assertHitCount(searchResponse, 0L);
1944+
1945+
// the following works not because of normalization but because of the `case_insensitive` parameter
1946+
wildCardQuery = wildcardQuery("field1", "Bb*").caseInsensitive(true);
1947+
searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
19431948
assertHitCount(searchResponse, 1L);
19441949

19451950
wildCardQuery = wildcardQuery("field1", "bb*");

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import org.apache.lucene.document.SortedSetDocValuesField;
1616
import org.apache.lucene.index.IndexOptions;
1717
import org.apache.lucene.index.LeafReaderContext;
18+
import org.apache.lucene.search.MultiTermQuery;
19+
import org.apache.lucene.search.Query;
1820
import org.apache.lucene.util.BytesRef;
1921
import org.elasticsearch.common.lucene.Lucene;
2022
import org.elasticsearch.common.xcontent.XContentParser;
@@ -325,6 +327,19 @@ protected BytesRef indexedValueForSearch(Object value) {
325327
return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString());
326328
}
327329

330+
/**
331+
* Wildcard queries on keyword fields use the normalizer of the underlying field, regardless of their case sensitivity option
332+
*/
333+
@Override
334+
public Query wildcardQuery(
335+
String value,
336+
MultiTermQuery.RewriteMethod method,
337+
boolean caseInsensitive,
338+
SearchExecutionContext context
339+
) {
340+
return super.wildcardQuery(value, method, caseInsensitive, true, context);
341+
}
342+
328343
@Override
329344
public CollapseType collapseType() {
330345
return CollapseType.KEYWORD;
@@ -335,6 +350,7 @@ public CollapseType collapseType() {
335350
public int ignoreAbove() {
336351
return ignoreAbove;
337352
}
353+
338354
}
339355

340356
private final boolean indexed;

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@
3636
import org.elasticsearch.index.fielddata.IndexFieldData;
3737
import org.elasticsearch.index.query.DistanceFeatureQueryBuilder;
3838
import org.elasticsearch.index.query.QueryRewriteContext;
39-
import org.elasticsearch.index.query.SearchExecutionContext;
4039
import org.elasticsearch.index.query.QueryShardException;
40+
import org.elasticsearch.index.query.SearchExecutionContext;
4141
import org.elasticsearch.search.DocValueFormat;
4242
import org.elasticsearch.search.fetch.subphase.FetchFieldsPhase;
4343
import org.elasticsearch.search.lookup.SearchLookup;
@@ -248,6 +248,11 @@ public Query wildcardQuery(String value,
248248
+ "] which is of type [" + typeName() + "]");
249249
}
250250

251+
public Query normalizedWildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
252+
throw new QueryShardException(context, "Can only use wildcard queries on keyword, text and wildcard fields - not on [" + name
253+
+ "] which is of type [" + typeName() + "]");
254+
}
255+
251256
public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
252257
@Nullable MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
253258
throw new QueryShardException(context, "Can only use regexp queries on keyword and text fields - not on [" + name

server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,14 +113,30 @@ public static final String normalizeWildcardPattern(String fieldname, String val
113113

114114
@Override
115115
public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, SearchExecutionContext context) {
116+
return wildcardQuery(value, method, caseInsensitive, false, context);
117+
}
118+
119+
120+
@Override
121+
public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
122+
return wildcardQuery(value, method, false, true, context);
123+
}
124+
125+
protected Query wildcardQuery(
126+
String value,
127+
MultiTermQuery.RewriteMethod method,
128+
boolean caseInsensitive,
129+
boolean shouldNormalize,
130+
SearchExecutionContext context
131+
) {
116132
failIfNotIndexed();
117133
if (context.allowExpensiveQueries() == false) {
118134
throw new ElasticsearchException("[wildcard] queries cannot be executed when '" +
119135
ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false.");
120136
}
121137

122138
Term term;
123-
if (getTextSearchInfo().getSearchAnalyzer() != null) {
139+
if (getTextSearchInfo().getSearchAnalyzer() != null && shouldNormalize) {
124140
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
125141
term = new Term(name(), value);
126142
} else {

server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -682,7 +682,7 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
682682
if (getAllowLeadingWildcard() == false && (termStr.startsWith("*") || termStr.startsWith("?"))) {
683683
throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
684684
}
685-
return currentFieldType.wildcardQuery(termStr, getMultiTermRewriteMethod(), context);
685+
return currentFieldType.normalizedWildcardQuery(termStr, getMultiTermRewriteMethod(), context);
686686
} catch (RuntimeException e) {
687687
if (lenient) {
688688
return newLenientFieldQuery(field, e);

server/src/test/java/org/elasticsearch/index/mapper/IndexFieldTypeTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
import org.elasticsearch.common.regex.Regex;
1515
import org.elasticsearch.common.settings.Settings;
1616
import org.elasticsearch.index.IndexSettings;
17-
import org.elasticsearch.index.query.SearchExecutionContext;
1817
import org.elasticsearch.index.query.QueryShardException;
18+
import org.elasticsearch.index.query.SearchExecutionContext;
1919
import org.elasticsearch.test.ESTestCase;
2020

2121
import java.util.function.Predicate;

server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@
1919
import org.apache.lucene.search.TermInSetQuery;
2020
import org.apache.lucene.search.TermQuery;
2121
import org.apache.lucene.search.TermRangeQuery;
22+
import org.apache.lucene.search.WildcardQuery;
2223
import org.apache.lucene.util.BytesRef;
2324
import org.apache.lucene.util.automaton.Automata;
2425
import org.apache.lucene.util.automaton.Automaton;
26+
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
2527
import org.apache.lucene.util.automaton.Operations;
2628
import org.elasticsearch.ElasticsearchException;
2729
import org.elasticsearch.common.lucene.BytesRefs;
@@ -157,4 +159,45 @@ public void testFetchSourceValue() throws IOException {
157159
assertEquals(Collections.singletonList("42"), fetchSourceValue(fieldType, 42L));
158160
assertEquals(Collections.singletonList("true"), fetchSourceValue(fieldType, true));
159161
}
162+
163+
public void testWildcardQuery() {
164+
TextFieldType ft = createFieldType();
165+
166+
// case sensitive
167+
AutomatonQuery actual = (AutomatonQuery) ft.wildcardQuery("*Butterflies*", null, false, MOCK_CONTEXT);
168+
AutomatonQuery expected = new WildcardQuery(new Term("field", new BytesRef("*Butterflies*")));
169+
assertEquals(expected, actual);
170+
assertFalse(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
171+
172+
// case insensitive
173+
actual = (AutomatonQuery) ft.wildcardQuery("*Butterflies*", null, true, MOCK_CONTEXT);
174+
expected = AutomatonQueries.caseInsensitiveWildcardQuery(new Term("field", new BytesRef("*Butterflies*")));
175+
assertEquals(expected, actual);
176+
assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
177+
assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some Butterflies somewhere"));
178+
179+
ElasticsearchException ee = expectThrows(ElasticsearchException.class,
180+
() -> ft.wildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE));
181+
assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
182+
ee.getMessage());
183+
}
184+
185+
/**
186+
* we use this e.g. in query string query parser to normalize terms on text fields
187+
*/
188+
public void testNormalizedWildcardQuery() {
189+
TextFieldType ft = createFieldType();
190+
191+
AutomatonQuery actual = (AutomatonQuery) ft.normalizedWildcardQuery("*Butterflies*", null, MOCK_CONTEXT);
192+
AutomatonQuery expected = new WildcardQuery(new Term("field", new BytesRef("*butterflies*")));
193+
assertEquals(expected, actual);
194+
assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
195+
assertFalse(new CharacterRunAutomaton(actual.getAutomaton()).run("some Butterflies somewhere"));
196+
197+
ElasticsearchException ee = expectThrows(ElasticsearchException.class,
198+
() -> ft.wildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE));
199+
assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
200+
ee.getMessage());
201+
}
202+
160203
}

x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ private WildcardFieldType(String name, String nullValue, int ignoreAbove,
265265
this.ignoreAbove = ignoreAbove;
266266
}
267267

268+
@Override
269+
public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
270+
return wildcardQuery(value, method, false, context);
271+
}
272+
268273
@Override
269274
public Query wildcardQuery(String wildcardPattern, RewriteMethod method, boolean caseInsensitive, SearchExecutionContext context) {
270275

0 commit comments

Comments
 (0)