Skip to content

Commit facd525

Browse files
author
Christoph Büscher
authored
Mask wildcard query special characters on keyword queries (#53127)
Wildcard queries on keyword fields get normalized. However, this normalization step should exclude the two special characters * and ? in order to keep the wildcard query itself intact. Closes #46300
1 parent c27eab7 commit facd525

File tree

7 files changed

+165
-114
lines changed

7 files changed

+165
-114
lines changed

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ protected KeywordFieldType(KeywordFieldType ref) {
204204
this.splitQueriesOnWhitespace = ref.splitQueriesOnWhitespace;
205205
}
206206

207+
@Override
207208
public KeywordFieldType clone() {
208209
return new KeywordFieldType(this);
209210
}

server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121

2222
import org.apache.lucene.index.Term;
2323
import org.apache.lucene.search.FuzzyQuery;
24-
import org.apache.lucene.search.MatchAllDocsQuery;
25-
import org.apache.lucene.search.MatchNoDocsQuery;
2624
import org.apache.lucene.search.MultiTermQuery;
2725
import org.apache.lucene.search.PrefixQuery;
2826
import org.apache.lucene.search.Query;
@@ -31,13 +29,16 @@
3129
import org.apache.lucene.search.TermRangeQuery;
3230
import org.apache.lucene.search.WildcardQuery;
3331
import org.apache.lucene.util.BytesRef;
32+
import org.apache.lucene.util.BytesRefBuilder;
3433
import org.elasticsearch.ElasticsearchException;
3534
import org.elasticsearch.common.lucene.BytesRefs;
3635
import org.elasticsearch.common.unit.Fuzziness;
3736
import org.elasticsearch.index.query.QueryShardContext;
3837
import org.elasticsearch.index.query.support.QueryParsers;
3938

4039
import java.util.List;
40+
import java.util.regex.Matcher;
41+
import java.util.regex.Pattern;
4142

4243
import static org.elasticsearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES;
4344

@@ -47,6 +48,8 @@
4748
* can be implemented. */
4849
public abstract class StringFieldType extends TermBasedFieldType {
4950

51+
private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\\\.)|([?*]+)");
52+
5053
public StringFieldType() {}
5154

5255
protected StringFieldType(MappedFieldType ref) {
@@ -92,16 +95,41 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer
9295

9396
@Override
9497
public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
95-
Query termQuery = termQuery(value, context);
96-
if (termQuery instanceof MatchNoDocsQuery || termQuery instanceof MatchAllDocsQuery) {
97-
return termQuery;
98-
}
99-
98+
failIfNotIndexed();
10099
if (context.allowExpensiveQueries() == false) {
101100
throw new ElasticsearchException("[wildcard] queries cannot be executed when '" +
102101
ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false.");
103102
}
104-
Term term = MappedFieldType.extractTerm(termQuery);
103+
104+
Term term;
105+
if (searchAnalyzer() != null) {
106+
// we want to normalize everything except wildcard characters, e.g. F?o Ba* to f?o ba*, even if, for example, there
107+
// is a char_filter that would otherwise remove them
108+
Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(value);
109+
BytesRefBuilder sb = new BytesRefBuilder();
110+
int last = 0;
111+
112+
while (wildcardMatcher.find()) {
113+
if (wildcardMatcher.start() > 0) {
114+
String chunk = value.substring(last, wildcardMatcher.start());
115+
116+
BytesRef normalized = searchAnalyzer().normalize(name(), chunk);
117+
sb.append(normalized);
118+
}
119+
// append the matched group - without normalizing
120+
sb.append(new BytesRef(wildcardMatcher.group()));
121+
122+
last = wildcardMatcher.end();
123+
}
124+
if (last < value.length()) {
125+
String chunk = value.substring(last);
126+
BytesRef normalized = searchAnalyzer().normalize(name(), chunk);
127+
sb.append(normalized);
128+
}
129+
term = new Term(name(), sb.toBytesRef());
130+
} else {
131+
term = new Term(name(), indexedValueForSearch(value));
132+
}
105133

106134
WildcardQuery query = new WildcardQuery(term);
107135
QueryParsers.setRewriteMethod(query, method);

server/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java

Lines changed: 3 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,11 @@
3030
import org.apache.lucene.search.BooleanQuery;
3131
import org.apache.lucene.search.ConstantScoreQuery;
3232
import org.apache.lucene.search.MatchAllDocsQuery;
33-
import org.apache.lucene.search.MatchNoDocsQuery;
3433
import org.apache.lucene.search.Query;
3534
import org.apache.lucene.search.TermInSetQuery;
3635
import org.apache.lucene.search.TermQuery;
3736
import org.apache.lucene.util.BytesRef;
3837
import org.elasticsearch.common.lucene.Lucene;
39-
import org.elasticsearch.common.lucene.search.Queries;
4038
import org.elasticsearch.common.xcontent.XContentBuilder;
4139
import org.elasticsearch.index.IndexSettings;
4240
import org.elasticsearch.index.fielddata.IndexFieldData;
@@ -90,7 +88,7 @@ public MetadataFieldMapper getDefault(ParserContext context) {
9088
}
9189
}
9290

93-
public static final class TypeFieldType extends StringFieldType {
91+
public static final class TypeFieldType extends ConstantFieldType {
9492

9593
TypeFieldType() {
9694
}
@@ -121,61 +119,8 @@ public ValuesSourceType getValuesSourceType() {
121119
}
122120

123121
@Override
124-
public boolean isSearchable() {
125-
return true;
126-
}
127-
128-
@Override
129-
public Query existsQuery(QueryShardContext context) {
130-
return new MatchAllDocsQuery();
131-
}
132-
133-
@Override
134-
public Query termQuery(Object value, QueryShardContext context) {
135-
return termsQuery(Arrays.asList(value), context);
136-
}
137-
138-
@Override
139-
public Query termsQuery(List<?> values, QueryShardContext context) {
140-
DocumentMapper mapper = context.getMapperService().documentMapper();
141-
if (mapper == null) {
142-
return new MatchNoDocsQuery("No types");
143-
}
144-
BytesRef indexType = indexedValueForSearch(mapper.type());
145-
if (values.stream()
146-
.map(this::indexedValueForSearch)
147-
.anyMatch(indexType::equals)) {
148-
if (context.getMapperService().hasNested()) {
149-
// type filters are expected not to match nested docs
150-
return Queries.newNonNestedFilter();
151-
} else {
152-
return new MatchAllDocsQuery();
153-
}
154-
} else {
155-
return new MatchNoDocsQuery("Type list does not contain the index type");
156-
}
157-
}
158-
159-
@Override
160-
public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) {
161-
Query result = new MatchAllDocsQuery();
162-
String type = context.getMapperService().documentMapper().type();
163-
if (type != null) {
164-
BytesRef typeBytes = new BytesRef(type);
165-
if (lowerTerm != null) {
166-
int comp = indexedValueForSearch(lowerTerm).compareTo(typeBytes);
167-
if (comp > 0 || (comp == 0 && includeLower == false)) {
168-
result = new MatchNoDocsQuery("[_type] was lexicographically smaller than lower bound of range");
169-
}
170-
}
171-
if (upperTerm != null) {
172-
int comp = indexedValueForSearch(upperTerm).compareTo(typeBytes);
173-
if (comp < 0 || (comp == 0 && includeUpper == false)) {
174-
result = new MatchNoDocsQuery("[_type] was lexicographically greater than upper bound of range");
175-
}
176-
}
177-
}
178-
return result;
122+
protected boolean matches(String pattern, QueryShardContext context) {
123+
return pattern.equals(MapperService.SINGLE_MAPPING_NAME);
179124
}
180125
}
181126

server/src/test/java/org/elasticsearch/index/mapper/TypeFieldTypeTests.java

Lines changed: 2 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,7 @@
2121
import org.apache.lucene.search.MatchAllDocsQuery;
2222
import org.apache.lucene.search.MatchNoDocsQuery;
2323
import org.apache.lucene.search.Query;
24-
import org.elasticsearch.Version;
25-
import org.elasticsearch.cluster.metadata.IndexMetaData;
26-
import org.elasticsearch.common.UUIDs;
27-
import org.elasticsearch.common.lucene.search.Queries;
28-
import org.elasticsearch.common.settings.Settings;
29-
import org.elasticsearch.index.IndexSettings;
3024
import org.elasticsearch.index.query.QueryShardContext;
31-
import org.elasticsearch.test.VersionUtils;
3225
import org.mockito.Mockito;
3326

3427
public class TypeFieldTypeTests extends FieldTypeTestCase {
@@ -39,40 +32,14 @@ protected MappedFieldType createDefaultFieldType() {
3932

4033
public void testTermsQuery() throws Exception {
4134
QueryShardContext context = Mockito.mock(QueryShardContext.class);
42-
Version indexVersionCreated = VersionUtils.randomIndexCompatibleVersion(random());
43-
Settings indexSettings = Settings.builder()
44-
.put(IndexMetaData.SETTING_VERSION_CREATED, indexVersionCreated)
45-
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
46-
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
47-
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()).build();
48-
IndexMetaData indexMetaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(indexSettings).build();
49-
IndexSettings mockSettings = new IndexSettings(indexMetaData, Settings.EMPTY);
50-
Mockito.when(context.getIndexSettings()).thenReturn(mockSettings);
51-
Mockito.when(context.indexVersionCreated()).thenReturn(indexVersionCreated);
52-
53-
MapperService mapperService = Mockito.mock(MapperService.class);
54-
Mockito.when(mapperService.documentMapper()).thenReturn(null);
55-
Mockito.when(context.getMapperService()).thenReturn(mapperService);
5635

5736
TypeFieldMapper.TypeFieldType ft = new TypeFieldMapper.TypeFieldType();
5837
ft.setName(TypeFieldMapper.NAME);
59-
Query query = ft.termQuery("my_type", context);
60-
assertEquals(new MatchNoDocsQuery(), query);
6138

62-
DocumentMapper mapper = Mockito.mock(DocumentMapper.class);
63-
Mockito.when(mapper.type()).thenReturn("my_type");
64-
Mockito.when(mapperService.documentMapper()).thenReturn(mapper);
65-
query = ft.termQuery("my_type", context);
39+
Query query = ft.termQuery("_doc", context);
6640
assertEquals(new MatchAllDocsQuery(), query);
6741

68-
Mockito.when(mapperService.hasNested()).thenReturn(true);
69-
query = ft.termQuery("my_type", context);
70-
assertEquals(Queries.newNonNestedFilter(), query);
71-
72-
mapper = Mockito.mock(DocumentMapper.class);
73-
Mockito.when(mapper.type()).thenReturn("other_type");
74-
Mockito.when(mapperService.documentMapper()).thenReturn(mapper);
75-
query = ft.termQuery("my_type", context);
42+
query = ft.termQuery("other_type", context);
7643
assertEquals(new MatchNoDocsQuery(), query);
7744
}
7845
}

server/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -556,13 +556,6 @@ public void testParseRelation() {
556556
assertEquals(ShapeRelation.INTERSECTS, builder.relation());
557557
}
558558

559-
public void testTypeField() throws IOException {
560-
RangeQueryBuilder builder = QueryBuilders.rangeQuery("_type")
561-
.from("value1");
562-
builder.doToQuery(createShardContext());
563-
assertWarnings(QueryShardContext.TYPES_DEPRECATION_MESSAGE);
564-
}
565-
566559
/**
567560
* Range queries should generally be cacheable, at least the ones we create randomly.
568561
* This test makes sure we also test the non-cacheable cases regularly.

server/src/test/java/org/elasticsearch/index/query/WildcardQueryBuilderTests.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
import java.io.IOException;
2929
import java.util.HashMap;
30+
import java.util.Locale;
3031
import java.util.Map;
3132

3233
import static org.hamcrest.Matchers.equalTo;
@@ -75,7 +76,9 @@ protected void doAssertLuceneQuery(WildcardQueryBuilder queryBuilder, Query quer
7576

7677
assertThat(wildcardQuery.getField(), equalTo(expectedFieldName));
7778
assertThat(wildcardQuery.getTerm().field(), equalTo(expectedFieldName));
78-
assertThat(wildcardQuery.getTerm().text(), equalTo(queryBuilder.value()));
79+
// wildcard queries get normalized
80+
String text = wildcardQuery.getTerm().text().toLowerCase(Locale.ROOT);
81+
assertThat(text, equalTo(text));
7982
} else {
8083
Query expected = new MatchNoDocsQuery("unknown field [" + expectedFieldName + "]");
8184
assertEquals(expected, query);
@@ -138,14 +141,14 @@ public void testTypeField() throws IOException {
138141
builder.doToQuery(createShardContext());
139142
assertWarnings(QueryShardContext.TYPES_DEPRECATION_MESSAGE);
140143
}
141-
144+
142145
public void testRewriteIndexQueryToMatchNone() throws IOException {
143146
WildcardQueryBuilder query = new WildcardQueryBuilder("_index", "does_not_exist");
144147
QueryShardContext queryShardContext = createShardContext();
145148
QueryBuilder rewritten = query.rewrite(queryShardContext);
146149
assertThat(rewritten, instanceOf(MatchNoneQueryBuilder.class));
147-
}
148-
150+
}
151+
149152
public void testRewriteIndexQueryNotMatchNone() throws IOException {
150153
String fullIndexName = getIndex().getName();
151154
String firstHalfOfIndexName = fullIndexName.substring(0,fullIndexName.length()/2);

0 commit comments

Comments
 (0)