Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import com.ibm.icu.text.RawCollationKey;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1806,7 +1806,7 @@ public void testWildcardQueryNormalizationOnTextField() {
{
WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*");
SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
assertHitCount(searchResponse, 1L);
assertHitCount(searchResponse, 0L);

wildCardQuery = wildcardQuery("field1", "bb*");
searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a test where the search string is mixed case but wildCardQuery.caseInsensitive(true) is set.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.xcontent.XContentParser;
Expand Down Expand Up @@ -322,6 +324,19 @@ protected BytesRef indexedValueForSearch(Object value) {
return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString());
}

/**
* Wildcard queries on keyword fields use the normalizer of the underlying field, regardless of their case sensitivity option
*/
@Override
public Query wildcardQuery(
String value,
MultiTermQuery.RewriteMethod method,
boolean caseInsensitive,
SearchExecutionContext context
) {
return super.wildcardQuery(value, method, caseInsensitive, true, context);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe calling normalizedWildcardQuery here would make the intent more obvious, and it would also make it easier to trace where we use normalized wildcard queries.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, but I left the caseInsensitive parameter out of the normalizedWildcardQuery signature on purpose, so I need to call the protected method that takes both arguments here. My thinking was that we use normalizedWildcardQuery only from QueryStringQueryParser where we don't have the caseInsensitive option. Maybe you see a different solution?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok. That makes sense.

}

@Override
public CollapseType collapseType() {
return CollapseType.KEYWORD;
Expand All @@ -332,6 +347,7 @@ public CollapseType collapseType() {
public int ignoreAbove() {
// Plain accessor: hands back the stored ignoreAbove value unchanged.
return ignoreAbove;
}

}

private final boolean indexed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.query.DistanceFeatureQueryBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.fetch.subphase.FetchFieldsPhase;
import org.elasticsearch.search.lookup.SearchLookup;
Expand Down Expand Up @@ -237,6 +237,11 @@ public Query wildcardQuery(String value,
+ "] which is of type [" + typeName() + "]");
}

/**
 * Base implementation of the normalized wildcard query: it never builds a query and always
 * rejects the request, reporting this field's name and type in the error.
 *
 * @param value   the wildcard pattern (unused here)
 * @param method  optional rewrite method (unused here)
 * @param context the search execution context, passed on to the exception
 * @return never returns normally
 * @throws QueryShardException always
 */
public Query normalizedWildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
    final String reason = "Can only use wildcard queries on keyword, text and wildcard fields - not on [" + name
        + "] which is of type [" + typeName() + "]";
    throw new QueryShardException(context, reason);
}

public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
@Nullable MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
throw new QueryShardException(context, "Can only use regexp queries on keyword and text fields - not on [" + name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,30 @@ public static final String normalizeWildcardPattern(String fieldname, String val

/**
 * Builds a wildcard query for the given pattern without normalizing it first; the
 * requested case sensitivity is passed through unchanged.
 */
@Override
public Query wildcardQuery(
    String value,
    MultiTermQuery.RewriteMethod method,
    boolean caseInsensitive,
    SearchExecutionContext context
) {
    // This entry point never asks for pattern normalization.
    final boolean normalizePattern = false;
    return wildcardQuery(value, method, caseInsensitive, normalizePattern, context);
}


/**
 * Builds a wildcard query whose pattern is normalized before the query is created.
 * This variant takes no case-insensitivity option and always delegates with it switched off.
 */
@Override
public Query normalizedWildcardQuery(
    String value,
    MultiTermQuery.RewriteMethod method,
    SearchExecutionContext context
) {
    final boolean ignoreCase = false;
    final boolean normalizePattern = true;
    return wildcardQuery(value, method, ignoreCase, normalizePattern, context);
}

protected Query wildcardQuery(
String value,
MultiTermQuery.RewriteMethod method,
boolean caseInsensitive,
boolean shouldNormalize,
SearchExecutionContext context
) {
failIfNotIndexed();
if (context.allowExpensiveQueries() == false) {
throw new ElasticsearchException("[wildcard] queries cannot be executed when '" +
ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false.");
}

Term term;
if (getTextSearchInfo().getSearchAnalyzer() != null) {
if (getTextSearchInfo().getSearchAnalyzer() != null && shouldNormalize) {
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
term = new Term(name(), value);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,7 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
if (getAllowLeadingWildcard() == false && (termStr.startsWith("*") || termStr.startsWith("?"))) {
throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
}
return currentFieldType.wildcardQuery(termStr, getMultiTermRewriteMethod(), context);
return currentFieldType.normalizedWildcardQuery(termStr, getMultiTermRewriteMethod(), context);
} catch (RuntimeException e) {
if (lenient) {
return newLenientFieldQuery(field, e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.test.ESTestCase;

import java.util.function.Predicate;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.lucene.BytesRefs;
Expand Down Expand Up @@ -158,4 +160,44 @@ public void testFetchSourceValue() throws IOException {
assertEquals(List.of("42"), fetchSourceValue(fieldType, 42L));
assertEquals(List.of("true"), fetchSourceValue(fieldType, true));
}

/**
 * Wildcard queries built directly on the field type keep the pattern exactly as given:
 * the case sensitive variant only matches the original casing, the case insensitive
 * variant matches either casing, and both are refused when expensive queries are disallowed.
 */
public void testWildcardQuery() {
    final TextFieldType fieldType = createFieldType();
    final String pattern = "*Butterflies*";

    // case sensitive: the upper-case 'B' of the pattern must be matched literally
    AutomatonQuery caseSensitive = (AutomatonQuery) fieldType.wildcardQuery(pattern, null, false, MOCK_CONTEXT);
    AutomatonQuery expectedSensitive = new WildcardQuery(new Term("field", new BytesRef(pattern)));
    assertEquals(expectedSensitive, caseSensitive);
    CharacterRunAutomaton sensitiveMatcher = new CharacterRunAutomaton(caseSensitive.getAutomaton());
    assertFalse(sensitiveMatcher.run("some butterflies somewhere"));

    // case insensitive: both casings of the indexed text are accepted
    AutomatonQuery caseInsensitive = (AutomatonQuery) fieldType.wildcardQuery(pattern, null, true, MOCK_CONTEXT);
    AutomatonQuery expectedInsensitive = AutomatonQueries.caseInsensitiveWildcardQuery(new Term("field", new BytesRef(pattern)));
    assertEquals(expectedInsensitive, caseInsensitive);
    CharacterRunAutomaton insensitiveMatcher = new CharacterRunAutomaton(caseInsensitive.getAutomaton());
    assertTrue(insensitiveMatcher.run("some butterflies somewhere"));
    assertTrue(insensitiveMatcher.run("some Butterflies somewhere"));

    // wildcard queries are rejected when expensive queries are switched off
    ElasticsearchException rejected = expectThrows(
        ElasticsearchException.class,
        () -> fieldType.wildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE)
    );
    assertEquals(
        "[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
        rejected.getMessage()
    );
}

/**
 * Exercises {@code normalizedWildcardQuery}, which is used e.g. in the query string query
 * parser to normalize terms on text fields. The pattern is expected to be lower-cased
 * before the query is built, and the call must be refused when expensive queries are disallowed.
 */
public void testNormalizedWildcardQuery() {
    TextFieldType ft = createFieldType();

    // the mixed-case pattern is normalized, so the resulting query uses the lower-case term
    AutomatonQuery actual = (AutomatonQuery) ft.normalizedWildcardQuery("*Butterflies*", null, MOCK_CONTEXT);
    AutomatonQuery expected = new WildcardQuery(new Term("field", new BytesRef("*butterflies*")));
    assertEquals(expected, actual);
    assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
    assertFalse(new CharacterRunAutomaton(actual.getAutomaton()).run("some Butterflies somewhere"));

    // check the guard on the method under test itself, not on the plain wildcardQuery entry point
    ElasticsearchException ee = expectThrows(ElasticsearchException.class,
        () -> ft.normalizedWildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE));
    assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
        ee.getMessage());
}
}