diff --git a/docs/changelog/83404.yaml b/docs/changelog/83404.yaml new file mode 100644 index 0000000000000..f81ada4d1ef0c --- /dev/null +++ b/docs/changelog/83404.yaml @@ -0,0 +1,5 @@ +pr: 83404 +summary: Implement all queries on doc-values only keyword fields +area: Mapping +type: enhancement +issues: [] diff --git a/docs/reference/mapping/params/doc-values.asciidoc b/docs/reference/mapping/params/doc-values.asciidoc index 0b4a2c7cf4b2f..a2ea57960edc2 100644 --- a/docs/reference/mapping/params/doc-values.asciidoc +++ b/docs/reference/mapping/params/doc-values.asciidoc @@ -19,7 +19,7 @@ with the __notable exception of `text` and `annotated_text` fields__. <>, <>, the <>, <>, <> and the <> -can also be queried using term or range-based queries +can also be queried when they are not <> but only have doc values enabled. Query performance on doc values is much slower than on index structures, but offers an interesting tradeoff between disk usage and query performance for diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc index df77e897761c0..c73e77aab94a2 100644 --- a/docs/reference/mapping/types/keyword.asciidoc +++ b/docs/reference/mapping/types/keyword.asciidoc @@ -82,8 +82,7 @@ The following parameters are accepted by `keyword` fields: Should the field be quickly searchable? Accepts `true` (default) and `false`. `keyword` fields that only have <> - enabled can still be queried using term or range-based queries, - albeit slower. + enabled can still be queried, albeit slower. 
<>:: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml index f497acfce5626..b817c5dfdb2cb 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml @@ -289,6 +289,60 @@ setup: body: { query: { range: { keyword: { gte: "key1" } } } } - length: { hits.hits: 2 } +--- +"Test fuzzy query on keyword field where only doc values are enabled": + + - do: + search: + index: test + body: { query: { fuzzy: { keyword: { value: "kay1", fuzziness: 1 } } } } + - length: { hits.hits: 1 } + +--- +"Test prefix query on keyword field where only doc values are enabled": + + - do: + search: + index: test + body: { query: { prefix: { keyword: { value: "key" } } } } + - length: { hits.hits: 2 } + +--- +"Test case insensitive term query on keyword field where only doc values are enabled": + + - do: + search: + index: test + body: { query: { term: { keyword: { value: "KeY1", case_insensitive: true } } } } + - length: { hits.hits: 1 } + +--- +"Test wildcard query on keyword field where only doc values are enabled": + + - do: + search: + index: test + body: { query: { wildcard: { keyword: { value: "k*1" } } } } + - length: { hits.hits: 1 } + +--- +"Test case insensitive wildcard query on keyword field where only doc values are enabled": + + - do: + search: + index: test + body: { query: { wildcard: { keyword: { value: "K*1", case_insensitive: true } } } } + - length: { hits.hits: 1 } + +--- +"Test regexp query on keyword field where only doc values are enabled": + + - do: + search: + index: test + body: { query: { regexp: { keyword: { value: "k.*1" } } } } + - length: { hits.hits: 1 } + --- "Test match query on boolean field where only doc values are enabled": diff --git 
a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
index 756bc4206ac9a..c118f7bba157e 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -31,8 +31,10 @@
 import org.apache.lucene.util.automaton.MinimizationOperations;
 import org.apache.lucene.util.automaton.Operations;
 import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.search.AutomatonQueries;
+import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.fielddata.FieldData;
@@ -44,9 +46,15 @@
 import org.elasticsearch.script.ScriptCompiler;
 import org.elasticsearch.script.StringFieldScript;
 import org.elasticsearch.script.field.KeywordDocValuesField;
+import org.elasticsearch.script.field.SortedSetDocValuesStringFieldScript;
 import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
 import org.elasticsearch.search.lookup.FieldValues;
 import org.elasticsearch.search.lookup.SearchLookup;
+import org.elasticsearch.search.runtime.StringScriptFieldFuzzyQuery;
+import org.elasticsearch.search.runtime.StringScriptFieldPrefixQuery;
+import org.elasticsearch.search.runtime.StringScriptFieldRegexpQuery;
+import org.elasticsearch.search.runtime.StringScriptFieldTermQuery;
+import org.elasticsearch.search.runtime.StringScriptFieldWildcardQuery;
 import org.elasticsearch.xcontent.XContentParser;
 
 import java.io.IOException;
@@ -388,6 +396,85 @@ public Query rangeQuery(
             }
         }
 
+        /**
+         * Builds a fuzzy query for this field. An indexed field delegates to the parent implementation; a field
+         * that is not indexed is answered with a {@link StringScriptFieldFuzzyQuery} whose values are read
+         * through a {@link SortedSetDocValuesStringFieldScript}.
+         * <p>
+         * The fallback branch does not pass {@code maxExpansions} on, so that argument only affects the indexed path.
+         */
+        @Override
+        public Query fuzzyQuery(
+            Object value,
+            Fuzziness fuzziness,
+            int prefixLength,
+            int maxExpansions,
+            boolean transpositions,
+            SearchExecutionContext context
+        ) {
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context);
+            } else {
+                return StringScriptFieldFuzzyQuery.build(
+                    new Script(""),
+                    ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                    name(),
+                    indexedValueForSearch(value).utf8ToString(),
+                    fuzziness.asDistance(BytesRefs.toString(value)),
+                    prefixLength,
+                    transpositions
+                );
+            }
+        }
+
+        /**
+         * Builds a prefix query for this field. An indexed field delegates to the parent implementation; a field
+         * that is not indexed is answered with a {@link StringScriptFieldPrefixQuery} over its doc values, in
+         * which case {@code method} is not used.
+         */
+        @Override
+        public Query prefixQuery(
+            String value,
+            MultiTermQuery.RewriteMethod method,
+            boolean caseInsensitive,
+            SearchExecutionContext context
+        ) {
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.prefixQuery(value, method, caseInsensitive, context);
+            } else {
+                return new StringScriptFieldPrefixQuery(
+                    new Script(""),
+                    ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                    name(),
+                    indexedValueForSearch(value).utf8ToString(),
+                    caseInsensitive
+                );
+            }
+        }
+
+        /**
+         * Builds a case-insensitive term query for this field. An indexed field delegates to the parent
+         * implementation; a field that is not indexed is answered with a {@link StringScriptFieldTermQuery}
+         * over its doc values.
+         */
+        @Override
+        public Query termQueryCaseInsensitive(Object value, SearchExecutionContext context) {
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.termQueryCaseInsensitive(value, context);
+            } else {
+                return new StringScriptFieldTermQuery(
+                    new Script(""),
+                    ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                    name(),
+                    indexedValueForSearch(value).utf8ToString(),
+                    true
+                );
+            }
+        }
+
         @Override
         public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext, String searchAfter)
             throws IOException {
@@ -521,7 +608,81 @@ public Query wildcardQuery(
             boolean caseInsensitive,
             SearchExecutionContext context
         ) {
-            return super.wildcardQuery(value, method, caseInsensitive, true, context);
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.wildcardQuery(value, method, caseInsensitive, true, context);
+            } else {
+                // Not indexed: evaluate the pattern against doc values; method is not used on this path.
+                return docValuesWildcardQuery(value, caseInsensitive, context);
+            }
+        }
+
+        /**
+         * Builds a wildcard query whose pattern is normalized. An indexed field delegates to the parent
+         * implementation; a field that is not indexed is answered from doc values, always case-sensitively.
+         */
+        @Override
+        public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.normalizedWildcardQuery(value, method, context);
+            } else {
+                return docValuesWildcardQuery(value, false, context);
+            }
+        }
+
+        /**
+         * Doc-values fallback shared by {@code wildcardQuery} and {@code normalizedWildcardQuery}. The pattern is
+         * normalized with the search analyzer when one is configured and converted with
+         * {@code indexedValueForSearch} otherwise, then matched by a {@link StringScriptFieldWildcardQuery}.
+         */
+        private Query docValuesWildcardQuery(String value, boolean caseInsensitive, SearchExecutionContext context) {
+            final String pattern;
+            if (getTextSearchInfo().getSearchAnalyzer() != null) {
+                pattern = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
+            } else {
+                pattern = indexedValueForSearch(value).utf8ToString();
+            }
+            return new StringScriptFieldWildcardQuery(
+                new Script(""),
+                ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                name(),
+                pattern,
+                caseInsensitive
+            );
+        }
+
+        /**
+         * Builds a regular expression query. An indexed field delegates to the parent implementation; a field
+         * that is not indexed is answered with a {@link StringScriptFieldRegexpQuery} over its doc values. On that
+         * path a non-zero {@code matchFlags} raises {@link IllegalArgumentException} and {@code method} is not used.
+         */
+        @Override
+        public Query regexpQuery(
+            String value,
+            int syntaxFlags,
+            int matchFlags,
+            int maxDeterminizedStates,
+            MultiTermQuery.RewriteMethod method,
+            SearchExecutionContext context
+        ) {
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context);
+            } else {
+                if (matchFlags != 0) {
+                    throw new IllegalArgumentException("Match flags not yet implemented [" + matchFlags + "]");
+                }
+                return new StringScriptFieldRegexpQuery(
+                    new Script(""),
+                    ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                    name(),
+                    indexedValueForSearch(value).utf8ToString(),
+                    syntaxFlags,
+                    matchFlags,
+                    maxDeterminizedStates
+                );
+            }
         }
 
         @Override
diff --git
a/server/src/main/java/org/elasticsearch/script/field/SortedSetDocValuesStringFieldScript.java b/server/src/main/java/org/elasticsearch/script/field/SortedSetDocValuesStringFieldScript.java
new file mode 100644
index 0000000000000..72313aebde15a
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/script/field/SortedSetDocValuesStringFieldScript.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.script.field;
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.script.StringFieldScript;
+import org.elasticsearch.search.lookup.SearchLookup;
+
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * A {@link StringFieldScript} that, instead of running a script, emits the values held in the
+ * {@link SortedSetDocValues} of a field, each decoded as a UTF-8 string.
+ */
+public class SortedSetDocValuesStringFieldScript extends StringFieldScript {
+    private final SortedSetDocValues sortedSetDocValues;
+
+    /** Whether the document passed to the latest {@link #setDocument(int)} call has a value for the field. */
+    private boolean hasValue;
+
+    /**
+     * Loads the sorted-set doc values of {@code fieldName} from the reader of {@code ctx}.
+     *
+     * @param fieldName    field whose doc values are read
+     * @param searchLookup lookup passed through to {@link StringFieldScript}
+     * @param ctx          leaf whose reader supplies the doc values
+     * @throws IllegalStateException if reading the doc values raises an {@link IOException}
+     */
+    public SortedSetDocValuesStringFieldScript(String fieldName, SearchLookup searchLookup, LeafReaderContext ctx) {
+        super(fieldName, Map.of(), searchLookup, ctx);
+        try {
+            sortedSetDocValues = DocValues.getSortedSet(ctx.reader(), fieldName);
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values", e);
+        }
+    }
+
+    /**
+     * Positions the doc values on {@code docID} and records whether that document has a value.
+     */
+    @Override
+    public void setDocument(int docID) {
+        try {
+            hasValue = sortedSetDocValues.advanceExact(docID);
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values", e);
+        }
+    }
+
+    /**
+     * Emits every value of the current document; does nothing when the document has no value.
+     */
+    @Override
+    public void execute() {
+        try {
+            if (hasValue) {
+                long ord;
+                while ((ord = sortedSetDocValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+                    BytesRef bytesRef = sortedSetDocValues.lookupOrd(ord);
+                    emit(bytesRef.utf8ToString());
+                }
+            }
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values", e);
+        }
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java
index 3037d7d3c9703..d6c01f123130c 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java
@@ -167,12 +167,12 @@ public void testRegexpQuery() {
         MappedFieldType ft = new KeywordFieldType("field");
         assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT));
 
-        MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap());
+        MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap());
         IllegalArgumentException e = expectThrows(
             IllegalArgumentException.class,
             () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT)
         );
-        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
+        assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage());
 
         ElasticsearchException ee = expectThrows(
             ElasticsearchException.class,
@@ -188,12 +188,12 @@ public void testFuzzyQuery() {
             ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT)
         );
 
-        MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap());
+        MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap());
         IllegalArgumentException e = expectThrows(
             IllegalArgumentException.class,
             () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT)
         );
-        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
+        assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage());
 
         ElasticsearchException ee = expectThrows(
             ElasticsearchException.class,