Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
89c6be5
Directly add fields to doc in FlattenedFieldParser
jordan-powers Jan 5, 2026
ac64f0d
Add SeparateCount#addToFieldInDoc
jordan-powers Jan 5, 2026
a978bd0
Store flattened fields in binary doc values
jordan-powers Jan 5, 2026
3de1881
Implement flattened fields synthetic source for binary doc values
jordan-powers Jan 6, 2026
2164715
Update FlattenedFieldMapperTests
jordan-powers Jan 6, 2026
86ac0ad
Support IndexFieldData for binary flattened fields
jordan-powers Jan 7, 2026
6cfbaaa
Fix FlattenedFieldMapperTests#testSyntheticEmptyListNoDocValuesLoader
jordan-powers Jan 7, 2026
9171ba4
Fix FlattenedFieldMapperTests
jordan-powers Jan 7, 2026
aaeaf77
Update docs/changelog/140246.yaml
jordan-powers Jan 7, 2026
383fa2c
Update BinaryKeyedFlattenedLeafFieldData javadoc
jordan-powers Jan 7, 2026
a3cb19b
Merge remote-tracking branch 'upstream/main' into flattened-field-bin…
jordan-powers Jan 7, 2026
f98a4a5
Fix ValuesSourceConfigTests#testFlattened
jordan-powers Jan 7, 2026
d9627a8
Fix AggregatorTestCase#testSupportedFieldTypes
jordan-powers Jan 7, 2026
1d46bec
Fix FlattenedFieldMapperTests#testIndexTimeFieldData
jordan-powers Jan 7, 2026
754edfa
Fix FlattenedFieldMapperTests#testSortShortcuts
jordan-powers Jan 7, 2026
3b721dd
Merge remote-tracking branch 'upstream/main' into flattened-field-bin…
jordan-powers Jan 7, 2026
22d2168
Merge remote-tracking branch 'origin/flattened-field-binary-doc-value…
jordan-powers Jan 7, 2026
6353524
Update docs/changelog/140246.yaml
jordan-powers Jan 8, 2026
a7bd1d8
Update DocumentLeafReader#getDocValuesSkipper to return null
jordan-powers Jan 8, 2026
beee1f1
Only use binary doc values if TSDB codec is enabled
jordan-powers Jan 8, 2026
704dab2
Fix FlattenedFieldMapperTests
jordan-powers Jan 8, 2026
f58e263
Merge remote-tracking branch 'upstream/main' into flattened-field-bin…
jordan-powers Jan 8, 2026
a6bc38c
Merge remote-tracking branch 'origin/flattened-field-binary-doc-value…
jordan-powers Jan 8, 2026
fc12bb6
Fix more tests
jordan-powers Jan 8, 2026
ebd110a
Merge remote-tracking branch 'upstream/main' into flattened-field-bin…
jordan-powers Jan 8, 2026
8503e1f
Update KeywordFieldType to accept usesBinaryDocValues as a parameter
jordan-powers Jan 8, 2026
40e92bc
Pass usesBinaryDocValues in to flattened keyed field type
jordan-powers Jan 8, 2026
8b5720c
Remove unused FlattenedFieldMapperTests#getDocValuesField
jordan-powers Jan 8, 2026
c266a09
Merge remote-tracking branch 'upstream/main' into flattened-field-bin…
jordan-powers Jan 8, 2026
7a756ca
Merge remote-tracking branch 'upstream/main' into flattened-field-bin…
jordan-powers Jan 9, 2026
055e9be
Rename to addToSeparateCountMultiBinaryFieldInDoc
jordan-powers Jan 9, 2026
263cfea
Merge remote-tracking branch 'upstream/main' into flattened-field-bin…
jordan-powers Jan 13, 2026
fb69560
Merge remote-tracking branch 'upstream/main' into flattened-field-bin…
jordan-powers Jan 13, 2026
f2f518e
Merge remote-tracking branch 'upstream/main' into flattened-field-bin…
jordan-powers Jan 14, 2026
fcc7019
Make some classes final
jordan-powers Jan 15, 2026
301071e
Merge remote-tracking branch 'upstream/main' into flattened-field-bin…
jordan-powers Jan 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/140246.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 140246
summary: Store flattened field data in binary doc values
area: Mapping
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion MOD_ROUTING_FUNCTION = def(9_064_0_00, Version.LUCENE_10_3_2);
public static final IndexVersion FALLBACK_TEXT_FIELDS_BINARY_DOC_VALUES_FORMAT_CHECK = def(9_065_0_00, Version.LUCENE_10_3_2);
public static final IndexVersion READ_SI_FILES_FROM_MEMORY_FOR_HOLLOW_COMMITS = def(9_066_0_00, Version.LUCENE_10_3_2);
public static final IndexVersion FLATTENED_FIELD_TSDB_CODEC_USE_BINARY_DOC_VALUES = def(9_067_0_00, Version.LUCENE_10_3_2);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,15 @@ public static MultiValuedSortedBinaryDocValues from(LeafReader leafReader, Strin
if (counts == null) {
return new IntegratedCounts(values);
} else {
Sparsity sparsity = Sparsity.UNKNOWN;
ValueMode valueMode = ValueMode.UNKNOWN;

DocValuesSkipper countsSkipper = leafReader.getDocValuesSkipper(countsFieldName);
Sparsity sparsity = countsSkipper.docCount() == leafReader.maxDoc() ? Sparsity.DENSE : Sparsity.SPARSE;
ValueMode valueMode = countsSkipper.maxValue() == 1 ? ValueMode.SINGLE_VALUED : ValueMode.MULTI_VALUED;
if (countsSkipper != null) {
sparsity = countsSkipper.docCount() == leafReader.maxDoc() ? Sparsity.DENSE : Sparsity.SPARSE;
valueMode = countsSkipper.maxValue() == 1 ? ValueMode.SINGLE_VALUED : ValueMode.MULTI_VALUED;
}

return new SeparateCounts(values, counts, sparsity, valueMode);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ public NumericDocValues getNormValues(String field) throws IOException {

@Override
public DocValuesSkipper getDocValuesSkipper(String s) throws IOException {
throw new UnsupportedOperationException();
return null;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.InvertableType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DocValuesSkipIndexType;
Expand Down Expand Up @@ -112,7 +111,6 @@
import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING;
import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName;
import static org.elasticsearch.index.mapper.FieldMapper.Parameter.useTimeSeriesDocValuesSkippers;
import static org.elasticsearch.index.mapper.MultiValuedBinaryDocValuesField.SeparateCount.COUNT_FIELD_SUFFIX;

/**
* A field mapper for keywords. This mapper accepts strings and indexes them as-is.
Expand Down Expand Up @@ -345,6 +343,10 @@ public DocValuesParameter.Values docValuesParameters() {
return docValuesParameters.getValue();
}

/**
 * Reports whether the current doc-values configuration selects binary doc values.
 *
 * @return {@code true} only when doc values are enabled and their cardinality is
 *         {@link DocValuesParameter.Values.Cardinality#HIGH}; {@code false} otherwise
 */
boolean usesBinaryDocValues() {
    final DocValuesParameter.Values params = docValuesParameters();
    // Disabled doc values can never be binary, so there is no need to look at the cardinality.
    if (params.enabled() == false) {
        return false;
    }
    return params.cardinality() == DocValuesParameter.Values.Cardinality.HIGH;
}

/**
 * Returns the {@link SimilarityProvider} currently held by the {@code similarity} member,
 * as obtained from its {@code get()} accessor.
 */
public SimilarityProvider similarity() {
return this.similarity.get();
}
Expand Down Expand Up @@ -544,7 +546,7 @@ public static final class KeywordFieldType extends TextFamilyFieldType {
private final boolean eagerGlobalOrdinals;
private final FieldValues<String> scriptValues;
private final boolean isDimension;
private final DocValuesParameter.Values docValuesParameters;
private final boolean usesBinaryDocValues;

public KeywordFieldType(
String name,
Expand Down Expand Up @@ -573,22 +575,32 @@ public KeywordFieldType(
this.nullValue = builder.nullValue.getValue();
this.scriptValues = builder.scriptValues();
this.isDimension = builder.dimension.getValue();
this.docValuesParameters = builder.docValuesParameters();
this.usesBinaryDocValues = builder.usesBinaryDocValues();
}

public KeywordFieldType(String name) {
this(name, true, true, Collections.emptyMap());
this(name, true, true, false, Collections.emptyMap());
}

/**
 * Convenience constructor that forwards to the five-argument constructor with
 * {@code usesBinaryDocValues} fixed to {@code false}.
 *
 * @param name         the field name
 * @param isIndexed    forwarded unchanged to the five-argument constructor
 * @param hasDocValues forwarded unchanged to the five-argument constructor
 * @param meta         forwarded unchanged to the five-argument constructor
 */
public KeywordFieldType(String name, boolean isIndexed, boolean hasDocValues, Map<String, String> meta) {
this(name, isIndexed, hasDocValues, false, meta);
}

public KeywordFieldType(
String name,
boolean isIndexed,
boolean hasDocValues,
boolean usesBinaryDocValues,
Map<String, String> meta
) {
super(name, IndexType.terms(isIndexed, hasDocValues), false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta, false, false);
this.normalizer = Lucene.KEYWORD_ANALYZER;
this.ignoreAbove = IGNORE_ABOVE_DEFAULT;
this.nullValue = null;
this.eagerGlobalOrdinals = false;
this.scriptValues = null;
this.isDimension = false;
this.docValuesParameters = DEFAULT_DOC_VALUES_PARAMS;
this.usesBinaryDocValues = usesBinaryDocValues;
}

public KeywordFieldType(String name, FieldType fieldType, boolean isSyntheticSource) {
Expand All @@ -607,7 +619,7 @@ public KeywordFieldType(String name, FieldType fieldType, boolean isSyntheticSou
this.eagerGlobalOrdinals = false;
this.scriptValues = null;
this.isDimension = false;
this.docValuesParameters = DEFAULT_DOC_VALUES_PARAMS;
this.usesBinaryDocValues = false;
}

public KeywordFieldType(String name, NamedAnalyzer analyzer) {
Expand All @@ -626,7 +638,11 @@ public KeywordFieldType(String name, NamedAnalyzer analyzer) {
this.eagerGlobalOrdinals = false;
this.scriptValues = null;
this.isDimension = false;
this.docValuesParameters = DEFAULT_DOC_VALUES_PARAMS;
this.usesBinaryDocValues = false;
}

/**
 * Returns the {@code usesBinaryDocValues} flag fixed at construction time.
 * <p>
 * Within this class the flag selects the binary-doc-values code paths (for example the
 * slow binary doc-values queries, the binary block loader and the binary field data builder)
 * instead of their sorted-set counterparts.
 *
 * @return {@code true} if this field type was created to use binary doc values
 */
public boolean usesBinaryDocValues() {
return usesBinaryDocValues;
}

@Override
Expand All @@ -639,7 +655,7 @@ public Query termQuery(Object value, SearchExecutionContext context) {
failIfNotIndexedNorDocValuesFallback(context);
if (indexType.hasTerms()) {
return super.termQuery(value, context);
} else if (storedInBinaryDocValues()) {
} else if (usesBinaryDocValues) {
return new SlowCustomBinaryDocValuesTermQuery(name(), indexedValueForSearch(value));
} else {
return SortedSetDocValuesField.newSlowExactQuery(name(), indexedValueForSearch(value));
Expand All @@ -651,7 +667,7 @@ public Query termsQuery(Collection<?> values, SearchExecutionContext context) {
failIfNotIndexedNorDocValuesFallback(context);
if (indexType.hasTerms()) {
return super.termsQuery(values, context);
} else if (storedInBinaryDocValues()) {
} else if (usesBinaryDocValues) {
return new StringScriptFieldTermsQuery(
new Script(""),
ctx -> new SortedBinaryDocValuesStringFieldScript(name(), context.lookup(), ctx),
Expand All @@ -675,7 +691,7 @@ public Query rangeQuery(
failIfNotIndexedNorDocValuesFallback(context);
if (indexType.hasTerms()) {
return super.rangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, context);
} else if (storedInBinaryDocValues()) {
} else if (usesBinaryDocValues) {
return new StringScriptFieldRangeQuery(
new Script(""),
ctx -> new SortedBinaryDocValuesStringFieldScript(name(), context.lookup(), ctx),
Expand Down Expand Up @@ -709,7 +725,7 @@ public Query fuzzyQuery(
failIfNotIndexedNorDocValuesFallback(context);
if (indexType.hasTerms()) {
return super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context, rewriteMethod);
} else if (storedInBinaryDocValues()) {
} else if (usesBinaryDocValues) {
return StringScriptFieldFuzzyQuery.build(
new Script(""),
ctx -> new SortedBinaryDocValuesStringFieldScript(name(), context.lookup(), ctx),
Expand Down Expand Up @@ -741,7 +757,7 @@ public Query prefixQuery(
failIfNotIndexedNorDocValuesFallback(context);
if (indexType.hasTerms()) {
return super.prefixQuery(value, method, caseInsensitive, context);
} else if (storedInBinaryDocValues()) {
} else if (usesBinaryDocValues) {
return new StringScriptFieldPrefixQuery(
new Script(""),
ctx -> new SortedBinaryDocValuesStringFieldScript(name(), context.lookup(), ctx),
Expand Down Expand Up @@ -769,7 +785,7 @@ public Query termQueryCaseInsensitive(Object value, SearchExecutionContext conte
failIfNotIndexedNorDocValuesFallback(context);
if (indexType.hasTerms()) {
return super.termQueryCaseInsensitive(value, context);
} else if (storedInBinaryDocValues()) {
} else if (usesBinaryDocValues) {
return new StringScriptFieldTermQuery(
new Script(""),
ctx -> new SortedBinaryDocValuesStringFieldScript(name(), context.lookup(), ctx),
Expand All @@ -794,7 +810,7 @@ public TermsEnum getTerms(IndexReader reader, String prefix, boolean caseInsensi
if (indexType.hasTerms()) {
terms = MultiTerms.getTerms(reader, name());
} else if (hasDocValues()) {
if (storedInBinaryDocValues()) {
if (usesBinaryDocValues) {
throw new UnsupportedOperationException("TODO");
} else {
terms = SortedSetDocValuesTerms.getTerms(reader, name());
Expand Down Expand Up @@ -828,10 +844,6 @@ public String typeName() {
return CONTENT_TYPE;
}

public boolean storedInBinaryDocValues() {
return docValuesParameters.enabled() && docValuesParameters.cardinality() == DocValuesParameter.Values.Cardinality.HIGH;
}

@Override
public boolean eagerGlobalOrdinals() {
return eagerGlobalOrdinals;
Expand All @@ -846,7 +858,7 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSourceEnabled())) {
BlockLoaderFunctionConfig cfg = blContext.blockLoaderFunctionConfig();
if (cfg == null) {
if (storedInBinaryDocValues()) {
if (usesBinaryDocValues) {
return new BytesRefsFromBinaryMultiSeparateCountBlockLoader(name());
} else {
return new BytesRefsFromOrdsBlockLoader(name());
Expand Down Expand Up @@ -986,7 +998,7 @@ protected BytesRef storedToBytesRef(Object stored) {
}

private IndexFieldData.Builder fieldDataFromDocValues() {
if (storedInBinaryDocValues()) {
if (usesBinaryDocValues) {
return new BytesBinaryIndexFieldData.Builder(name(), CoreValuesSourceType.KEYWORD, KeywordDocValuesField::new);
} else {
return new SortedSetOrdinalsIndexFieldData.Builder(
Expand Down Expand Up @@ -1076,7 +1088,7 @@ public Query wildcardQuery(
value = indexedValueForSearch(value).utf8ToString();
}

if (storedInBinaryDocValues()) {
if (usesBinaryDocValues) {
return new SlowCustomBinaryDocValuesWildcardQuery(name(), value, caseInsensitive);
}

Expand All @@ -1102,7 +1114,7 @@ public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod
value = indexedValueForSearch(value).utf8ToString();
}

if (storedInBinaryDocValues()) {
if (usesBinaryDocValues) {
return new StringScriptFieldWildcardQuery(
new Script(""),
ctx -> new SortedBinaryDocValuesStringFieldScript(name(), context.lookup(), ctx),
Expand Down Expand Up @@ -1134,7 +1146,7 @@ public Query regexpQuery(
throw new IllegalArgumentException("Match flags not yet implemented [" + matchFlags + "]");
}

if (storedInBinaryDocValues()) {
if (usesBinaryDocValues) {
return new StringScriptFieldRegexpQuery(
new Script(""),
ctx -> new SortedBinaryDocValuesStringFieldScript(name(), context.lookup(), ctx),
Expand Down Expand Up @@ -1365,20 +1377,13 @@ private boolean indexValue(DocumentParserContext context, XContentString value)
throw new IllegalArgumentException(msg);
}

if (fieldType().storedInBinaryDocValues()) {
if (fieldType().usesBinaryDocValues()) {
assert fieldType.docValuesType() == DocValuesType.NONE;

var field = (MultiValuedBinaryDocValuesField.SeparateCount) context.doc().getByKey(fieldType().name());
var countField = (NumericDocValuesField) context.doc().getByKey(fieldType().name() + COUNT_FIELD_SUFFIX);
if (field == null) {
field = new MultiValuedBinaryDocValuesField.SeparateCount(fieldType().name(), false);
context.doc().addWithKey(fieldType().name(), field);
countField = NumericDocValuesField.indexedField(field.countFieldName(), -1); // dummy value
context.doc().addWithKey(countField.name(), countField);
}

field.add(binaryValue);
countField.setLongValue(field.count());
MultiValuedBinaryDocValuesField.SeparateCount.addToSeparateCountMultiBinaryFieldInDoc(
context.doc(),
fieldType().name(),
binaryValue
);
}

// If we're using binary doc values, then the values are stored in a separate MultiValuedBinaryDocValuesField (see above)
Expand Down Expand Up @@ -1483,8 +1488,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
return SyntheticSourceSupport.FALLBACK;
}

boolean docValuesSupportNativeSyntheticSource = fieldType().storedInBinaryDocValues() == false
|| sourceKeepMode == SourceKeepMode.NONE;
boolean docValuesSupportNativeSyntheticSource = fieldType().usesBinaryDocValues() == false || sourceKeepMode == SourceKeepMode.NONE;

if (fieldType.stored() || (docValuesParameters.enabled() && docValuesSupportNativeSyntheticSource)) {
return new SyntheticSourceSupport.Native(() -> syntheticFieldLoader(fullPath(), leafName()));
Expand All @@ -1510,7 +1514,7 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
}
});
} else if (docValuesParameters.enabled()) {
if (fieldType().storedInBinaryDocValues() == false) {
if (fieldType().usesBinaryDocValues() == false) {
if (offsetsFieldName != null) {
layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName));
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
Expand Down Expand Up @@ -125,5 +126,19 @@ public BytesRef binaryValue() {
/**
 * Builds the name of the companion count field for this field.
 *
 * @return this field's {@code name()} followed by {@code COUNT_FIELD_SUFFIX}
 */
public String countFieldName() {
    final String baseName = name();
    return baseName + COUNT_FIELD_SUFFIX;
}

/**
 * Appends {@code binaryValue} to the {@link SeparateCount} field registered in {@code doc}
 * under {@code fieldName}, and refreshes the companion count field to match.
 * <p>
 * On the first value for a given {@code fieldName}, both the values field and its count
 * field (keyed by {@code fieldName + COUNT_FIELD_SUFFIX}) are created and added to the
 * document; later calls reuse the instances already stored there.
 *
 * @param doc         the document holding (or receiving) the two fields
 * @param fieldName   the key and name of the values field
 * @param binaryValue the value to append
 */
public static void addToSeparateCountMultiBinaryFieldInDoc(LuceneDocument doc, String fieldName, BytesRef binaryValue) {
    SeparateCount values = (SeparateCount) doc.getByKey(fieldName);
    NumericDocValuesField counter = (NumericDocValuesField) doc.getByKey(fieldName + COUNT_FIELD_SUFFIX);

    final boolean firstValueForField = values == null;
    if (firstValueForField) {
        values = new SeparateCount(fieldName, false);
        // The initial -1 is only a placeholder; the real count is written just below.
        counter = NumericDocValuesField.indexedField(values.countFieldName(), -1);
        doc.addWithKey(values.name(), values);
        doc.addWithKey(counter.name(), counter);
    }

    values.add(binaryValue);
    // Keep the count field in sync with the number of values accumulated so far.
    counter.setLongValue(values.count());
}
}
}
Loading