diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java index fdb8107055709..ea45bb4f339a8 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java @@ -652,7 +652,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio context.addIgnoredField(mappedFieldType.name()); if (isSourceSynthetic) { // Save a copy of the field so synthetic source can load it - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), context.parser())); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), context.parser()); } return; } else { @@ -685,7 +685,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio context.addIgnoredField(mappedFieldType.name()); if (isSourceSynthetic) { // Save a copy of the field so synthetic source can load it - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), context.parser())); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), context.parser()); } return; } else { @@ -854,11 +854,16 @@ private SourceLoader.SyntheticFieldLoader docValuesSyntheticFieldLoader() { ) ); if (ignoreMalformed.value()) { - layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath())); + layers.add(CompositeSyntheticFieldLoader.malformedValuesLayer(fullPath(), indexSettings.getIndexVersionCreated())); } return new CompositeSyntheticFieldLoader(leafName(), fullPath(), layers); } else { - return new SortedNumericDocValuesSyntheticFieldLoader(fullPath(), leafName(), ignoreMalformed.value()) { + return new SortedNumericDocValuesSyntheticFieldLoader( + fullPath(), + leafName(), + ignoreMalformed.value(), + indexSettings.getIndexVersionCreated() + ) { @Override protected void writeValue(XContentBuilder b, long value) throws IOException { b.value(decodeForSyntheticSource(value, scalingFactor)); diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index b4228a5bd30af..a9b439334be9f 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -117,4 +117,8 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task -> task.addAllowedWarningRegex("Use of the \\[max_size\\] rollover condition has been deprecated in favour of the \\[max_primary_shard_size\\] condition and will be removed in a later version") task.skipTest("search.vectors/42_knn_search_bbq_flat/Vector rescoring has same scoring as exact search for kNN section", "scores have changed slightly with native implementations") task.skipTest("search.vectors/41_knn_search_bbq_hnsw/Vector rescoring has same scoring as exact search for kNN section", "scores have changed slightly with native implementations") + task.skipTest( + "get/100_synthetic_source/fields with ignore_malformed", + "Malformed values are now stored in binary doc values which sort differently than stored fields" + ) }) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml index 91a1d1bf9ef40..55151dc4bdbc6 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml @@ -799,8 +799,8 @@ fields with ignore_malformed: ip: - 10.10.1.1 - 192.8.1.2 - - hot garbage # fields saved by ignore_malformed are sorted after doc values - - 7 + - 7 # malformed values come after doc_values and are sorted by encoded byte representation + - hot garbage - is_false: fields - do: diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 0b0b4511b66d1..c6db5d65efe5c 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -223,6 +223,8 @@ private static Version parseUnchecked(String version) { public static final IndexVersion ID_FIELD_USE_ES812_POSTINGS_FORMAT = def(9_070_0_00, Version.LUCENE_10_3_2); public static final IndexVersion TIME_SERIES_USE_SYNTHETIC_ID_94 = def(9_071_0_00, Version.LUCENE_10_3_2); public static final IndexVersion TIME_SERIES_DOC_VALUES_FORMAT_VERSION_3 = def(9_072_0_00, Version.LUCENE_10_3_2); + public static final IndexVersion STORE_IGNORED_MALFORMED_IN_BINARY_DOC_VALUES = def(9_073_0_00, Version.LUCENE_10_3_2); + /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java index 6adba143d99c2..acccfac06b001 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java @@ -17,7 +17,7 @@ import java.io.IOException; -public final class BinaryDocValuesSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer { +public class BinaryDocValuesSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer { private final String name; private SortedBinaryDocValues bytesValues; @@ -42,6 +42,7 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf } bytesValues = MultiValuedSortedBinaryDocValues.from(leafReader, name, docValues); + return docId -> { hasValue = bytesValues.advanceExact(docId); return hasValue; @@ -61,10 +62,17 @@ public void write(XContentBuilder b) throws IOException { for (int i = 0; i < bytesValues.docValueCount(); ++i) { BytesRef value = bytesValues.nextValue(); - b.utf8Value(value.bytes, value.offset, value.length); + writeValue(b, value); } } + /** + * Write a single value to the builder. Subclasses can override to change the write format. + */ + protected void writeValue(XContentBuilder b, BytesRef value) throws IOException { + b.utf8Value(value.bytes, value.offset, value.length); + } + @Override public String fieldName() { return name; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java index 31cbe60088eec..302bc8b1c0f7c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java @@ -571,7 +571,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio context.addIgnoredField(mappedFieldType.name()); if (storeMalformedFields) { // Save a copy of the field so synthetic source can load it - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), context.parser())); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), context.parser()); } return; } else { @@ -659,11 +659,16 @@ private SourceLoader.SyntheticFieldLoader docValuesSyntheticFieldLoader() { ) ); if (ignoreMalformed.value()) { - layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath())); + layers.add(CompositeSyntheticFieldLoader.malformedValuesLayer(fullPath(), indexSettings.getIndexVersionCreated())); } return new CompositeSyntheticFieldLoader(leafName(), fullPath(), layers); } else { - return new SortedNumericDocValuesSyntheticFieldLoader(fullPath(), leafName(), ignoreMalformed.value()) { + return new SortedNumericDocValuesSyntheticFieldLoader( + fullPath(), + leafName(), + ignoreMalformed.value(), + indexSettings.getIndexVersionCreated() + ) { @Override protected void writeValue(XContentBuilder b, long value) throws IOException { b.value(value == 1); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/CompositeSyntheticFieldLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/CompositeSyntheticFieldLoader.java index 8dff9b4f4d08b..3808d6f6e6a09 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/CompositeSyntheticFieldLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/CompositeSyntheticFieldLoader.java @@ -11,6 +11,8 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; @@ -173,11 +175,23 @@ default void reset() { } /** - * Layer that loads malformed values stored in a dedicated field with a conventional name. + * Returns the appropriate malformed values layer for the given index version. + * Uses binary doc values for new indices and stored fields for old indices. + */ + public static Layer malformedValuesLayer(String fieldName, IndexVersion indexVersion) { + if (indexVersion.onOrAfter(IndexVersions.STORE_IGNORED_MALFORMED_IN_BINARY_DOC_VALUES)) { + return new MalformedValuesBinaryDocValuesLayer(fieldName); + } else { + return new MalformedValuesStoredFieldLayer(fieldName); + } + } + + /** + * Layer that loads malformed values from stored fields for synthetic source. * @see IgnoreMalformedStoredValues */ - public static class MalformedValuesLayer extends StoredFieldLayer { - public MalformedValuesLayer(String fieldName) { + private static class MalformedValuesStoredFieldLayer extends StoredFieldLayer { + MalformedValuesStoredFieldLayer(String fieldName) { super(IgnoreMalformedStoredValues.name(fieldName)); } @@ -191,6 +205,20 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { } } + /** + * Layer that loads malformed values from binary doc values for synthetic source. + */ + private static class MalformedValuesBinaryDocValuesLayer extends BinaryDocValuesSyntheticFieldLoaderLayer { + MalformedValuesBinaryDocValuesLayer(String fieldName) { + super(IgnoreMalformedStoredValues.name(fieldName)); + } + + @Override + protected void writeValue(XContentBuilder b, BytesRef value) throws IOException { + XContentDataHelper.decodeAndWrite(b, value); + } + } + /** * Layer that loads field values from a provided stored field. */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java index a2f5b6873c2bf..d6b73735649e8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java @@ -1164,7 +1164,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio context.addIgnoredField(mappedFieldType.name()); if (isSourceSynthetic) { // Save a copy of the field so synthetic source can load it - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), context.parser())); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), context.parser()); } return; } else { @@ -1243,7 +1243,12 @@ public Long getNullValue() { protected SyntheticSourceSupport syntheticSourceSupport() { if (hasDocValues) { return new SyntheticSourceSupport.Native( - () -> new SortedNumericDocValuesSyntheticFieldLoader(fullPath(), leafName(), ignoreMalformed) { + () -> new SortedNumericDocValuesSyntheticFieldLoader( + fullPath(), + leafName(), + ignoreMalformed, + indexSettings.getIndexVersionCreated() + ) { @Override protected void writeValue(XContentBuilder b, long value) throws IOException { b.value(fieldType().format(value, fieldType().dateTimeFormatter())); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java index 797f813dd45bd..489b5a984c003 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java @@ -648,7 +648,7 @@ protected void onMalformedValue(DocumentParserContext context, XContentBuilder m throws IOException { super.onMalformedValue(context, malformedDataForSyntheticSource, cause); if (malformedDataForSyntheticSource != null) { - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), malformedDataForSyntheticSource)); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), malformedDataForSyntheticSource); } } @@ -656,7 +656,12 @@ protected void onMalformedValue(DocumentParserContext context, XContentBuilder m protected SyntheticSourceSupport syntheticSourceSupport() { if (fieldType().hasDocValues()) { return new SyntheticSourceSupport.Native( - () -> new SortedNumericDocValuesSyntheticFieldLoader(fullPath(), leafName(), ignoreMalformed()) { + () -> new SortedNumericDocValuesSyntheticFieldLoader( + fullPath(), + leafName(), + ignoreMalformed(), + indexSettings.getIndexVersionCreated() + ) { final GeoPoint point = new GeoPoint(); @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValues.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValues.java index aa7b395519802..df111ddc4cf20 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValues.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValues.java @@ -10,9 +10,17 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentParserConfiguration; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xcontent.json.JsonXContent; import java.io.IOException; import java.util.List; @@ -22,13 +30,73 @@ import static java.util.Collections.emptyList; /** - * Saves malformed values to stored fields so they can be loaded for synthetic - * {@code _source}. + * Saves malformed values to stored fields or binary doc values so they can be loaded for synthetic {@code _source}. */ public abstract class IgnoreMalformedStoredValues { public static final String IGNORE_MALFORMED_FIELD_NAME_SUFFIX = "._ignore_malformed"; + /** + * Stores a malformed value in binary doc values (new indices) or in stored fields (old indices) in order to support synthetic source. + */ + public static void storeMalformedValueForSyntheticSource(DocumentParserContext context, String fieldPath, XContentParser parser) + throws IOException { + IndexVersion indexVersion = context.indexSettings().getIndexVersionCreated(); + if (indexVersion.onOrAfter(IndexVersions.STORE_IGNORED_MALFORMED_IN_BINARY_DOC_VALUES)) { + BytesRef encoded = XContentDataHelper.encodeToken(parser); + saveToBinaryDocValues(context, fieldPath, encoded); + } else { + context.doc().add(storedField(fieldPath, parser)); + } + } + + /** + * Stores a malformed value in binary doc values (new indices) or in stored fields (old indices) in order to support synthetic source. + */ + public static void storeMalformedValueForSyntheticSource(DocumentParserContext context, String fieldPath, XContentBuilder builder) + throws IOException { + IndexVersion indexVersion = context.indexSettings().getIndexVersionCreated(); + if (indexVersion.onOrAfter(IndexVersions.STORE_IGNORED_MALFORMED_IN_BINARY_DOC_VALUES)) { + BytesRef encoded = encodeBuilderAsJson(builder); + saveToBinaryDocValues(context, fieldPath, encoded); + } else { + context.doc().add(storedField(fieldPath, builder)); + } + } + + /** + * Encodes an XContentBuilder's content, normalizing to JSON regardless of the builder's original content type. + * + * Malformed values are sorted by their encoded {@link BytesRef}. The encoding prefix byte is content-type-specific ('j' for + * JSON, 'c' for CBOR, etc.), so without normalization the sort order would depend on which content type the client used when + * indexing — an implementation detail that should not affect user-visible {@code _source} output. Normalizing to JSON is cheap in + * practice: the extra parse/serialize round-trip only runs when the content type is non-JSON AND the document has a malformed value, + * both of which are rare conditions. + */ + private static BytesRef encodeBuilderAsJson(XContentBuilder builder) throws IOException { + XContentType originalType = builder.contentType(); + if (originalType == XContentType.JSON) { + return XContentDataHelper.encodeXContentBuilder(builder); + } + BytesReference rawBytes = BytesReference.bytes(builder); + try (XContentParser parser = XContentHelper.createParserNotCompressed(XContentParserConfiguration.EMPTY, rawBytes, originalType)) { + XContentBuilder jsonBuilder = JsonXContent.contentBuilder(); + parser.nextToken(); + jsonBuilder.copyCurrentStructure(parser); + return XContentDataHelper.encodeXContentBuilder(jsonBuilder); + } + } + + private static void saveToBinaryDocValues(DocumentParserContext context, String fieldPath, BytesRef encoded) { + final String fieldName = name(fieldPath); + MultiValuedBinaryDocValuesField field = (MultiValuedBinaryDocValuesField) context.doc().getByKey(fieldName); + if (field == null) { + field = new MultiValuedBinaryDocValuesField.IntegratedCount(fieldName, true); + context.doc().addWithKey(fieldName, field); + } + field.add(encoded); + } + /** * Creates a stored field that stores malformed data to be used in synthetic source. * Name of the stored field is original name of the field with added conventional suffix. @@ -67,12 +135,32 @@ public static IgnoreMalformedStoredValues stored(String fieldName) { return new Stored(fieldName); } + /** + * Build the appropriate {@link IgnoreMalformedStoredValues} for loading malformed values during synthetic source reconstruction. + * Uses binary doc values for new indices and stored fields for old indices. + */ + public static IgnoreMalformedStoredValues forSyntheticSource(String fieldName, IndexVersion indexVersion) { + if (indexVersion.onOrAfter(IndexVersions.STORE_IGNORED_MALFORMED_IN_BINARY_DOC_VALUES)) { + return new DocValues(fieldName); + } else { + return new Stored(fieldName); + } + } + /** * A {@link Stream} mapping stored field paths to a place to put them * so they can be included in the next document. */ public abstract Stream> storedFieldLoaders(); + /** + * Create a doc values loader for loading malformed values from binary doc values. + * Returns {@code null} if this implementation does not use doc values or if there are no doc values to load. + */ + public SourceLoader.SyntheticFieldLoader.DocValuesLoader docValuesLoader(LeafReader reader) throws IOException { + return null; + } + /** * How many values has this field loaded for this document? */ @@ -145,6 +233,45 @@ public void reset() { } } + private static class DocValues extends IgnoreMalformedStoredValues { + + private final BinaryDocValuesSyntheticFieldLoaderLayer delegate; + + DocValues(String fieldName) { + this.delegate = new BinaryDocValuesSyntheticFieldLoaderLayer(name(fieldName)) { + @Override + protected void writeValue(XContentBuilder b, BytesRef value) throws IOException { + XContentDataHelper.decodeAndWrite(b, value); + } + }; + } + + @Override + public Stream> storedFieldLoaders() { + return Stream.empty(); + } + + @Override + public SourceLoader.SyntheticFieldLoader.DocValuesLoader docValuesLoader(LeafReader reader) throws IOException { + return delegate.docValuesLoader(reader, null); + } + + @Override + public int count() { + return (int) delegate.valueCount(); + } + + @Override + public void write(XContentBuilder b) throws IOException { + delegate.write(b); + } + + @Override + public void reset() { + // no-op: BinaryDocValuesSyntheticFieldLoaderLayer resets state on advanceToDoc + } + } + public static String name(String fieldName) { return fieldName + IGNORE_MALFORMED_FIELD_NAME_SUFFIX; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java index c54dd25be9cac..ec533daa4f6c0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java @@ -657,7 +657,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio context.addIgnoredField(fieldType().name()); if (storeIgnored) { // Save a copy of the field so synthetic source can load it - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), context.parser())); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), context.parser()); } return; } else { @@ -753,7 +753,7 @@ protected BytesRef preserve(BytesRef value) { } if (ignoreMalformed) { - layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath())); + layers.add(CompositeSyntheticFieldLoader.malformedValuesLayer(fullPath(), indexSettings.getIndexVersionCreated())); } return new CompositeSyntheticFieldLoader(leafName(), fullPath(), layers); }); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MultiValuedBinaryDocValuesField.java b/server/src/main/java/org/elasticsearch/index/mapper/MultiValuedBinaryDocValuesField.java index d976608370c5d..f85a99b275a59 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MultiValuedBinaryDocValuesField.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MultiValuedBinaryDocValuesField.java @@ -128,10 +128,19 @@ public String countFieldName() { } public static void addToSeparateCountMultiBinaryFieldInDoc(LuceneDocument doc, String fieldName, BytesRef binaryValue) { + addToSeparateCountMultiBinaryFieldInDoc(doc, fieldName, binaryValue, false); + } + + public static void addToSeparateCountMultiBinaryFieldInDoc( + LuceneDocument doc, + String fieldName, + BytesRef binaryValue, + boolean keepDuplicates + ) { var field = (SeparateCount) doc.getByKey(fieldName); var countField = (NumericDocValuesField) doc.getByKey(fieldName + COUNT_FIELD_SUFFIX); if (field == null) { - field = new SeparateCount(fieldName, false); + field = new SeparateCount(fieldName, keepDuplicates); countField = NumericDocValuesField.indexedField(field.countFieldName(), -1); // dummy value doc.addWithKey(field.name(), field); doc.addWithKey(countField.name(), countField); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index eff939aba44e7..4272958464da8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -36,6 +36,7 @@ import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; @@ -1854,8 +1855,13 @@ public double reduceToStoredPrecision(double value) { abstract void writeValue(XContentBuilder builder, long longValue) throws IOException; - SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String fieldSimpleName, boolean ignoreMalformed) { - return new SortedNumericDocValuesSyntheticFieldLoader(fieldName, fieldSimpleName, ignoreMalformed) { + SourceLoader.SyntheticFieldLoader syntheticFieldLoader( + String fieldName, + String fieldSimpleName, + boolean ignoreMalformed, + IndexVersion indexVersion + ) { + return new SortedNumericDocValuesSyntheticFieldLoader(fieldName, fieldSimpleName, ignoreMalformed, indexVersion) { @Override public void writeValue(XContentBuilder b, long value) throws IOException { NumberType.this.writeValue(b, value); @@ -2371,7 +2377,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio context.addIgnoredField(mappedFieldType.name()); if (isSyntheticSource) { // Save a copy of the field so synthetic source can load it - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), context.parser())); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), context.parser()); } return; } else { @@ -2485,11 +2491,11 @@ private SourceLoader.SyntheticFieldLoader docValuesSyntheticFieldLoader() { var layers = new ArrayList(2); layers.add(new SortedNumericWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName, type::writeValue)); if (ignoreMalformed.value()) { - layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath())); + layers.add(CompositeSyntheticFieldLoader.malformedValuesLayer(fullPath(), indexSettings.getIndexVersionCreated())); } return new CompositeSyntheticFieldLoader(leafName(), fullPath(), layers); } else { - return type.syntheticFieldLoader(fullPath(), leafName(), ignoreMalformed.value()); + return type.syntheticFieldLoader(fullPath(), leafName(), ignoreMalformed.value(), indexSettings.getIndexVersionCreated()); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SortedNumericDocValuesSyntheticFieldLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/SortedNumericDocValuesSyntheticFieldLoader.java index 18d05f23ee904..f087aec3f544c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SortedNumericDocValuesSyntheticFieldLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SortedNumericDocValuesSyntheticFieldLoader.java @@ -13,6 +13,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedNumericDocValues; +import org.elasticsearch.index.IndexVersion; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; @@ -34,17 +35,34 @@ public abstract class SortedNumericDocValuesSyntheticFieldLoader implements Sour private Values values = NO_VALUES; /** - * Build a loader from doc values and, optionally, a stored field. + * Build a loader from doc values and, optionally, a stored field for malformed values. * @param name the name of the field to load from doc values * @param simpleName the name to give the field in the rendered {@code _source} * @param loadIgnoreMalformedValues should we load values skipped by {@code ignore_malformed} */ protected SortedNumericDocValuesSyntheticFieldLoader(String name, String simpleName, boolean loadIgnoreMalformedValues) { + this(name, simpleName, loadIgnoreMalformedValues ? IgnoreMalformedStoredValues.stored(name) : IgnoreMalformedStoredValues.empty()); + } + + protected SortedNumericDocValuesSyntheticFieldLoader( + String name, + String simpleName, + boolean loadIgnoreMalformedValues, + IndexVersion indexVersion + ) { + this( + name, + simpleName, + loadIgnoreMalformedValues + ? IgnoreMalformedStoredValues.forSyntheticSource(name, indexVersion) + : IgnoreMalformedStoredValues.empty() + ); + } + + private SortedNumericDocValuesSyntheticFieldLoader(String name, String simpleName, IgnoreMalformedStoredValues ignoreMalformedValues) { this.name = name; this.simpleName = simpleName; - this.ignoreMalformedValues = loadIgnoreMalformedValues - ? IgnoreMalformedStoredValues.stored(name) - : IgnoreMalformedStoredValues.empty(); + this.ignoreMalformedValues = ignoreMalformedValues; } protected abstract void writeValue(XContentBuilder b, long value) throws IOException; @@ -56,6 +74,19 @@ public Stream> storedFieldLoaders() { @Override public DocValuesLoader docValuesLoader(LeafReader reader, int[] docIdsInLeaf) throws IOException { + DocValuesLoader fieldLoader = fieldDocValuesLoader(reader, docIdsInLeaf); + DocValuesLoader malformedLoader = ignoreMalformedValues.docValuesLoader(reader); + + if (fieldLoader != null && malformedLoader != null) { + return docId -> fieldLoader.advanceToDoc(docId) | malformedLoader.advanceToDoc(docId); + } else if (malformedLoader != null) { + return malformedLoader; + } else { + return fieldLoader; + } + } + + private DocValuesLoader fieldDocValuesLoader(LeafReader reader, int[] docIdsInLeaf) throws IOException { SortedNumericDocValues dv = docValuesOrNull(reader, name); if (dv == null) { values = NO_VALUES; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IPSyntheticSourceNativeArrayIntegrationTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IPSyntheticSourceNativeArrayIntegrationTests.java index 0707a63f66bd0..0f470072b9d61 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IPSyntheticSourceNativeArrayIntegrationTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IPSyntheticSourceNativeArrayIntegrationTests.java @@ -62,7 +62,7 @@ public void testSynthesizeArrayIgnoreMalformed() throws Exception { new Object[] { "192.168.1.1", "192.168.1.1", "malformed" }, new Object[] { null, null, null, "malformed" }, new Object[] { "192.168.1.3", "192.168.1.3", "192.168.1.1", "malformed" } }; - verifySyntheticArray(arrayValues, mapping, "_id", "field._ignore_malformed"); + verifySyntheticArray(arrayValues, mapping, "_id"); } public void testSynthesizeObjectArray() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValuesTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValuesTests.java index 02112874d54e2..e90cc6b2d038a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValuesTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValuesTests.java @@ -10,7 +10,13 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.index.IndexVersion; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; @@ -23,6 +29,7 @@ import java.math.BigInteger; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Stream; import static org.hamcrest.Matchers.equalTo; @@ -30,7 +37,7 @@ public class IgnoreMalformedStoredValuesTests extends ESTestCase { public void testIgnoreMalformedBoolean() throws IOException { boolean b = randomBoolean(); - XContentParser p = ignoreMalformed(randomFrom(XContentType.values()), b); + XContentParser p = ignoreMalformedFromStoredField(randomFrom(XContentType.values()), b); assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_BOOLEAN)); assertThat(p.booleanValue(), equalTo(b)); assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); @@ -38,7 +45,7 @@ public void testIgnoreMalformedBoolean() throws IOException { public void testIgnoreMalformedString() throws IOException { String s = randomAlphaOfLength(5); - XContentParser p = ignoreMalformed(randomFrom(XContentType.values()), s); + XContentParser p = ignoreMalformedFromStoredField(randomFrom(XContentType.values()), s); assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_STRING)); assertThat(p.text(), equalTo(s)); assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); @@ -46,7 +53,7 @@ public void testIgnoreMalformedString() throws IOException { public void testIgnoreMalformedInt() throws IOException { int i = randomInt(); - XContentParser p = ignoreMalformed(randomFrom(XContentType.values()), i); + XContentParser p = ignoreMalformedFromStoredField(randomFrom(XContentType.values()), i); assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); assertThat(p.numberType(), equalTo(XContentParser.NumberType.INT)); assertThat(p.intValue(), equalTo(i)); @@ -55,7 +62,7 @@ public void testIgnoreMalformedInt() throws IOException { public void testIgnoreMalformedLong() throws IOException { long l = randomLong(); - XContentParser p = ignoreMalformed(randomFrom(XContentType.values()), l); + XContentParser p = ignoreMalformedFromStoredField(randomFrom(XContentType.values()), l); assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); assertThat(p.numberType(), equalTo(XContentParser.NumberType.LONG)); assertThat(p.longValue(), equalTo(l)); @@ -64,7 +71,7 @@ public void testIgnoreMalformedLong() throws IOException { public void testIgnoreMalformedFloat() throws IOException { float f = randomFloat(); - XContentParser p = ignoreMalformed(randomFrom(XContentType.SMILE, XContentType.CBOR), f); + XContentParser p = ignoreMalformedFromStoredField(randomFrom(XContentType.SMILE, XContentType.CBOR), f); assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); assertThat(p.numberType(), equalTo(XContentParser.NumberType.FLOAT)); assertThat(p.floatValue(), equalTo(f)); @@ -73,7 +80,7 @@ public void testIgnoreMalformedFloat() throws IOException { public void testIgnoreMalformedDouble() throws IOException { double d = randomDouble(); - XContentParser p = ignoreMalformed(randomFrom(XContentType.values()), d); + XContentParser p = ignoreMalformedFromStoredField(randomFrom(XContentType.values()), d); assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); assertThat(p.numberType(), equalTo(XContentParser.NumberType.DOUBLE)); assertThat(p.doubleValue(), equalTo(d)); @@ -82,7 +89,7 @@ public void testIgnoreMalformedDouble() throws IOException { public void testIgnoreMalformedBigInteger() throws IOException { BigInteger i = randomBigInteger(); - XContentParser p = ignoreMalformed(randomFrom(XContentType.SMILE, XContentType.CBOR), i); + XContentParser p = ignoreMalformedFromStoredField(randomFrom(XContentType.SMILE, XContentType.CBOR), i); assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); assertThat(p.numberType(), equalTo(XContentParser.NumberType.BIG_INTEGER)); assertThat(p.numberValue(), equalTo(i)); @@ -91,7 +98,7 @@ public void testIgnoreMalformedBigInteger() throws IOException { public void testIgnoreMalformedBigDecimal() throws IOException { BigDecimal d = new BigDecimal(randomBigInteger(), randomInt()); - XContentParser p = ignoreMalformed(XContentType.CBOR, d); + XContentParser p = ignoreMalformedFromStoredField(XContentType.CBOR, d); assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); assertThat(p.numberType(), equalTo(XContentParser.NumberType.BIG_DECIMAL)); assertThat(p.numberValue(), equalTo(d)); @@ -100,7 +107,7 @@ public void testIgnoreMalformedBigDecimal() throws IOException { public void testIgnoreMalformedBytes() throws IOException { byte[] b = randomByteArrayOfLength(10); - XContentParser p = ignoreMalformed(randomFrom(XContentType.SMILE, XContentType.CBOR), b); + XContentParser p = ignoreMalformedFromStoredField(randomFrom(XContentType.SMILE, XContentType.CBOR), b); assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_EMBEDDED_OBJECT)); assertThat(p.binaryValue(), equalTo(b)); assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); @@ -108,7 +115,7 @@ public void testIgnoreMalformedBytes() throws IOException { public void testIgnoreMalformedObjectBoolean() throws IOException { boolean b = randomBoolean(); - XContentParser p = ignoreMalformed(randomFrom(XContentType.values()), Map.of("foo", b)); + XContentParser p = ignoreMalformedFromStoredField(randomFrom(XContentType.values()), Map.of("foo", b)); assertThat(p.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); assertThat(p.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); assertThat(p.currentName(), equalTo("foo")); @@ -122,7 +129,7 @@ public void testIgnoreMalformedArrayInt() throws IOException { int i1 = randomInt(); int i2 = randomInt(); int i3 = randomInt(); - XContentParser p = ignoreMalformed(randomFrom(XContentType.values()), List.of(i1, i2, i3)); + XContentParser p = ignoreMalformedFromStoredField(randomFrom(XContentType.values()), List.of(i1, i2, i3)); assertThat(p.nextToken(), equalTo(XContentParser.Token.START_ARRAY)); assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); assertThat(p.numberType(), equalTo(XContentParser.NumberType.INT)); @@ -137,7 +144,186 @@ public void testIgnoreMalformedArrayInt() throws IOException { assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); } - private static XContentParser ignoreMalformed(XContentType type, Object value) throws IOException { + public void testDocValuesRoundTripString() throws IOException { + String s = randomAlphaOfLength(5); + XContentParser p = ignoreMalformedFromDocValues(randomFrom(XContentType.values()), s); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_STRING)); + assertThat(p.text(), equalTo(s)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + public void testDocValuesRoundTripInt() throws IOException { + int i = randomInt(); + XContentParser p = ignoreMalformedFromDocValues(randomFrom(XContentType.values()), i); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + assertThat(p.numberType(), equalTo(XContentParser.NumberType.INT)); + assertThat(p.intValue(), equalTo(i)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + public void testDocValuesRoundTripBoolean() throws IOException { + boolean b = randomBoolean(); + XContentParser p = ignoreMalformedFromDocValues(randomFrom(XContentType.values()), b); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_BOOLEAN)); + assertThat(p.booleanValue(), equalTo(b)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + public void testDocValuesRoundTripDouble() throws IOException { + double d = randomDouble(); + XContentParser p = ignoreMalformedFromDocValues(randomFrom(XContentType.values()), d); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + assertThat(p.numberType(), equalTo(XContentParser.NumberType.DOUBLE)); + assertThat(p.doubleValue(), equalTo(d)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + public void testDocValuesRoundTripLong() throws IOException { + long l = randomLong(); + XContentParser p = ignoreMalformedFromDocValues(randomFrom(XContentType.values()), l); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + assertThat(p.numberType(), equalTo(XContentParser.NumberType.LONG)); + assertThat(p.longValue(), equalTo(l)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + public void testDocValuesRoundTripFloat() throws IOException { + float f = randomFloat(); + XContentParser p = ignoreMalformedFromDocValues(randomFrom(XContentType.SMILE, XContentType.CBOR), f); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + assertThat(p.numberType(), equalTo(XContentParser.NumberType.FLOAT)); + assertThat(p.floatValue(), equalTo(f)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + public void testDocValuesRoundTripBigInteger() throws IOException { + BigInteger i = randomBigInteger(); + XContentParser p = ignoreMalformedFromDocValues(randomFrom(XContentType.SMILE, XContentType.CBOR), i); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + assertThat(p.numberType(), equalTo(XContentParser.NumberType.BIG_INTEGER)); + assertThat(p.numberValue(), equalTo(i)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + public void testDocValuesRoundTripBigDecimal() throws IOException { + BigDecimal d = new BigDecimal(randomBigInteger(), randomInt()); + XContentParser p = ignoreMalformedFromDocValues(XContentType.CBOR, d); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + assertThat(p.numberType(), equalTo(XContentParser.NumberType.BIG_DECIMAL)); + assertThat(p.numberValue(), equalTo(d)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + public void testDocValuesRoundTripBytes() throws IOException { + byte[] b = randomByteArrayOfLength(10); + XContentParser p = ignoreMalformedFromDocValues(randomFrom(XContentType.SMILE, XContentType.CBOR), b); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_EMBEDDED_OBJECT)); + assertThat(p.binaryValue(), equalTo(b)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + public void testDocValuesRoundTripObject() throws IOException { + boolean b = randomBoolean(); + XContentParser p = ignoreMalformedFromDocValues(randomFrom(XContentType.values()), Map.of("foo", b)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(p.currentName(), equalTo("foo")); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_BOOLEAN)); + assertThat(p.booleanValue(), equalTo(b)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + public void testDocValuesRoundTripArray() throws IOException { + int i1 = randomInt(); + int i2 = randomInt(); + int i3 = randomInt(); + XContentParser p = ignoreMalformedFromDocValues(randomFrom(XContentType.values()), List.of(i1, i2, i3)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.START_ARRAY)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + assertThat(p.numberType(), equalTo(XContentParser.NumberType.INT)); + assertThat(p.intValue(), equalTo(i1)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + assertThat(p.numberType(), equalTo(XContentParser.NumberType.INT)); + assertThat(p.intValue(), equalTo(i2)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + assertThat(p.numberType(), equalTo(XContentParser.NumberType.INT)); + assertThat(p.intValue(), equalTo(i3)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_ARRAY)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); + } + + public void testDocValuesRoundTripMultipleValues() throws IOException { + String s1 = randomAlphaOfLength(5); + String s2 = randomAlphaOfLength(5); + String fieldName = "test_field"; + String dvFieldName = IgnoreMalformedStoredValues.name(fieldName); + + XContentType type = randomFrom(XContentType.values()); + BytesRef encoded1 = encodeValue(type, s1); + BytesRef encoded2 = encodeValue(type, s2); + + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) { + LuceneDocument doc = new LuceneDocument(); + MultiValuedBinaryDocValuesField.SeparateCount.addToSeparateCountMultiBinaryFieldInDoc(doc, dvFieldName, encoded1, true); + MultiValuedBinaryDocValuesField.SeparateCount.addToSeparateCountMultiBinaryFieldInDoc(doc, dvFieldName, encoded2, true); + iw.addDocument(doc); + } + + try (DirectoryReader reader = DirectoryReader.open(directory)) { + LeafReader leafReader = reader.leaves().get(0).reader(); + IgnoreMalformedStoredValues values = IgnoreMalformedStoredValues.forSyntheticSource(fieldName, IndexVersion.current()); + SourceLoader.SyntheticFieldLoader.DocValuesLoader loader = values.docValuesLoader(leafReader); + assertNotNull(loader); + assertTrue(loader.advanceToDoc(0)); + assertThat(values.count(), equalTo(2)); + + XContentBuilder b = CborXContent.contentBuilder(); + b.startObject(); + b.field(fieldName); + b.startArray(); + values.write(b); + b.endArray(); + b.endObject(); + + XContentParser p = CborXContent.cborXContent.createParser( + XContentParserConfiguration.EMPTY, + BytesReference.bytes(b).streamInput() + ); + assertThat(p.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(p.currentName(), equalTo(fieldName)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.START_ARRAY)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_STRING)); + String v1 = p.text(); + assertThat(p.nextToken(), equalTo(XContentParser.Token.VALUE_STRING)); + String v2 = p.text(); + assertThat(Set.of(v1, v2), equalTo(Set.of(s1, s2))); + assertThat(p.nextToken(), equalTo(XContentParser.Token.END_ARRAY)); + } + } + } + + public void testDocValuesNoValues() throws IOException { + String fieldName = "test_field"; + + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) { + iw.addDocument(new LuceneDocument()); + } + + try (DirectoryReader reader = DirectoryReader.open(directory)) { + LeafReader leafReader = reader.leaves().get(0).reader(); + IgnoreMalformedStoredValues values = IgnoreMalformedStoredValues.forSyntheticSource(fieldName, IndexVersion.current()); + SourceLoader.SyntheticFieldLoader.DocValuesLoader loader = values.docValuesLoader(leafReader); + assertNull(loader); + assertThat(values.count(), equalTo(0)); + } + } + } + + private static XContentParser ignoreMalformedFromStoredField(XContentType type, Object value) throws IOException { String fieldName = randomAlphaOfLength(10); StoredField s = ignoreMalformedStoredField(type, value); Object stored = Stream.of(s.numericValue(), s.binaryValue(), s.stringValue()).filter(v -> v != null).findFirst().get(); @@ -158,6 +344,45 @@ private static StoredField ignoreMalformedStoredField(XContentType type, Object } } + private XContentParser ignoreMalformedFromDocValues(XContentType type, Object value) throws IOException { + String fieldName = randomAlphaOfLength(10); + String dvFieldName = IgnoreMalformedStoredValues.name(fieldName); + BytesRef encoded = encodeValue(type, value); + + XContentParser result; + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) { + LuceneDocument doc = new LuceneDocument(); + MultiValuedBinaryDocValuesField.SeparateCount.addToSeparateCountMultiBinaryFieldInDoc(doc, dvFieldName, encoded, true); + iw.addDocument(doc); + } + + try (DirectoryReader reader = DirectoryReader.open(directory)) { + LeafReader leafReader = reader.leaves().get(0).reader(); + IgnoreMalformedStoredValues values = IgnoreMalformedStoredValues.forSyntheticSource(fieldName, IndexVersion.current()); + SourceLoader.SyntheticFieldLoader.DocValuesLoader loader = values.docValuesLoader(leafReader); + assertNotNull(loader); + assertTrue(loader.advanceToDoc(0)); + assertThat(values.count(), equalTo(1)); + + result = parserFrom(values, fieldName); + } + } + return result; + } + + private static BytesRef encodeValue(XContentType type, Object value) throws IOException { + XContentBuilder b = XContentBuilder.builder(type.xContent()); + b.startObject().field("name", value).endObject(); + try (XContentParser p = type.xContent().createParser(XContentParserConfiguration.EMPTY, BytesReference.bytes(b).streamInput())) { + assertThat(p.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(p.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(p.currentName(), equalTo("name")); + p.nextToken(); + return XContentDataHelper.encodeToken(p); + } + } + private static XContentParser parserFrom(IgnoreMalformedStoredValues values, String fieldName) throws IOException { XContentBuilder b = CborXContent.contentBuilder(); b.startObject(); diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index 73094ae749b08..40eba8083b837 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -1238,6 +1238,33 @@ public void testSyntheticSourceIgnoreMalformedExamples() throws IOException { } } + /** + * Tests that synthetic source with ignore_malformed works correctly using stored fields, which is the storage format used by indices + * created before {@link IndexVersions#STORE_IGNORED_MALFORMED_IN_BINARY_DOC_VALUES}. + */ + public void testSyntheticSourceIgnoreMalformedExamplesUsingStoredFields() throws IOException { + assumeTrue("type doesn't support ignore_malformed", supportsIgnoreMalformed()); + syntheticSourceSupport(true); + + IndexVersion oldVersion = IndexVersionUtils.randomPreviousCompatibleVersion( + IndexVersions.STORE_IGNORED_MALFORMED_IN_BINARY_DOC_VALUES + ); + var settings = Settings.builder().put("index.mapping.source.mode", "synthetic").build(); + for (ExampleMalformedValue v : exampleMalformedValues()) { + CheckedConsumer mapping = b -> { + v.mapping.accept(b); + b.field("ignore_malformed", true); + }; + SyntheticSourceExample example = new SyntheticSourceExample(v.value, v.value, mapping); + DocumentMapper mapper = createMapperService(oldVersion, settings, () -> true, mapping(b -> { + b.startObject("field"); + example.mapping().accept(b); + b.endObject(); + })).documentMapper(); + assertThat(syntheticSource(mapper, example::buildInput), equalTo(example.expected())); + } + } + private void assertSyntheticSource(SyntheticSourceExample example) throws IOException { DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> { b.startObject("field"); diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java index e4bd8bb0ea5db..d4787650276f7 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java @@ -15,6 +15,7 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeRequest; import org.elasticsearch.action.admin.indices.refresh.RefreshAction; import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; @@ -22,13 +23,19 @@ import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.query.IdsQueryBuilder; import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParserConfiguration; +import org.elasticsearch.xcontent.json.JsonXContent; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; @@ -81,6 +88,8 @@ public void testSynthesizeArrayRandomIgnoresMalformed() throws Exception { for (int j = 0; j < malformed.length; j++) { malformed[j] = getMalformedValue(); } + // Binary doc values sort malformed values by their encoded BytesRef representation + Arrays.sort(malformed, encodedBytesRefComparator()); var expectedDocument = jsonBuilder().startObject(); var inputDocument = jsonBuilder().startObject(); @@ -445,4 +454,33 @@ private FieldInfos getFieldInfos(DirectoryReader reader) { } return fieldInfos; } + + /** + * Returns a comparator that orders objects by their {@link XContentDataHelper} encoded {@code BytesRef} representation. + * This matches the sort order used by binary doc values for malformed values. + */ + private static Comparator encodedBytesRefComparator() { + return (a, b) -> { + try { + return encodeToBytesRef(a).compareTo(encodeToBytesRef(b)); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + } + + private static BytesRef encodeToBytesRef(Object value) throws IOException { + XContentBuilder builder = jsonBuilder().startObject().field("v", value).endObject(); + try ( + var parser = JsonXContent.jsonXContent.createParser( + XContentParserConfiguration.EMPTY, + BytesReference.bytes(builder).streamInput() + ) + ) { + parser.nextToken(); // START_OBJECT + parser.nextToken(); // FIELD_NAME + parser.nextToken(); // value token + return XContentDataHelper.encodeToken(parser); + } + } } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ExponentialHistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ExponentialHistogramFieldMapper.java index c523bbee8f6a8..d5240b2e3e9d0 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ExponentialHistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/ExponentialHistogramFieldMapper.java @@ -30,6 +30,7 @@ import org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils; import org.elasticsearch.exponentialhistogram.ExponentialHistogramXContent; import org.elasticsearch.exponentialhistogram.ZeroBucket; +import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.FormattedDocValues; import org.elasticsearch.index.fielddata.IndexFieldData; @@ -153,9 +154,11 @@ static class Builder extends FieldMapper.Builder { * Only the metric type histogram is supported. */ private final Parameter metric; + private final IndexVersion indexCreatedVersion; - Builder(String name, boolean ignoreMalformedByDefault, boolean coerceByDefault) { + Builder(String name, boolean ignoreMalformedByDefault, boolean coerceByDefault, IndexVersion indexCreatedVersion) { super(name); + this.indexCreatedVersion = indexCreatedVersion; this.ignoreMalformed = FieldMapper.Parameter.explicitBoolParam( "ignore_malformed", true, @@ -193,7 +196,12 @@ public ExponentialHistogramFieldMapper build(MapperBuilderContext context) { } public static final FieldMapper.TypeParser PARSER = new FieldMapper.TypeParser( - (n, c) -> new Builder(n, IGNORE_MALFORMED_SETTING.get(c.getSettings()), COERCE_SETTING.get(c.getSettings())), + (n, c) -> new Builder( + n, + IGNORE_MALFORMED_SETTING.get(c.getSettings()), + COERCE_SETTING.get(c.getSettings()), + c.getIndexSettings().getIndexVersionCreated() + ), notInMultiFields(CONTENT_TYPE) ); @@ -203,6 +211,7 @@ public ExponentialHistogramFieldMapper build(MapperBuilderContext context) { private final Explicit coerce; private final boolean coerceByDefault; private final TimeSeriesParams.MetricType metricType; + private final IndexVersion indexCreatedVersion; ExponentialHistogramFieldMapper( String simpleName, @@ -216,6 +225,7 @@ public ExponentialHistogramFieldMapper build(MapperBuilderContext context) { this.coerce = builder.coerce.getValue(); this.coerceByDefault = builder.coerce.getDefaultValue().value(); this.metricType = builder.metric.getValue(); + this.indexCreatedVersion = builder.indexCreatedVersion; } @Override @@ -234,7 +244,7 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), ignoreMalformedByDefault, coerceByDefault).metric(metricType).init(this); + return new Builder(leafName(), ignoreMalformedByDefault, coerceByDefault, indexCreatedVersion).metric(metricType).init(this); } @Override @@ -621,7 +631,7 @@ public void parse(DocumentParserContext context) throws IOException { } if (malformedDataForSyntheticSource != null) { - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), malformedDataForSyntheticSource)); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), malformedDataForSyntheticSource); } context.addIgnoredField(fieldType().name()); @@ -798,7 +808,7 @@ protected FieldMapper.SyntheticSourceSupport syntheticSourceSupport() { leafName(), fullPath(), new ExponentialHistogramSyntheticFieldLoader(), - new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()) + CompositeSyntheticFieldLoader.malformedValuesLayer(fullPath(), indexCreatedVersion) ) ); } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index fd34734fbc72f..234d197321d1e 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -20,6 +20,7 @@ import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.FormattedDocValues; import org.elasticsearch.index.fielddata.HistogramValue; @@ -87,9 +88,11 @@ public static class Builder extends FieldMapper.Builder { * Only the metric type histogram is supported. */ private final Parameter metric; + private final IndexVersion indexCreatedVersion; - public Builder(String name, boolean ignoreMalformedByDefault, boolean coerceByDefault) { + public Builder(String name, boolean ignoreMalformedByDefault, boolean coerceByDefault, IndexVersion indexCreatedVersion) { super(name); + this.indexCreatedVersion = indexCreatedVersion; this.ignoreMalformed = Parameter.explicitBoolParam( "ignore_malformed", true, @@ -127,7 +130,12 @@ public HistogramFieldMapper build(MapperBuilderContext context) { } public static final TypeParser PARSER = new TypeParser( - (n, c) -> new Builder(n, IGNORE_MALFORMED_SETTING.get(c.getSettings()), COERCE_SETTING.get(c.getSettings())), + (n, c) -> new Builder( + n, + IGNORE_MALFORMED_SETTING.get(c.getSettings()), + COERCE_SETTING.get(c.getSettings()), + c.getIndexSettings().getIndexVersionCreated() + ), notInMultiFields(CONTENT_TYPE) ); @@ -137,11 +145,13 @@ public HistogramFieldMapper build(MapperBuilderContext context) { private final Explicit coerce; private final boolean coerceByDefault; private final TimeSeriesParams.MetricType metricType; + private final IndexVersion indexCreatedVersion; public HistogramFieldMapper(String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams, Builder builder) { super(simpleName, mappedFieldType, builderParams); this.ignoreMalformed = builder.ignoreMalformed.getValue(); this.ignoreMalformedByDefault = builder.ignoreMalformed.getDefaultValue().value(); + this.indexCreatedVersion = builder.indexCreatedVersion; this.coerce = builder.coerce.getValue(); this.coerceByDefault = builder.coerce.getDefaultValue().value(); this.metricType = builder.metric.get(); @@ -163,7 +173,7 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), ignoreMalformedByDefault, coerceByDefault).metric(metricType).init(this); + return new Builder(leafName(), ignoreMalformedByDefault, coerceByDefault, indexCreatedVersion).metric(metricType).init(this); } @Override @@ -401,7 +411,7 @@ public void parse(DocumentParserContext context) throws IOException { } if (malformedDataForSyntheticSource != null) { - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), malformedDataForSyntheticSource)); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), malformedDataForSyntheticSource); } context.addIgnoredField(fieldType().name()); @@ -479,7 +489,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() { leafName(), fullPath(), new HistogramSyntheticFieldLoader(), - new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()) + CompositeSyntheticFieldLoader.malformedValuesLayer(fullPath(), indexCreatedVersion) ) ); } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java index 7b289915161cd..cd3c37ade4af4 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java @@ -454,8 +454,8 @@ public void testArrayValueSyntheticSource() throws Exception { { expected.startArray("field"); expected.startObject().field("values", new double[] { 1, 2, 3 }).field("counts", new int[] { 1, 2, 3 }).endObject(); - expected.startObject().field("counts", new int[] { 4, 5, 6 }).field("values", new double[] { 4, 5, 6 }).endObject(); expected.value(randomString); + expected.startObject().field("counts", new int[] { 4, 5, 6 }).field("values", new double[] { 4, 5, 6 }).endObject(); expected.endArray(); } expected.endObject(); diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle index d8ad787d1e0b3..f939e517b85c8 100644 --- a/x-pack/plugin/build.gradle +++ b/x-pack/plugin/build.gradle @@ -182,6 +182,19 @@ tasks.named("yamlRestCompatTestTransform").configure( task.skipTest("aggregate-metrics/40_avg_agg/Test avg agg with query", "Default metric cannot be defined, so the query is empty") task.skipTest("aggregate-metrics/30_sum_agg/Test sum agg with query", "Default metric cannot be defined, so the query is empty") task.skipTest("aggregate-metrics/50_value_count_agg/Test value_count agg with query", "Default metric cannot be defined, so the query is empty") + // Malformed values are now stored in binary doc values (sorted) instead of stored fields (insertion order). + task.skipTest( + "analytics/histogram/histogram with synthetic source and ignore_malformed", + "Malformed values are now stored in binary doc values which sort differently than stored fields" + ) + task.skipTest( + "analytics/t_digest_fieldtype/histogram with synthetic source and ignore_malformed", + "Malformed values are now stored in binary doc values which sort differently than stored fields" + ) + task.skipTest( + "aggregate-metrics/100_synthetic_source/aggregate_metric_double with ignore_malformed", + "Malformed values are now stored in binary doc values which sort differently than stored fields" + ) } ) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/analytics/mapper/TDigestFieldMapper.java index a7012c218e8f1..9d551ad2d73ea 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/analytics/mapper/TDigestFieldMapper.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.xcontent.XContentParserUtils; import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.FormattedDocValues; import org.elasticsearch.index.fielddata.HistogramValue; @@ -96,9 +97,11 @@ public static class Builder extends FieldMapper.Builder { * Only the metric type histogram is supported. */ private final Parameter metric; + private final IndexVersion indexCreatedVersion; - public Builder(String name, boolean ignoreMalformedByDefault) { + public Builder(String name, boolean ignoreMalformedByDefault, IndexVersion indexCreatedVersion) { super(name); + this.indexCreatedVersion = indexCreatedVersion; this.ignoreMalformed = Parameter.explicitBoolParam( "ignore_malformed", true, @@ -157,7 +160,7 @@ public TDigestFieldMapper build(MapperBuilderContext context) { } public static final TypeParser PARSER = new TypeParser( - (n, c) -> new Builder(n, IGNORE_MALFORMED_SETTING.get(c.getSettings())), + (n, c) -> new Builder(n, IGNORE_MALFORMED_SETTING.get(c.getSettings()), c.getIndexSettings().getIndexVersionCreated()), notInMultiFields(CONTENT_TYPE) ); @@ -166,6 +169,7 @@ public TDigestFieldMapper build(MapperBuilderContext context) { private final TDigestExecutionHint digestType; private final double compression; private final TimeSeriesParams.MetricType metricType; + private final IndexVersion indexCreatedVersion; public TDigestFieldMapper(String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams, Builder builder) { super(simpleName, mappedFieldType, builderParams); @@ -174,6 +178,7 @@ public TDigestFieldMapper(String simpleName, MappedFieldType mappedFieldType, Bu this.digestType = builder.digestType.getValue(); this.compression = builder.compression.getValue(); this.metricType = builder.metric.get(); + this.indexCreatedVersion = builder.indexCreatedVersion; } @Override @@ -196,7 +201,7 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), ignoreMalformedByDefault).metric(metricType).init(this); + return new Builder(leafName(), ignoreMalformedByDefault, indexCreatedVersion).metric(metricType).init(this); } @Override @@ -472,7 +477,7 @@ public void parse(DocumentParserContext context) throws IOException { } if (malformedDataForSyntheticSource != null) { - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), malformedDataForSyntheticSource)); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), malformedDataForSyntheticSource); } context.addIgnoredField(fieldType().name()); @@ -568,7 +573,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() { leafName(), fullPath(), new TDigestSyntheticFieldLoader(), - new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()) + CompositeSyntheticFieldLoader.malformedValuesLayer(fullPath(), indexCreatedVersion) ) ); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/analytics/mapper/TDigestFieldMapperTests.java index 2b1acede745bb..3cb30a85bdafc 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/analytics/mapper/TDigestFieldMapperTests.java @@ -414,8 +414,8 @@ public void testArrayValueSyntheticSource() throws Exception { expected.field("centroids", new double[] { 1, 2, 3 }); expected.field("counts", new int[] { 1, 2, 3 }); expected.endObject(); - expected.startObject().field("counts", new int[] { 4, 5, 6 }).field("centroids", new double[] { 4, 5, 6 }).endObject(); expected.value(randomString); + expected.startObject().field("counts", new int[] { 4, 5, 6 }).field("centroids", new double[] { 4, 5, 6 }).endObject(); expected.endArray(); } expected.endObject(); diff --git a/x-pack/plugin/mapper-aggregate-metric/src/main/java/org/elasticsearch/xpack/aggregatemetric/mapper/AggregateMetricDoubleFieldMapper.java b/x-pack/plugin/mapper-aggregate-metric/src/main/java/org/elasticsearch/xpack/aggregatemetric/mapper/AggregateMetricDoubleFieldMapper.java index 74369a6d7ca3d..6d9522beb3b0f 100644 --- a/x-pack/plugin/mapper-aggregate-metric/src/main/java/org/elasticsearch/xpack/aggregatemetric/mapper/AggregateMetricDoubleFieldMapper.java +++ b/x-pack/plugin/mapper-aggregate-metric/src/main/java/org/elasticsearch/xpack/aggregatemetric/mapper/AggregateMetricDoubleFieldMapper.java @@ -787,7 +787,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio } if (malformedDataForSyntheticSource != null) { - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), malformedDataForSyntheticSource)); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), malformedDataForSyntheticSource); } context.addIgnoredField(fullPath()); @@ -818,7 +818,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() { leafName(), fullPath(), new AggregateMetricSyntheticFieldLoader(fullPath(), metrics), - new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()) + CompositeSyntheticFieldLoader.malformedValuesLayer(fullPath(), indexSettings.getIndexVersionCreated()) ) ); } diff --git a/x-pack/plugin/mapper-aggregate-metric/src/test/java/org/elasticsearch/xpack/aggregatemetric/mapper/AggregateMetricDoubleFieldMapperTests.java b/x-pack/plugin/mapper-aggregate-metric/src/test/java/org/elasticsearch/xpack/aggregatemetric/mapper/AggregateMetricDoubleFieldMapperTests.java index c18a77186a352..b388babed8e35 100644 --- a/x-pack/plugin/mapper-aggregate-metric/src/test/java/org/elasticsearch/xpack/aggregatemetric/mapper/AggregateMetricDoubleFieldMapperTests.java +++ b/x-pack/plugin/mapper-aggregate-metric/src/test/java/org/elasticsearch/xpack/aggregatemetric/mapper/AggregateMetricDoubleFieldMapperTests.java @@ -475,8 +475,8 @@ public void testArrayValueSyntheticSource() throws Exception { { expected.startArray("field"); expected.startObject().field("min", 10.0).field("max", 100.0).endObject(); - expected.startObject().field("max", 200).field("min", 20).endObject(); expected.value(randomString); + expected.startObject().field("max", 200).field("min", 20).endObject(); expected.endArray(); } expected.endObject(); diff --git a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java index ebfa2a299c4bd..863596c3abac3 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java +++ b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java @@ -753,7 +753,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio context.addIgnoredField(mappedFieldType.name()); if (isSourceSynthetic) { // Save a copy of the field so synthetic source can load it - context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), context.parser())); + IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), context.parser()); } return; } else { @@ -901,11 +901,16 @@ private SourceLoader.SyntheticFieldLoader docValuesSyntheticFieldLoader() { ) ); if (ignoreMalformed.value()) { - layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath())); + layers.add(CompositeSyntheticFieldLoader.malformedValuesLayer(fullPath(), indexSettings.getIndexVersionCreated())); } return new CompositeSyntheticFieldLoader(leafName(), fullPath(), layers); } else { - return new SortedNumericDocValuesSyntheticFieldLoader(fullPath(), leafName(), ignoreMalformed()) { + return new SortedNumericDocValuesSyntheticFieldLoader( + fullPath(), + leafName(), + ignoreMalformed(), + indexSettings.getIndexVersionCreated() + ) { @Override protected void writeValue(XContentBuilder b, long value) throws IOException { b.value(DocValueFormat.UNSIGNED_LONG_SHIFTED.format(value)); diff --git a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java index 9d94defc0fe68..f48ae76e01d22 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java +++ b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java @@ -34,6 +34,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.function.Supplier; import java.util.stream.Stream; @@ -445,10 +446,15 @@ public SyntheticSourceExample example(int maxVals) { .map(Value::output) .sorted() .toList(); - Stream malformedOutput = values.stream().filter(v -> v.malformedOutput != null).map(Value::malformedOutput); + // Malformed values are stored as BytesRef with a type-prefix byte and sorted lexicographically. + List malformedOutput = values.stream() + .filter(v -> v.malformedOutput != null) + .map(Value::malformedOutput) + .sorted(Comparator.comparing(Object::toString)) + .toList(); // Malformed values are always last in the implementation. - List outList = Stream.concat(outputFromDocValues.stream(), malformedOutput).toList(); + List outList = Stream.concat(outputFromDocValues.stream(), malformedOutput.stream()).toList(); Object out = outList.size() == 1 ? outList.get(0) : outList; return new SyntheticSourceExample(in, out, this::mapping); diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/histogram.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/histogram.yml index d11da1fe24f3f..07dd949792ca9 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/histogram.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/histogram.yml @@ -359,4 +359,4 @@ histogram with synthetic source and ignore_malformed: id: 2 - match: _source: - latency: [{"values": [2.0], "counts": [2]}, {"values": [1.0], "counts": [1], "hello": "world"}, 123, 456, "fox"] + latency: [{"values": [2.0], "counts": [2]}, "fox", 123, 456, {"values": [1.0], "counts": [1], "hello": "world"}] diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml index d02b05427c7d3..7ccafa2f01b84 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/t_digest_fieldtype.yml @@ -245,8 +245,8 @@ histogram with synthetic source and ignore_malformed: "centroids": [ 2.0 ], "counts": [ 2 ] }, - { "centroids": [ 1.0 ], "counts": [ 1 ], "hello": "world" }, - 123, 456, "fox" ] + "fox", 123, 456, + { "centroids": [ 1.0 ], "counts": [ 1 ], "hello": "world" } ] --- TDigest with synthetic source and empty digest: - requires: