Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
context.addIgnoredField(mappedFieldType.name());
if (isSourceSynthetic) {
// Save a copy of the field so synthetic source can load it
context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), context.parser()));
IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), context.parser());
}
return;
} else {
Expand Down Expand Up @@ -685,7 +685,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
context.addIgnoredField(mappedFieldType.name());
if (isSourceSynthetic) {
// Save a copy of the field so synthetic source can load it
context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), context.parser()));
IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), context.parser());
}
return;
} else {
Expand Down Expand Up @@ -854,11 +854,16 @@ private SourceLoader.SyntheticFieldLoader docValuesSyntheticFieldLoader() {
)
);
if (ignoreMalformed.value()) {
layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()));
layers.add(CompositeSyntheticFieldLoader.malformedValuesLayer(fullPath(), indexSettings.getIndexVersionCreated()));
}
return new CompositeSyntheticFieldLoader(leafName(), fullPath(), layers);
} else {
return new SortedNumericDocValuesSyntheticFieldLoader(fullPath(), leafName(), ignoreMalformed.value()) {
return new SortedNumericDocValuesSyntheticFieldLoader(
fullPath(),
leafName(),
ignoreMalformed.value(),
indexSettings.getIndexVersionCreated()
) {
@Override
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(decodeForSyntheticSource(value, scalingFactor));
Expand Down
4 changes: 4 additions & 0 deletions rest-api-spec/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,8 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
task.addAllowedWarningRegex("Use of the \\[max_size\\] rollover condition has been deprecated in favour of the \\[max_primary_shard_size\\] condition and will be removed in a later version")
task.skipTest("search.vectors/42_knn_search_bbq_flat/Vector rescoring has same scoring as exact search for kNN section", "scores have changed slightly with native implementations")
task.skipTest("search.vectors/41_knn_search_bbq_hnsw/Vector rescoring has same scoring as exact search for kNN section", "scores have changed slightly with native implementations")
task.skipTest(
"get/100_synthetic_source/fields with ignore_malformed",
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't use replaceValueInMatch here due to how the yaml is structured - fields with ignore_malformed is large and has many match assertions that specifically check _source.ip. Using replaceValueInMatch here means all of those assertions will be changed, which is not what we want.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder whether instead of skipping the test entirely can we maybe use removeMatch?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that wouldn't work well - each yaml test uses a nested format for all of its match assertions (ex. _source: { ip: [...] }). removeMatch() looks only at the name of the top-level key, (ex. _source), so using it here effectively removes all match assertion and renders the tests useless. This applies to all the test I've skipped in this PR. We would need to unnest some of the match assertions and then backport that in order for the yamlRestCompatTests to pass

"Malformed values are now stored in binary doc values which sort differently than stored fields"
)
})
Original file line number Diff line number Diff line change
Expand Up @@ -799,8 +799,8 @@ fields with ignore_malformed:
ip:
- 10.10.1.1
- 192.8.1.2
- hot garbage # fields saved by ignore_malformed are sorted after doc values
- 7
- 7 # malformed values come after doc_values and are sorted by encoded byte representation
- hot garbage
- is_false: fields

- do:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,8 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion ID_FIELD_USE_ES812_POSTINGS_FORMAT = def(9_070_0_00, Version.LUCENE_10_3_2);
public static final IndexVersion TIME_SERIES_USE_SYNTHETIC_ID_94 = def(9_071_0_00, Version.LUCENE_10_3_2);
public static final IndexVersion TIME_SERIES_DOC_VALUES_FORMAT_VERSION_3 = def(9_072_0_00, Version.LUCENE_10_3_2);
public static final IndexVersion STORE_IGNORED_MALFORMED_IN_BINARY_DOC_VALUES = def(9_073_0_00, Version.LUCENE_10_3_2);


/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import java.io.IOException;

public final class BinaryDocValuesSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer {
public class BinaryDocValuesSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needed to removed final in order to override BinaryDocValuesSyntheticFieldLoaderLayer.writeValue() to support malformed values. By default writeValue() treats BytesRef values as raw UTF8 strings. This is not the case for malformed values, which can be of any type. For malformed values, we override writeValue() like so:

        protected void writeValue(XContentBuilder b, BytesRef value) throws IOException {
            XContentDataHelper.decodeAndWrite(b, value);
        }


private final String name;
private SortedBinaryDocValues bytesValues;
Expand All @@ -42,6 +42,7 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
}

bytesValues = MultiValuedSortedBinaryDocValues.from(leafReader, name, docValues);

return docId -> {
hasValue = bytesValues.advanceExact(docId);
return hasValue;
Expand All @@ -61,10 +62,17 @@ public void write(XContentBuilder b) throws IOException {

for (int i = 0; i < bytesValues.docValueCount(); ++i) {
BytesRef value = bytesValues.nextValue();
b.utf8Value(value.bytes, value.offset, value.length);
writeValue(b, value);
}
}

/**
* Write a single value to the builder. Subclasses can override to change the write format.
*/
protected void writeValue(XContentBuilder b, BytesRef value) throws IOException {
b.utf8Value(value.bytes, value.offset, value.length);
}

@Override
public String fieldName() {
return name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
context.addIgnoredField(mappedFieldType.name());
if (storeMalformedFields) {
// Save a copy of the field so synthetic source can load it
context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), context.parser()));
IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), context.parser());
}
return;
} else {
Expand Down Expand Up @@ -659,11 +659,16 @@ private SourceLoader.SyntheticFieldLoader docValuesSyntheticFieldLoader() {
)
);
if (ignoreMalformed.value()) {
layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()));
layers.add(CompositeSyntheticFieldLoader.malformedValuesLayer(fullPath(), indexSettings.getIndexVersionCreated()));
}
return new CompositeSyntheticFieldLoader(leafName(), fullPath(), layers);
} else {
return new SortedNumericDocValuesSyntheticFieldLoader(fullPath(), leafName(), ignoreMalformed.value()) {
return new SortedNumericDocValuesSyntheticFieldLoader(
fullPath(),
leafName(),
ignoreMalformed.value(),
indexSettings.getIndexVersionCreated()
) {
@Override
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(value == 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
Expand Down Expand Up @@ -173,11 +175,23 @@ default void reset() {
}

/**
* Layer that loads malformed values stored in a dedicated field with a conventional name.
* Returns the appropriate malformed values layer for the given index version.
* Uses binary doc values for new indices and stored fields for old indices.
*/
public static Layer malformedValuesLayer(String fieldName, IndexVersion indexVersion) {
if (indexVersion.onOrAfter(IndexVersions.STORE_IGNORED_MALFORMED_IN_BINARY_DOC_VALUES)) {
return new MalformedValuesBinaryDocValuesLayer(fieldName);
} else {
return new MalformedValuesStoredFieldLayer(fieldName);
}
}

/**
* Layer that loads malformed values from stored fields for synthetic source.
* @see IgnoreMalformedStoredValues
*/
public static class MalformedValuesLayer extends StoredFieldLayer {
public MalformedValuesLayer(String fieldName) {
private static class MalformedValuesStoredFieldLayer extends StoredFieldLayer {
MalformedValuesStoredFieldLayer(String fieldName) {
super(IgnoreMalformedStoredValues.name(fieldName));
}

Expand All @@ -191,6 +205,20 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
}
}

/**
* Layer that loads malformed values from binary doc values for synthetic source.
*/
private static class MalformedValuesBinaryDocValuesLayer extends BinaryDocValuesSyntheticFieldLoaderLayer {
MalformedValuesBinaryDocValuesLayer(String fieldName) {
super(IgnoreMalformedStoredValues.name(fieldName));
}

@Override
protected void writeValue(XContentBuilder b, BytesRef value) throws IOException {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here is that override of writeValue() that I mentioned above

XContentDataHelper.decodeAndWrite(b, value);
}
}

/**
* Layer that loads field values from a provided stored field.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
context.addIgnoredField(mappedFieldType.name());
if (isSourceSynthetic) {
// Save a copy of the field so synthetic source can load it
context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), context.parser()));
IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), context.parser());
}
return;
} else {
Expand Down Expand Up @@ -1243,7 +1243,12 @@ public Long getNullValue() {
protected SyntheticSourceSupport syntheticSourceSupport() {
if (hasDocValues) {
return new SyntheticSourceSupport.Native(
() -> new SortedNumericDocValuesSyntheticFieldLoader(fullPath(), leafName(), ignoreMalformed) {
() -> new SortedNumericDocValuesSyntheticFieldLoader(
fullPath(),
leafName(),
ignoreMalformed,
indexSettings.getIndexVersionCreated()
) {
@Override
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(fieldType().format(value, fieldType().dateTimeFormatter()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -648,15 +648,20 @@ protected void onMalformedValue(DocumentParserContext context, XContentBuilder m
throws IOException {
super.onMalformedValue(context, malformedDataForSyntheticSource, cause);
if (malformedDataForSyntheticSource != null) {
context.doc().add(IgnoreMalformedStoredValues.storedField(fullPath(), malformedDataForSyntheticSource));
IgnoreMalformedStoredValues.storeMalformedValueForSyntheticSource(context, fullPath(), malformedDataForSyntheticSource);
}
}

@Override
protected SyntheticSourceSupport syntheticSourceSupport() {
if (fieldType().hasDocValues()) {
return new SyntheticSourceSupport.Native(
() -> new SortedNumericDocValuesSyntheticFieldLoader(fullPath(), leafName(), ignoreMalformed()) {
() -> new SortedNumericDocValuesSyntheticFieldLoader(
fullPath(),
leafName(),
ignoreMalformed(),
indexSettings.getIndexVersionCreated()
) {
final GeoPoint point = new GeoPoint();

@Override
Expand Down
Loading