diff --git a/docs/changelog/112397.yaml b/docs/changelog/112397.yaml new file mode 100644 index 0000000000000..e67478ec69b1c --- /dev/null +++ b/docs/changelog/112397.yaml @@ -0,0 +1,5 @@ +pr: 112397 +summary: Control storing array source with index setting +area: Mapping +type: enhancement +issues: [] diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java index 8bd62480f333d..ad4302cb04b44 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java @@ -8,9 +8,11 @@ package org.elasticsearch.datastreams.logsdb.qa; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.common.time.FormatNames; import org.elasticsearch.core.CheckedConsumer; +import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.ObjectMapper; import org.elasticsearch.logsdb.datageneration.DataGenerator; import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification; @@ -36,12 +38,14 @@ */ public class StandardVersusLogsIndexModeRandomDataChallengeRestIT extends StandardVersusLogsIndexModeChallengeRestIT { private final ObjectMapper.Subobjects subobjects; + private final boolean keepArraySource; private final DataGenerator dataGenerator; public StandardVersusLogsIndexModeRandomDataChallengeRestIT() { super(); this.subobjects = randomFrom(ObjectMapper.Subobjects.values()); + this.keepArraySource = randomBoolean(); var specificationBuilder = 
DataGeneratorSpecification.builder().withFullyDynamicMapping(randomBoolean()); if (subobjects != ObjectMapper.Subobjects.ENABLED) { @@ -120,6 +124,13 @@ public void contenderMappings(XContentBuilder builder) throws IOException { } } + @Override + public void contenderSettings(Settings.Builder builder) { + if (keepArraySource) { + builder.put(Mapper.SYNTHETIC_SOURCE_KEEP_INDEX_SETTING.getKey(), "arrays"); + } + } + @Override protected XContentBuilder generateDocument(final Instant timestamp) throws IOException { var document = XContentFactory.jsonBuilder(); diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index fa08efe402b43..265aec75dc9c2 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -446,260 +446,6 @@ mixed disabled and enabled objects: - match: { hits.hits.0._source.path.to.bad.value: false } ---- -object array: - - requires: - cluster_features: ["mapper.track_ignored_source"] - reason: requires tracking ignored source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - id: - type: integer - regular: - properties: - span: - properties: - id: - type: keyword - trace: - properties: - id: - type: keyword - stored: - store_array_source: true - properties: - span: - properties: - id: - type: keyword - trace: - properties: - id: - type: keyword - - - do: - bulk: - index: test - refresh: true - body: - - '{ "create": { } }' - - '{ "id": 1, "regular": [ { "trace": { "id": "a" }, "span": { "id": "1" } }, { "trace": { "id": "b" }, "span": { "id": "1" } } ] }' - - '{ "create": { } }' - - '{ "id": 2, "stored": [ { "trace": { "id": "a" }, "span": { "id": "1" } }, { "trace": { "id": "b" }, 
"span": { "id": "1" } } ] }' - - - do: - search: - index: test - sort: id - - - length: { hits.hits.0._source.regular: 2 } - - match: { hits.hits.0._source.regular.span.id: "1" } - - match: { hits.hits.0._source.regular.trace.id: [ "a", "b" ] } - - - length: { hits.hits.1._source.stored: 2 } - - match: { hits.hits.1._source.stored.0.trace.id: a } - - match: { hits.hits.1._source.stored.0.span.id: "1" } - - match: { hits.hits.1._source.stored.1.trace.id: b } - - match: { hits.hits.1._source.stored.1.span.id: "1" } - - ---- -object array within array: - - requires: - cluster_features: ["mapper.track_ignored_source"] - reason: requires tracking ignored source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - stored: - store_array_source: true - properties: - path: - store_array_source: true - properties: - to: - properties: - trace: - type: keyword - - - do: - bulk: - index: test - refresh: true - body: - - '{ "create": { } }' - - '{ "stored": [ { "path": [{ "to": { "trace": "A" } }, { "to": { "trace": "B" } } ] }, { "path": { "to": { "trace": "C" } } } ] }' - - - do: - search: - index: test - - - length: { hits.hits.0._source.stored: 2 } - - match: { hits.hits.0._source.stored.0.path.0.to.trace: A } - - match: { hits.hits.0._source.stored.0.path.1.to.trace: B } - - match: { hits.hits.0._source.stored.1.path.to.trace: C } - - ---- -no object array: - - requires: - cluster_features: ["mapper.track_ignored_source"] - reason: requires tracking ignored source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - stored: - store_array_source: true - properties: - span: - properties: - id: - type: keyword - trace: - properties: - id: - type: keyword - - - do: - bulk: - index: test - refresh: true - body: - - '{ "create": { } }' - - '{ "stored": { "trace": { "id": "a" }, "span": { "id": "b" } } }' - - - do: - search: - index: test - - - match: { 
hits.hits.0._source.stored.trace.id: a } - - match: { hits.hits.0._source.stored.span.id: b } - - ---- -field ordering in object array: - - requires: - cluster_features: ["mapper.track_ignored_source"] - reason: requires tracking ignored source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - a: - type: keyword - b: - store_array_source: true - properties: - aa: - type: keyword - bb: - type: keyword - c: - type: keyword - d: - store_array_source: true - properties: - aa: - type: keyword - bb: - type: keyword - - - do: - bulk: - index: test - refresh: true - body: - - '{ "create": { } }' - - '{ "c": 1, "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ], "a": 2, "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ] }' - - - do: - search: - index: test - - - length: { hits.hits.0._source: 4 } - - match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": "1", "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } } - - ---- -nested object array next to other fields: - - requires: - cluster_features: ["mapper.track_ignored_source"] - reason: requires tracking ignored source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - a: - type: keyword - b: - properties: - c: - store_array_source: true - properties: - aa: - type: keyword - bb: - type: keyword - d: - properties: - aa: - type: keyword - bb: - type: keyword - e: - type: keyword - f: - type: keyword - - - do: - bulk: - index: test - refresh: true - body: - - '{ "create": { } }' - - '{ "a": 1, "b": { "c": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ], "d": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "e": 1000 }, "f": 2000 }' - - - do: - search: - index: test - - - match: { hits.hits.0._source.a: "1" } - - match: { hits.hits.0._source.b.c: [{ "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 }] } - - match: { 
hits.hits.0._source.b.d.aa: [ "200", "300" ] } - - match: { hits.hits.0._source.b.d.bb: [ "100", "400" ] } - - match: { hits.hits.0._source.b.e: "1000" } - - match: { hits.hits.0._source.f: "2000" } - - --- object with dynamic override: - requires: @@ -1157,99 +903,6 @@ doubly nested object: - match: { hits.hits.3._source.id: 3 } ---- -nested object with stored array: - - requires: - cluster_features: ["mapper.track_ignored_source"] - reason: requires tracking ignored source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - name: - type: keyword - nested_array_regular: - type: nested - nested_array_stored: - type: nested - store_array_source: true - - - do: - bulk: - index: test - refresh: true - body: - - '{ "create": { } }' - - '{ "name": "A", "nested_array_regular": [ { "b": [ { "c": 10 }, { "c": 100 } ] }, { "b": [ { "c": 20 }, { "c": 200 } ] } ] }' - - '{ "create": { } }' - - '{ "name": "B", "nested_array_stored": [ { "b": [ { "c": 10 }, { "c": 100 } ] }, { "b": [ { "c": 20 }, { "c": 200 } ] } ] }' - - - match: { errors: false } - - - do: - search: - index: test - sort: name - - match: { hits.total.value: 2 } - - match: { hits.hits.0._source.name: A } - - match: { hits.hits.0._source.nested_array_regular.0.b.c: [ 10, 100] } - - match: { hits.hits.0._source.nested_array_regular.1.b.c: [ 20, 200] } - - match: { hits.hits.1._source.name: B } - - match: { hits.hits.1._source.nested_array_stored.0.b.0.c: 10 } - - match: { hits.hits.1._source.nested_array_stored.0.b.1.c: 100 } - - match: { hits.hits.1._source.nested_array_stored.1.b.0.c: 20 } - - match: { hits.hits.1._source.nested_array_stored.1.b.1.c: 200 } - ---- -empty nested object sorted as a first document: - - requires: - cluster_features: ["mapper.track_ignored_source"] - reason: requires tracking ignored source - - - do: - indices.create: - index: test - body: - settings: - index: - sort.field: "name" - sort.order: "asc" - mappings: - _source: - 
mode: synthetic - properties: - name: - type: keyword - nested: - type: nested - - - do: - bulk: - index: test - refresh: true - body: - - '{ "create": { } }' - - '{ "name": "B", "nested": { "a": "b" } }' - - '{ "create": { } }' - - '{ "name": "A" }' - - - match: { errors: false } - - - do: - search: - index: test - sort: name - - - match: { hits.total.value: 2 } - - match: { hits.hits.0._source.name: A } - - match: { hits.hits.1._source.name: B } - - match: { hits.hits.1._source.nested.a: "b" } - --- subobjects auto: - requires: @@ -1337,54 +990,3 @@ subobjects auto: - match: { hits.hits.3._source.id: 4 } - match: { hits.hits.3._source.auto_obj.foo: 40 } - match: { hits.hits.3._source.auto_obj.foo\.bar: 400 } - ---- -# 112156 -stored field under object with store_array_source: - - requires: - cluster_features: ["mapper.source.synthetic_source_stored_fields_advance_fix"] - reason: requires bug fix to be implemented - - - do: - indices.create: - index: test - body: - settings: - index: - sort.field: "name" - sort.order: "asc" - mappings: - _source: - mode: synthetic - properties: - name: - type: keyword - obj: - store_array_source: true - properties: - foo: - type: keyword - store: true - - - do: - bulk: - index: test - refresh: true - body: - - '{ "create": { } }' - - '{ "name": "B", "obj": null }' - - '{ "create": { } }' - - '{ "name": "A", "obj": [ { "foo": "hello_from_the_other_side" } ] }' - - - match: { errors: false } - - - do: - search: - index: test - sort: name - - - match: { hits.total.value: 2 } - - match: { hits.hits.0._source.name: A } - - match: { hits.hits.0._source.obj: [ { "foo": "hello_from_the_other_side" } ] } - - match: { hits.hits.1._source.name: B } - - match: { hits.hits.1._source.obj: null } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml new file mode 100644 index 
0000000000000..917f0540c4dd4 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml @@ -0,0 +1,732 @@ +--- +object param - object array: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + id: + type: integer + regular: + properties: + span: + properties: + id: + type: keyword + trace: + properties: + id: + type: keyword + stored: + store_array_source: true + properties: + span: + properties: + id: + type: keyword + trace: + properties: + id: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "id": 1, "regular": [ { "trace": { "id": "a" }, "span": { "id": "1" } }, { "trace": { "id": "b" }, "span": { "id": "1" } } ] }' + - '{ "create": { } }' + - '{ "id": 2, "stored": [ { "trace": { "id": "a" }, "span": { "id": "1" } }, { "trace": { "id": "b" }, "span": { "id": "1" } } ] }' + + - do: + search: + index: test + sort: id + + - length: { hits.hits.0._source.regular: 2 } + - match: { hits.hits.0._source.regular.span.id: "1" } + - match: { hits.hits.0._source.regular.trace.id: [ "a", "b" ] } + + - length: { hits.hits.1._source.stored: 2 } + - match: { hits.hits.1._source.stored.0.trace.id: a } + - match: { hits.hits.1._source.stored.0.span.id: "1" } + - match: { hits.hits.1._source.stored.1.trace.id: b } + - match: { hits.hits.1._source.stored.1.span.id: "1" } + + +--- +object param - object array within array: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + stored: + store_array_source: true + properties: + path: + store_array_source: true + properties: + to: + properties: + trace: + type: keyword + + - do: + bulk: + index: 
test + refresh: true + body: + - '{ "create": { } }' + - '{ "stored": [ { "path": [{ "to": { "trace": "A" } }, { "to": { "trace": "B" } } ] }, { "path": { "to": { "trace": "C" } } } ] }' + + - do: + search: + index: test + + - length: { hits.hits.0._source.stored: 2 } + - match: { hits.hits.0._source.stored.0.path.0.to.trace: A } + - match: { hits.hits.0._source.stored.0.path.1.to.trace: B } + - match: { hits.hits.0._source.stored.1.path.to.trace: C } + + +--- +object param - no object array: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + stored: + store_array_source: true + properties: + span: + properties: + id: + type: keyword + trace: + properties: + id: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "stored": { "trace": { "id": "a" }, "span": { "id": "b" } } }' + + - do: + search: + index: test + + - match: { hits.hits.0._source.stored.trace.id: a } + - match: { hits.hits.0._source.stored.span.id: b } + + +--- +object param - field ordering in object array: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + a: + type: keyword + b: + store_array_source: true + properties: + aa: + type: keyword + bb: + type: keyword + c: + type: keyword + d: + store_array_source: true + properties: + aa: + type: keyword + bb: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "c": 1, "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ], "a": 2, "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ] }' + + - do: + search: + index: test + + - length: { hits.hits.0._source: 4 } + - match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 
100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": "1", "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } } + + +--- +object param - nested object array next to other fields: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + a: + type: keyword + b: + properties: + c: + store_array_source: true + properties: + aa: + type: keyword + bb: + type: keyword + d: + properties: + aa: + type: keyword + bb: + type: keyword + e: + type: keyword + f: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "a": 1, "b": { "c": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ], "d": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "e": 1000 }, "f": 2000 }' + + - do: + search: + index: test + + - match: { hits.hits.0._source.a: "1" } + - match: { hits.hits.0._source.b.c: [{ "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 }] } + - match: { hits.hits.0._source.b.d.aa: [ "200", "300" ] } + - match: { hits.hits.0._source.b.d.bb: [ "100", "400" ] } + - match: { hits.hits.0._source.b.e: "1000" } + - match: { hits.hits.0._source.f: "2000" } + + +--- +object param - nested object with stored array: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + nested_array_regular: + type: nested + nested_array_stored: + type: nested + store_array_source: true + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "name": "A", "nested_array_regular": [ { "b": [ { "c": 10 }, { "c": 100 } ] }, { "b": [ { "c": 20 }, { "c": 200 } ] } ] }' + - '{ "create": { } }' + - '{ "name": "B", "nested_array_stored": [ { "b": [ { "c": 10 }, { "c": 100 } ] }, { "b": [ { "c": 20 }, { 
"c": 200 } ] } ] }' + + - match: { errors: false } + + - do: + search: + index: test + sort: name + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: A } + - match: { hits.hits.0._source.nested_array_regular.0.b.c: [ 10, 100] } + - match: { hits.hits.0._source.nested_array_regular.1.b.c: [ 20, 200] } + - match: { hits.hits.1._source.name: B } + - match: { hits.hits.1._source.nested_array_stored.0.b.0.c: 10 } + - match: { hits.hits.1._source.nested_array_stored.0.b.1.c: 100 } + - match: { hits.hits.1._source.nested_array_stored.1.b.0.c: 20 } + - match: { hits.hits.1._source.nested_array_stored.1.b.1.c: 200 } + + +--- +# 112156 +stored field under object with store_array_source: + - requires: + cluster_features: ["mapper.source.synthetic_source_stored_fields_advance_fix"] + reason: requires bug fix to be implemented + + - do: + indices.create: + index: test + body: + settings: + index: + sort.field: "name" + sort.order: "asc" + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + obj: + store_array_source: true + properties: + foo: + type: keyword + store: true + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "name": "B", "obj": null }' + - '{ "create": { } }' + - '{ "name": "A", "obj": [ { "foo": "hello_from_the_other_side" } ] }' + + - match: { errors: false } + + - do: + search: + index: test + sort: name + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: A } + - match: { hits.hits.0._source.obj: [ { "foo": "hello_from_the_other_side" } ] } + - match: { hits.hits.1._source.name: B } + - match: { hits.hits.1._source.obj: null } + + +--- +index param - root arrays: + - requires: + cluster_features: ["mapper.synthetic_source_keep"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + synthetic_source_keep: arrays + mappings: + _source: + mode: synthetic + properties: + id: + type: 
integer + leaf: + type: integer + obj: + properties: + span: + properties: + id: + type: keyword + trace: + properties: + id: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "id": 1, "leaf": [30, 20, 10], "obj": [ { "trace": { "id": "a" }, "span": { "id": "1" } }, { "trace": { "id": "b" }, "span": { "id": "1" } } ] }' + - '{ "create": { } }' + - '{ "id": 2, "leaf": [130, 120, 110], "obj": [ { "trace": { "id": "aa" }, "span": { "id": "2" } }, { "trace": { "id": "bb" }, "span": { "id": "2" } } ] }' + + - do: + search: + index: test + sort: id + + - match: { hits.hits.0._source.id: 1 } + - match: { hits.hits.0._source.leaf: [30, 20, 10] } + - length: { hits.hits.0._source.obj: 2 } + - match: { hits.hits.0._source.obj.0.trace.id: a } + - match: { hits.hits.0._source.obj.0.span.id: "1" } + - match: { hits.hits.0._source.obj.1.trace.id: b } + - match: { hits.hits.0._source.obj.1.span.id: "1" } + + - match: { hits.hits.1._source.id: 2 } + - match: { hits.hits.1._source.leaf: [ 130, 120, 110 ] } + - length: { hits.hits.1._source.obj: 2 } + - match: { hits.hits.1._source.obj.0.trace.id: aa } + - match: { hits.hits.1._source.obj.0.span.id: "2" } + - match: { hits.hits.1._source.obj.1.trace.id: bb } + - match: { hits.hits.1._source.obj.1.span.id: "2" } + + +--- +index param - dynamic root arrays: + - requires: + cluster_features: ["mapper.synthetic_source_keep"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + synthetic_source_keep: arrays + mappings: + _source: + mode: synthetic + properties: + id: + type: integer + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "id": 1, "leaf": [30, 20, 10], "obj": [ { "trace": { "id": "a" }, "span": { "id": "1" } }, { "trace": { "id": "b" }, "span": { "id": "1" } } ] }' + - '{ "create": { } }' + - '{ "id": 2, "leaf": [130, 120, 110], "obj": [ { "trace": { "id": "aa" }, 
"span": { "id": "2" } }, { "trace": { "id": "bb" }, "span": { "id": "2" } } ] }' + + - do: + search: + index: test + sort: id + + - match: { hits.hits.0._source.id: 1 } + - match: { hits.hits.0._source.leaf: [30, 20, 10] } + - length: { hits.hits.0._source.obj: 2 } + - match: { hits.hits.0._source.obj.0.trace.id: a } + - match: { hits.hits.0._source.obj.0.span.id: "1" } + - match: { hits.hits.0._source.obj.1.trace.id: b } + - match: { hits.hits.0._source.obj.1.span.id: "1" } + + - match: { hits.hits.1._source.id: 2 } + - match: { hits.hits.1._source.leaf: [ 130, 120, 110 ] } + - length: { hits.hits.1._source.obj: 2 } + - match: { hits.hits.1._source.obj.0.trace.id: aa } + - match: { hits.hits.1._source.obj.0.span.id: "2" } + - match: { hits.hits.1._source.obj.1.trace.id: bb } + - match: { hits.hits.1._source.obj.1.span.id: "2" } + + +--- +index param - object array within array: + - requires: + cluster_features: ["mapper.synthetic_source_keep"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + synthetic_source_keep: arrays + mappings: + _source: + mode: synthetic + properties: + stored: + properties: + path: + properties: + to: + properties: + trace: + type: keyword + values: + type: integer + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "stored": [ { "path": [{ "to": { "trace": "A", "values": [2, 1] } }, { "to": { "trace": "B", "values": [2, 1] } } ] }, { "path": { "to": { "trace": "C", "values": 3 } } } ] }' + + - do: + search: + index: test + + - length: { hits.hits.0._source.stored: 2 } + - match: { hits.hits.0._source.stored.0.path.0.to.trace: A } + - match: { hits.hits.0._source.stored.0.path.0.to.values: [2, 1] } + - match: { hits.hits.0._source.stored.0.path.1.to.trace: B } + - match: { hits.hits.0._source.stored.0.path.1.to.values: [2, 1] } + - match: { hits.hits.0._source.stored.1.path.to.trace: C } + - match: { 
hits.hits.0._source.stored.1.path.to.values: 3 } + + +--- +index param - no object array: + - requires: + cluster_features: ["mapper.synthetic_source_keep"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + synthetic_source_keep: arrays + mappings: + _source: + mode: synthetic + properties: + stored: + properties: + span: + properties: + id: + type: keyword + trace: + properties: + id: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "stored": { "trace": { "id": "a" }, "span": { "id": "b" } } }' + + - do: + search: + index: test + + - match: { hits.hits.0._source.stored.trace.id: a } + - match: { hits.hits.0._source.stored.span.id: b } + + +--- +index param - field ordering: + - requires: + cluster_features: ["mapper.synthetic_source_keep"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + synthetic_source_keep: arrays + mappings: + _source: + mode: synthetic + properties: + a: + type: keyword + b: + properties: + aa: + type: keyword + bb: + type: keyword + c: + type: keyword + d: + properties: + aa: + type: keyword + bb: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "c": [30, 20, 10], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ], "a": 2, "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ] }' + + - do: + search: + index: test + + - length: { hits.hits.0._source: 4 } + - match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": [30, 20, 10], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } } + + +--- +index param - nested arrays: + - requires: + cluster_features: ["mapper.synthetic_source_keep"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + 
synthetic_source_keep: arrays + mappings: + _source: + mode: synthetic + properties: + a: + type: keyword + b: + properties: + c: + properties: + aa: + type: keyword + bb: + type: keyword + d: + type: integer + e: + type: keyword + f: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "a": 1, "b": { "c": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ], "d": [ 300, 200, 100 ], "e": 1000 }, "f": 2000 }' + - '{ "create": { } }' + - '{ "a": 11, "b": { "c": [ { "bb": 110, "aa": 120 }, { "aa": 130, "bb": 140 } ], "d": [ 1300, 1200, 1100 ], "e": 11000 }, "f": 12000 }' + + + - do: + search: + index: test + sort: a + + - match: { hits.hits.0._source.a: "1" } + - match: { hits.hits.0._source.b.c: [{ "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 }] } + - match: { hits.hits.0._source.b.d: [ 300, 200, 100 ] } + - match: { hits.hits.0._source.b.e: "1000" } + - match: { hits.hits.0._source.f: "2000" } + + - match: { hits.hits.1._source.a: "11" } + - match: { hits.hits.1._source.b.c: [ { "bb": 110, "aa": 120 }, { "aa": 130, "bb": 140 } ] } + - match: { hits.hits.1._source.b.d: [ 1300, 1200, 1100 ] } + - match: { hits.hits.1._source.b.e: "11000" } + - match: { hits.hits.1._source.f: "12000" } + +--- +index param - nested object with stored array: + - requires: + cluster_features: ["mapper.synthetic_source_keep"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + synthetic_source_keep: arrays + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + nested: + type: nested + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "name": "A", "nested": [ { "b": [ { "c": 10 }, { "c": 100 } ] }, { "b": [ { "c": 20 }, { "c": 200 } ] } ] }' + - '{ "create": { } }' + - '{ "name": "B", "nested": [ { "b": [ { "c": 30 }, { "c": 300 } ] }, { "b": [ { "c": 40 }, { "c": 400 } ] } ] }' + + - match: { errors: false } + 
+ - do: + search: + index: test + sort: name + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: A } + - match: { hits.hits.0._source.nested.0.b.0.c: 10 } + - match: { hits.hits.0._source.nested.0.b.1.c: 100 } + - match: { hits.hits.0._source.nested.1.b.0.c: 20 } + - match: { hits.hits.0._source.nested.1.b.1.c: 200 } + - match: { hits.hits.1._source.name: B } + - match: { hits.hits.1._source.nested.0.b.0.c: 30 } + - match: { hits.hits.1._source.nested.0.b.1.c: 300 } + - match: { hits.hits.1._source.nested.1.b.0.c: 40 } + - match: { hits.hits.1._source.nested.1.b.1.c: 400 } diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index fe6616cb4fb8e..6adf181014023 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -181,6 +181,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.TIME_SERIES_ES87TSDB_CODEC_ENABLED_SETTING, IndexSettings.PREFER_ILM_SETTING, DataStreamFailureStoreDefinition.FAILURE_STORE_DEFINITION_VERSION_SETTING, + FieldMapper.SYNTHETIC_SOURCE_KEEP_INDEX_SETTING, // validate that built-in similarities don't get redefined Setting.groupSetting("index.similarity.", (s) -> { diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 944d50f7ea06c..509d37fd6077d 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.translog.Translog; 
import org.elasticsearch.ingest.IngestService; import org.elasticsearch.node.Node; @@ -793,6 +794,16 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) { private final IndexRouting indexRouting; + /** + * The default mode for storing source, for all mappers not overriding this setting. + * This is only relevant for indexes configured with synthetic source mode. + */ + public Mapper.SourceKeepMode sourceKeepMode() { + return sourceKeepMode; + } + + private final Mapper.SourceKeepMode sourceKeepMode; + /** * Returns the default search fields for this index. */ @@ -922,6 +933,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti mappingFieldNameLengthLimit = scopedSettings.get(INDEX_MAPPING_FIELD_NAME_LENGTH_LIMIT_SETTING); mappingDimensionFieldsLimit = scopedSettings.get(INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING); indexRouting = IndexRouting.fromIndexMetadata(indexMetadata); + sourceKeepMode = scopedSettings.get(Mapper.SYNTHETIC_SOURCE_KEEP_INDEX_SETTING); es87TSDBCodecEnabled = scopedSettings.get(TIME_SERIES_ES87TSDB_CODEC_ENABLED_SETTING); scopedSettings.addSettingsUpdateConsumer( diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index 35f0130c58706..f020b8128bb13 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -297,7 +297,7 @@ static void parseObjectOrNested(DocumentParserContext context) throws IOExceptio if (context.parent().isNested()) { // Handle a nested object that doesn't contain an array. Arrays are handled in #parseNonDynamicArray.
- if (context.parent().storeArraySource() && context.mappingLookup().isSourceSynthetic() && context.getClonedSource() == false) { + if (context.parent().storeArraySource() && context.canAddIgnoredField()) { Tuple tuple = XContentDataHelper.cloneSubContext(context); context.addIgnoredField( new IgnoredSourceFieldMapper.NameValue( @@ -686,11 +686,16 @@ private static void parseNonDynamicArray( // Check if we need to record the array source. This only applies to synthetic source. if (context.canAddIgnoredField()) { boolean objectRequiresStoringSource = mapper instanceof ObjectMapper objectMapper - && (objectMapper.storeArraySource() || objectMapper.dynamic == ObjectMapper.Dynamic.RUNTIME); + && (objectMapper.storeArraySource() + || (context.sourceKeepModeFromIndexSettings() == Mapper.SourceKeepMode.ARRAYS + && objectMapper instanceof NestedObjectMapper == false) + || objectMapper.dynamic == ObjectMapper.Dynamic.RUNTIME); boolean fieldWithFallbackSyntheticSource = mapper instanceof FieldMapper fieldMapper && fieldMapper.syntheticSourceMode() == FieldMapper.SyntheticSourceMode.FALLBACK; + boolean fieldWithStoredArraySource = mapper instanceof FieldMapper fieldMapper + && context.sourceKeepModeFromIndexSettings() == Mapper.SourceKeepMode.ARRAYS; boolean dynamicRuntimeContext = context.dynamic() == ObjectMapper.Dynamic.RUNTIME; - if (objectRequiresStoringSource || fieldWithFallbackSyntheticSource || dynamicRuntimeContext) { + if (objectRequiresStoringSource || fieldWithFallbackSyntheticSource || dynamicRuntimeContext || fieldWithStoredArraySource) { Tuple tuple = XContentDataHelper.cloneSubContext(context); context.addIgnoredField( IgnoredSourceFieldMapper.NameValue.fromContext( diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index 248369b249007..3a84162b86c27 100644 --- 
a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -329,6 +329,10 @@ public final boolean canAddIgnoredField() { return mappingLookup.isSourceSynthetic() && clonedSource == false; } + Mapper.SourceKeepMode sourceKeepModeFromIndexSettings() { + return indexSettings().sourceKeepMode(); + } + /** * Description on the document being parsed used in error messages. Not * called unless there is an error. diff --git a/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java b/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java index 2c1e01c3cd196..9469ee29ff0a3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java @@ -10,7 +10,9 @@ import org.apache.lucene.document.FieldType; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.util.StringLiteralDeduplicator; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.xcontent.ToXContentFragment; @@ -22,6 +24,57 @@ public abstract class Mapper implements ToXContentFragment, Iterable { + public static final NodeFeature SYNTHETIC_SOURCE_KEEP_FEATURE = new NodeFeature("mapper.synthetic_source_keep"); + + static final String SYNTHETIC_SOURCE_KEEP_PARAM = "synthetic_source_keep"; + + // Only relevant for synthetic source mode. 
+ public enum SourceKeepMode { + NONE("none"), // No source recording + ARRAYS("arrays"), // Store source for arrays of mapped fields + ALL("all"); // Store source for both singletons and arrays of mapped fields + + SourceKeepMode(String name) { + this.name = name; + } + + static SourceKeepMode from(String input) { + if (input.equals(NONE.name)) { + return NONE; + } + if (input.equals(ALL.name)) { + return ALL; + } + if (input.equals(ARRAYS.name)) { + return ARRAYS; + } + throw new IllegalArgumentException("Unknown " + SYNTHETIC_SOURCE_KEEP_PARAM + " value [" + input + "]"); + } + + @Override + public String toString() { + return name; + } + + private final String name; + } + + // Only relevant for indexes configured with synthetic source mode. Otherwise, it has no effect. + // Controls the default behavior for storing the source of leaf fields and objects, in singleton or array form. + // Setting to SourceKeepMode.ALL is equivalent to disabling synthetic source, so this is not allowed. 
+ public static final Setting SYNTHETIC_SOURCE_KEEP_INDEX_SETTING = Setting.enumSetting( + SourceKeepMode.class, + "index.mapping.synthetic_source_keep", + SourceKeepMode.NONE, + value -> { + if (value == SourceKeepMode.ALL) { + throw new IllegalArgumentException("index.mapping.synthetic_source_keep can't be set to [" + value.toString() + "]"); + } + }, + Setting.Property.IndexScope, + Setting.Property.ServerlessPublic + ); + public abstract static class Builder { private String leafName; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 63bbef061c61f..2e250726b98ca 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -34,7 +34,8 @@ public Set getFeatures() { BooleanFieldMapper.BOOLEAN_DIMENSION, ObjectMapper.SUBOBJECTS_AUTO, KeywordFieldMapper.KEYWORD_NORMALIZER_SYNTHETIC_SOURCE, - SourceFieldMapper.SYNTHETIC_SOURCE_STORED_FIELDS_ADVANCE_FIX + SourceFieldMapper.SYNTHETIC_SOURCE_STORED_FIELDS_ADVANCE_FIX, + Mapper.SYNTHETIC_SOURCE_KEEP_FEATURE ); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java index dcb5cd1711c8c..61c4068cedf4a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java @@ -48,6 +48,14 @@ private String getSyntheticSourceWithFieldLimit(CheckedConsumer b.field("my_value", value))); @@ -485,6 +493,108 @@ public void testMixedDisabledEnabledObjects() throws IOException { ); } + public void testIndexStoredArraySourceRootValueArray() throws IOException { + DocumentMapper documentMapper = createMapperServiceWithStoredArraySource(syntheticSourceMapping(b -> { 
+ b.startObject("int_value").field("type", "integer").endObject(); + b.startObject("bool_value").field("type", "boolean").endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.array("int_value", new int[] { 30, 20, 10 }); + b.field("bool_value", true); + }); + assertEquals(""" + {"bool_value":true,"int_value":[30,20,10]}""", syntheticSource); + } + + public void testIndexStoredArraySourceRootObjectArray() throws IOException { + DocumentMapper documentMapper = createMapperServiceWithStoredArraySource(syntheticSourceMapping(b -> { + b.startObject("path"); + { + b.field("type", "object"); + b.startObject("properties"); + { + b.startObject("int_value").field("type", "integer").endObject(); + } + b.endObject(); + } + b.endObject(); + b.startObject("bool_value").field("type", "boolean").endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.startArray("path"); + b.startObject().field("int_value", 10).endObject(); + b.startObject().field("int_value", 20).endObject(); + b.endArray(); + b.field("bool_value", true); + }); + assertEquals(""" + {"bool_value":true,"path":[{"int_value":10},{"int_value":20}]}""", syntheticSource); + } + + public void testIndexStoredArraySourceNestedValueArray() throws IOException { + DocumentMapper documentMapper = createMapperServiceWithStoredArraySource(syntheticSourceMapping(b -> { + b.startObject("path"); + { + b.field("type", "object"); + b.startObject("properties"); + { + b.startObject("int_value").field("type", "integer").endObject(); + b.startObject("bool_value").field("type", "boolean").endObject(); + } + b.endObject(); + } + b.endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.startObject("path"); + { + b.array("int_value", new int[] { 30, 20, 10 }); + b.field("bool_value", true); + } + b.endObject(); + }); + assertEquals(""" + {"path":{"bool_value":true,"int_value":[30,20,10]}}""", 
syntheticSource); + } + + public void testIndexStoredArraySourceNestedObjectArray() throws IOException { + DocumentMapper documentMapper = createMapperServiceWithStoredArraySource(syntheticSourceMapping(b -> { + b.startObject("path"); + { + b.field("type", "object"); + b.startObject("properties"); + { + b.startObject("to"); + { + b.field("type", "object"); + b.startObject("properties"); + { + b.startObject("int_value").field("type", "integer").endObject(); + } + b.endObject(); + } + b.endObject(); + b.startObject("bool_value").field("type", "boolean").endObject(); + } + b.endObject(); + } + b.endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.startObject("path"); + { + b.startArray("to"); + b.startObject().field("int_value", 10).endObject(); + b.startObject().field("int_value", 20).endObject(); + b.endArray(); + b.field("bool_value", true); + } + b.endObject(); + }); + assertEquals(""" + {"path":{"bool_value":true,"to":[{"int_value":10},{"int_value":20}]}}""", syntheticSource); + } + public void testRootArray() throws IOException { DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { b.startObject("path");