From c86a9d94e9b7f3ba8076cc42926a359d31fcd1a6 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 20 Nov 2017 14:24:00 +0100 Subject: [PATCH] Ensure nested documents have consistent version and seq_ids Today we index dummy values for seq_ids and version on nested documents. This is on the one hand trappy since users can request these values via inner hits and on the other hand not necessarily good for compression since the dummy value will likely not compress well when seqIDs are lowish. This change ensures that we share the same field values for all documents in a nested block. This won't have any overhead, in-fact it might be more efficient since we even reduce the work needed slightly. --- .../index/mapper/SeqNoFieldMapper.java | 15 +++-- .../index/mapper/VersionFieldMapper.java | 4 +- .../test/search.inner_hits/10_basic.yml | 57 +++++++++++++++++++ 3 files changed, 69 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java index 7d74f9e52aa4a..55fc1333d2030 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java @@ -245,15 +245,18 @@ public Mapper parse(ParseContext context) throws IOException { @Override public void postParse(ParseContext context) throws IOException { - // In the case of nested docs, let's fill nested docs with seqNo=1 and - // primaryTerm=0 so that Lucene doesn't write a Bitset for documents - // that don't have the field. This is consistent with the default value + // In the case of nested docs, let's fill nested docs with the original + // so that Lucene doesn't write a Bitset for documents that + // don't have the field. This is consistent with the default value // for efficiency. + // we share the parent docs fields to ensure good compression + SequenceIDFields seqID = context.seqID(); + assert seqID != null; for (int i = 1; i < context.docs().size(); i++) { final Document doc = context.docs().get(i); - doc.add(new LongPoint(NAME, 1)); - doc.add(new NumericDocValuesField(NAME, 1L)); - doc.add(new NumericDocValuesField(PRIMARY_TERM_NAME, 0L)); + doc.add(seqID.seqNo); + doc.add(seqID.seqNoDocValue); + doc.add(seqID.primaryTerm); } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java index 90ea85024c119..c5ead1327cc9b 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java @@ -126,9 +126,11 @@ public Mapper parse(ParseContext context) throws IOException { public void postParse(ParseContext context) throws IOException { // In the case of nested docs, let's fill nested docs with version=1 so that Lucene doesn't write a Bitset for documents // that don't have the field. This is consistent with the default value for efficiency. + Field version = context.version(); + assert version != null; for (int i = 1; i < context.docs().size(); i++) { final Document doc = context.docs().get(i); - doc.add(new NumericDocValuesField(NAME, 1L)); + doc.add(version); } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/10_basic.yml index 524c1c593965d..e211aeeedc5fd 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/10_basic.yml @@ -41,3 +41,60 @@ setup: - match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._nested.offset: 0 } - is_false: hits.hits.0.inner_hits.nested_field.hits.hits.0._nested.child + +--- +"Nested doc version and seqIDs": + + - skip: + # fixed in 7.0 + version: " - 6.99.99" + reason: "version and seq IDs where not accurate in previous versions" + + - do: + index: + index: test + type: type_1 + id: 1 + body: + "nested_field" : [ { "foo": "bar" } ] + - do: + indices.refresh: {} + + - do: + search: + body: { "query" : { "nested" : { "path" : "nested_field", "query" : { "match_all" : {} }, "inner_hits" : { version: true, "docvalue_fields": ["_seq_no"]} }}, "version": true, "docvalue_fields" : ["_seq_no"] } + + - match: { hits.total: 1 } + - match: { hits.hits.0._index: "test" } + - match: { hits.hits.0._type: "type_1" } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._version: 1 } + - match: { hits.hits.0.fields._seq_no: [0] } + - match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.fields._seq_no: [0] } + + + - do: + index: + index: test + type: type_1 + id: 1 + body: + "nested_field" : [ { "foo": "baz" } ] + - do: + indices.refresh: {} + + - do: + search: + body: { "query" : { "nested" : { "path" : "nested_field", "query" : { "match_all" : {} }, "inner_hits" : { version: true, "docvalue_fields": ["_seq_no"]} }}, "version": true, "docvalue_fields" : ["_seq_no"] } + + - match: { hits.total: 1 } + - match: { hits.hits.0._index: "test" } + - match: { hits.hits.0._type: "type_1" } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._version: 2 } + - match: { hits.hits.0.fields._seq_no: [1] } + - match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._version: 2 } + - match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.fields._seq_no: [1] } + + +