Skip to content

Commit 2f3263f

Browse files
committed
Ensure nested documents have consistent version and seq_ids (#27455)
Today we index dummy values for seq_ids and version on nested documents. This is on the one hand trappy since users can request these values via inner hits and on the other hand not necessarily good for compression since the dummy value will likely not compress well when seqIDs are lowish. This change ensures that we share the same field values for all documents in a nested block. This won't have any overhead, in-fact it might be more efficient since we even reduce the work needed slightly.
1 parent 8227dba commit 2f3263f

File tree

3 files changed

+69
-7
lines changed

3 files changed

+69
-7
lines changed

core/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -245,15 +245,18 @@ public Mapper parse(ParseContext context) throws IOException {
245245

246246
@Override
247247
public void postParse(ParseContext context) throws IOException {
248-
// In the case of nested docs, let's fill nested docs with seqNo=1 and
249-
// primaryTerm=0 so that Lucene doesn't write a Bitset for documents
250-
// that don't have the field. This is consistent with the default value
248+
// In the case of nested docs, let's fill nested docs with the original
249+
// so that Lucene doesn't write a Bitset for documents that
250+
// don't have the field. This is consistent with the default value
251251
// for efficiency.
252+
// we share the parent docs fields to ensure good compression
253+
SequenceIDFields seqID = context.seqID();
254+
assert seqID != null;
252255
for (int i = 1; i < context.docs().size(); i++) {
253256
final Document doc = context.docs().get(i);
254-
doc.add(new LongPoint(NAME, 1));
255-
doc.add(new NumericDocValuesField(NAME, 1L));
256-
doc.add(new NumericDocValuesField(PRIMARY_TERM_NAME, 0L));
257+
doc.add(seqID.seqNo);
258+
doc.add(seqID.seqNoDocValue);
259+
doc.add(seqID.primaryTerm);
257260
}
258261
}
259262

core/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,11 @@ public Mapper parse(ParseContext context) throws IOException {
126126
public void postParse(ParseContext context) throws IOException {
127127
// In the case of nested docs, let's fill nested docs with version=1 so that Lucene doesn't write a Bitset for documents
128128
// that don't have the field. This is consistent with the default value for efficiency.
129+
Field version = context.version();
130+
assert version != null;
129131
for (int i = 1; i < context.docs().size(); i++) {
130132
final Document doc = context.docs().get(i);
131-
doc.add(new NumericDocValuesField(NAME, 1L));
133+
doc.add(version);
132134
}
133135
}
134136

rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/10_basic.yml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,60 @@ setup:
4141
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._nested.offset: 0 }
4242
- is_false: hits.hits.0.inner_hits.nested_field.hits.hits.0._nested.child
4343

44+
45+
---
46+
"Nested doc version and seqIDs":
47+
48+
- skip:
49+
# fixed in 6.0.1
50+
version: " - 6.0.0"
51+
reason: "version and seq IDs where not accurate in previous versions"
52+
53+
- do:
54+
index:
55+
index: test
56+
type: type_1
57+
id: 1
58+
body:
59+
"nested_field" : [ { "foo": "bar" } ]
60+
- do:
61+
indices.refresh: {}
62+
63+
- do:
64+
search:
65+
body: { "query" : { "nested" : { "path" : "nested_field", "query" : { "match_all" : {} }, "inner_hits" : { version: true, "docvalue_fields": ["_seq_no"]} }}, "version": true, "docvalue_fields" : ["_seq_no"] }
66+
67+
- match: { hits.total: 1 }
68+
- match: { hits.hits.0._index: "test" }
69+
- match: { hits.hits.0._type: "type_1" }
70+
- match: { hits.hits.0._id: "1" }
71+
- match: { hits.hits.0._version: 1 }
72+
- match: { hits.hits.0.fields._seq_no: [0] }
73+
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.fields._seq_no: [0] }
74+
75+
76+
- do:
77+
index:
78+
index: test
79+
type: type_1
80+
id: 1
81+
body:
82+
"nested_field" : [ { "foo": "baz" } ]
83+
- do:
84+
indices.refresh: {}
85+
86+
- do:
87+
search:
88+
body: { "query" : { "nested" : { "path" : "nested_field", "query" : { "match_all" : {} }, "inner_hits" : { version: true, "docvalue_fields": ["_seq_no"]} }}, "version": true, "docvalue_fields" : ["_seq_no"] }
89+
90+
- match: { hits.total: 1 }
91+
- match: { hits.hits.0._index: "test" }
92+
- match: { hits.hits.0._type: "type_1" }
93+
- match: { hits.hits.0._id: "1" }
94+
- match: { hits.hits.0._version: 2 }
95+
- match: { hits.hits.0.fields._seq_no: [1] }
96+
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._version: 2 }
97+
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.fields._seq_no: [1] }
98+
99+
100+

0 commit comments

Comments
 (0)