Skip to content

Commit 9cf063e

Browse files
authored
Fix Term Vectors with artificial docs and keyword fields (#53504) (#53551)
Previously, Term Vectors API was returning empty results for artificial documents with keyword fields. Checking only for `string()` on `IndexableField` is not enough, since for `KeywordFieldType` `binaryValue()` must be used instead. Fixes #53494 (cherry picked from commit 1fc3fe3)
1 parent a9de31b commit 9cf063e

File tree

8 files changed

+53
-30
lines changed

8 files changed

+53
-30
lines changed

server/src/main/java/org/elasticsearch/index/mapper/ParseContext.java

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -121,24 +121,6 @@ public IndexableField[] getFields(String name) {
121121
return f.toArray(new IndexableField[f.size()]);
122122
}
123123

124-
/**
125-
* Returns an array of values of the field specified as the method parameter.
126-
* This method returns an empty array when there are no
127-
* matching fields. It never returns null.
128-
* If you want the actual numeric field instances back, use {@link #getFields}.
129-
* @param name the name of the field
130-
* @return a <code>String[]</code> of field values
131-
*/
132-
public final String[] getValues(String name) {
133-
List<String> result = new ArrayList<>();
134-
for (IndexableField field : fields) {
135-
if (field.name().equals(name) && field.stringValue() != null) {
136-
result.add(field.stringValue());
137-
}
138-
}
139-
return result.toArray(new String[result.size()]);
140-
}
141-
142124
public IndexableField getField(String name) {
143125
for (IndexableField field : fields) {
144126
if (field.name().equals(name)) {

server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import org.elasticsearch.index.get.GetResult;
4545
import org.elasticsearch.index.mapper.DocumentMapperForType;
4646
import org.elasticsearch.index.mapper.IdFieldMapper;
47+
import org.elasticsearch.index.mapper.KeywordFieldMapper;
4748
import org.elasticsearch.index.mapper.MappedFieldType;
4849
import org.elasticsearch.index.mapper.MapperService;
4950
import org.elasticsearch.index.mapper.ParseContext;
@@ -56,6 +57,7 @@
5657
import org.elasticsearch.search.dfs.AggregatedDfs;
5758

5859
import java.io.IOException;
60+
import java.util.ArrayList;
5961
import java.util.Arrays;
6062
import java.util.Collection;
6163
import java.util.Collections;
@@ -316,14 +318,33 @@ private static Fields generateTermVectorsFromDoc(IndexShard indexShard, TermVect
316318
else {
317319
seenFields.add(field.name());
318320
}
319-
String[] values = doc.getValues(field.name());
321+
String[] values = getValues(doc.getFields(field.name()));
320322
documentFields.add(new DocumentField(field.name(), Arrays.asList((Object[]) values)));
321323
}
322324
return generateTermVectors(indexShard,
323325
XContentHelper.convertToMap(parsedDocument.source(), true, request.xContentType()).v2(), documentFields,
324326
request.offsets(), request.perFieldAnalyzer(), seenFields);
325327
}
326328

329+
/**
330+
* Returns an array of values of the field specified as the method parameter.
331+
* This method returns an empty array when there are no
332+
* matching fields. It never returns null.
333+
* @param fields The <code>IndexableField</code> to get the values from
334+
* @return a <code>String[]</code> of field values
335+
*/
336+
public static String[] getValues(IndexableField[] fields) {
337+
List<String> result = new ArrayList<>();
338+
for (IndexableField field : fields) {
339+
if (field.fieldType() instanceof KeywordFieldMapper.KeywordFieldType) {
340+
result.add(field.binaryValue().utf8ToString());
341+
} else if (field.fieldType() instanceof StringFieldType) {
342+
result.add(field.stringValue());
343+
}
344+
}
345+
return result.toArray(new String[0]);
346+
}
347+
327348
private static ParsedDocument parseDocument(IndexShard indexShard, String index, String type, BytesReference doc,
328349
XContentType xContentType, String routing) {
329350
MapperService mapperService = indexShard.mapperService();

server/src/test/java/org/elasticsearch/index/mapper/DateFieldMapperTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.elasticsearch.common.xcontent.XContentType;
3131
import org.elasticsearch.index.IndexService;
3232
import org.elasticsearch.index.mapper.MapperService.MergeReason;
33+
import org.elasticsearch.index.termvectors.TermVectorsService;
3334
import org.elasticsearch.plugins.Plugin;
3435
import org.elasticsearch.test.ESSingleNodeTestCase;
3536
import org.elasticsearch.test.InternalSettingsPlugin;
@@ -202,7 +203,7 @@ private void testIgnoreMalfomedForValue(String value, String expectedException)
202203

203204
IndexableField[] fields = doc.rootDoc().getFields("field");
204205
assertEquals(0, fields.length);
205-
assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored"));
206+
assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored")));
206207
}
207208

208209
public void testChangeFormat() throws IOException {

server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,15 @@ public void testDotsWithExistingMapper() throws Exception {
154154
.endObject());
155155
ParsedDocument doc = mapper.parse(new SourceToParse("test", "type", "1", bytes, XContentType.JSON));
156156
assertNull(doc.dynamicMappingsUpdate()); // no update!
157-
String[] values = doc.rootDoc().getValues("foo.bar.baz");
158-
assertEquals(3, values.length);
159-
assertEquals("123", values[0]);
160-
assertEquals("456", values[1]);
161-
assertEquals("789", values[2]);
157+
158+
IndexableField[] fields = doc.rootDoc().getFields("foo.bar.baz");
159+
assertEquals(6, fields.length);
160+
assertEquals(123, fields[0].numericValue());
161+
assertEquals("123", fields[1].stringValue());
162+
assertEquals(456, fields[2].numericValue());
163+
assertEquals("456", fields[3].stringValue());
164+
assertEquals(789, fields[4].numericValue());
165+
assertEquals("789", fields[5].stringValue());
162166
}
163167

164168
public void testDotsWithExistingNestedMapper() throws Exception {

server/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,19 @@
2020
package org.elasticsearch.index.mapper;
2121

2222
import org.apache.lucene.index.IndexOptions;
23+
import org.apache.lucene.index.IndexableField;
2324
import org.elasticsearch.common.Strings;
2425
import org.elasticsearch.common.bytes.BytesReference;
2526
import org.elasticsearch.common.compress.CompressedXContent;
2627
import org.elasticsearch.common.xcontent.XContentFactory;
2728
import org.elasticsearch.common.xcontent.XContentType;
29+
import org.elasticsearch.index.mapper.FieldNamesFieldMapper.FieldNamesFieldType;
2830
import org.elasticsearch.test.ESSingleNodeTestCase;
2931

32+
import java.util.ArrayList;
3033
import java.util.Arrays;
3134
import java.util.Collections;
35+
import java.util.List;
3236
import java.util.Set;
3337
import java.util.SortedSet;
3438
import java.util.TreeSet;
@@ -48,8 +52,14 @@ private static SortedSet<String> set(String... values) {
4852
}
4953

5054
void assertFieldNames(Set<String> expected, ParsedDocument doc) {
51-
String[] got = doc.rootDoc().getValues("_field_names");
52-
assertEquals(expected, set(got));
55+
IndexableField[] fields = doc.rootDoc().getFields("_field_names");
56+
List<String> result = new ArrayList<>();
57+
for (IndexableField field : fields) {
58+
if (field.fieldType() instanceof FieldNamesFieldType) {
59+
result.add(field.stringValue());
60+
}
61+
}
62+
assertEquals(expected, set(result.toArray(new String[0])));
5363
}
5464

5565
public void testExtractFieldNames() {

server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.elasticsearch.common.xcontent.XContentFactory;
3333
import org.elasticsearch.common.xcontent.XContentType;
3434
import org.elasticsearch.index.IndexService;
35+
import org.elasticsearch.index.termvectors.TermVectorsService;
3536
import org.elasticsearch.plugins.Plugin;
3637
import org.elasticsearch.test.ESSingleNodeTestCase;
3738
import org.elasticsearch.test.InternalSettingsPlugin;
@@ -194,7 +195,7 @@ public void testIgnoreMalformed() throws Exception {
194195

195196
IndexableField[] fields = doc.rootDoc().getFields("field");
196197
assertEquals(0, fields.length);
197-
assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored"));
198+
assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored")));
198199
}
199200

200201
public void testNullValue() throws IOException {

server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
3737
import org.elasticsearch.index.analysis.TokenizerFactory;
3838
import org.elasticsearch.index.mapper.MapperService.MergeReason;
39+
import org.elasticsearch.index.termvectors.TermVectorsService;
3940
import org.elasticsearch.indices.analysis.AnalysisModule;
4041
import org.elasticsearch.plugins.AnalysisPlugin;
4142
import org.elasticsearch.plugins.Plugin;
@@ -128,6 +129,9 @@ public void testDefaults() throws Exception {
128129
fieldType = fields[1].fieldType();
129130
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
130131
assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
132+
133+
// used by TermVectorsService
134+
assertArrayEquals(new String[] { "1234" }, TermVectorsService.getValues(doc.rootDoc().getFields("field")));
131135
}
132136

133137
public void testIgnoreAbove() throws IOException {

server/src/test/java/org/elasticsearch/index/mapper/NumberFieldMapperTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
package org.elasticsearch.index.mapper;
2121

2222
import com.carrotsearch.randomizedtesting.annotations.Timeout;
23-
2423
import org.apache.lucene.index.DocValuesType;
2524
import org.apache.lucene.index.IndexableField;
2625
import org.elasticsearch.common.Strings;
@@ -32,6 +31,7 @@
3231
import org.elasticsearch.common.xcontent.XContentType;
3332
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
3433
import org.elasticsearch.index.mapper.NumberFieldTypeTests.OutOfRangeSpec;
34+
import org.elasticsearch.index.termvectors.TermVectorsService;
3535

3636
import java.io.ByteArrayInputStream;
3737
import java.io.IOException;
@@ -250,7 +250,7 @@ public void testIgnoreMalformed() throws Exception {
250250

251251
IndexableField[] fields = doc.rootDoc().getFields("field");
252252
assertEquals(0, fields.length);
253-
assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored"));
253+
assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored")));
254254
}
255255
}
256256
}

0 commit comments

Comments
 (0)