From 0ccbd9fefbc5995821b5ed554586fe20a1194ecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Tue, 2 Nov 2021 19:48:13 +0100 Subject: [PATCH] Better error message for long keys in flattened fields --- .../flattened/FlattenedFieldParser.java | 29 ++++++++-- .../flattened/FlattenedFieldMapperTests.java | 58 ++++++++++++++++++- 2 files changed, 79 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java index b92867dc8264b..5f65bb8b7bdbf 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java @@ -11,6 +11,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexableField; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.xcontent.XContentParserUtils; @@ -131,15 +132,33 @@ private void addField(ContentPath path, String currentName, String value, List IndexWriter.MAX_TERM_LENGTH) { + String msg = "Flattened field [" + + rootFieldName + + "] contains one immense field" + + " whose keyed encoding is longer than the allowed max length of " + + IndexWriter.MAX_TERM_LENGTH + + " bytes. 
Key length: " + + key.length() + + ", value length: " + + value.length() + + " for key starting with [" + + key.substring(0, Math.min(key.length(), 50)) + + "]"; + throw new IllegalArgumentException(msg); + } + BytesRef bytesValue = new BytesRef(value); if (fieldType.isSearchable()) { - fields.add(new StringField(rootFieldName, new BytesRef(value), Field.Store.NO)); - fields.add(new StringField(keyedFieldName, new BytesRef(keyedValue), Field.Store.NO)); + fields.add(new StringField(rootFieldName, bytesValue, Field.Store.NO)); + fields.add(new StringField(keyedFieldName, bytesKeyedValue, Field.Store.NO)); } if (fieldType.hasDocValues()) { - fields.add(new SortedSetDocValuesField(rootFieldName, new BytesRef(value))); - fields.add(new SortedSetDocValuesField(keyedFieldName, new BytesRef(keyedValue))); + fields.add(new SortedSetDocValuesField(rootFieldName, bytesValue)); + fields.add(new SortedSetDocValuesField(keyedFieldName, bytesKeyedValue)); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java index 3123e75bef633..2e303d957cbe9 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java @@ -262,7 +262,8 @@ public void testEagerGlobalOrdinals() throws IOException { public void testIgnoreAbove() throws IOException { // First verify the default behavior when ignore_above is not set. 
- DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping)); + DocumentMapper mapper = mapperService.documentMapper(); ParsedDocument parsedDoc = mapper.parse(source(b -> { b.startArray("field"); @@ -280,15 +281,66 @@ public void testIgnoreAbove() throws IOException { b.field("ignore_above", 10); })); - ParsedDocument newParsedDoc = newMapper.parse(source(b -> { + parsedDoc = newMapper.parse(source(b -> { b.startArray("field"); { b.startObject().field("key", "a longer then usual value").endObject(); } b.endArray(); })); - IndexableField[] newFields = newParsedDoc.rootDoc().getFields("field"); + IndexableField[] newFields = parsedDoc.rootDoc().getFields("field"); assertEquals(0, newFields.length); + + // using a key bigger than ignore_above should not prevent the field from being indexed, although we store key:value pairs + parsedDoc = newMapper.parse(source(b -> { + b.startArray("field"); + { + b.startObject().field("key_longer_than_10chars", "value").endObject(); + } + b.endArray(); + })); + newFields = parsedDoc.rootDoc().getFields("field"); + assertEquals(2, newFields.length); + } + + /** + * using a key:value pair above the Lucene term length limit would throw an error on indexing + * that we pre-empt with a nicer exception + */ + public void testImmenseKeyedTermException() throws IOException { + DocumentMapper newMapper = createDocumentMapper(fieldMapping(b -> { b.field("type", "flattened"); })); + + String longKey = "x".repeat(32800); + MapperParsingException ex = expectThrows(MapperParsingException.class, () -> newMapper.parse(source(b -> { + b.startArray("field"); + { + b.startObject().field(longKey, "value").endObject(); + } + b.endArray(); + }))); + assertEquals( + "Flattened field [field] contains one immense field whose keyed encoding is longer " + + "than the allowed max length of 32766 bytes. 
Key length: " + + longKey.length() + + ", value length: 5 for key starting with [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx]", + ex.getCause().getMessage() + ); + + String value = "x".repeat(32800); + ex = expectThrows(MapperParsingException.class, () -> newMapper.parse(source(b -> { + b.startArray("field"); + { + b.startObject().field("key", value).endObject(); + } + b.endArray(); + }))); + assertEquals( + "Flattened field [field] contains one immense field whose keyed encoding is longer " + + "than the allowed max length of 32766 bytes. Key length: 3, value length: " + + value.length() + + " for key starting with [key]", + ex.getCause().getMessage() + ); } public void testNullValues() throws Exception {