From b7a9dd0e61b570e5c16d73b93b7d1ad42db273ea Mon Sep 17 00:00:00 2001 From: Jordan Powers Date: Mon, 28 Jul 2025 09:42:10 -0700 Subject: [PATCH] Fix decoding of non-ascii field names in ignored source (#132018) When encoding an ignored source entry, we write the string length of the field name (its `String.length()`, i.e. the number of UTF-16 chars), not the encoded UTF-8 byte count; however, the decode logic treats this encoded value as the byte length, which is only correct when the name is pure ASCII. This patch updates the decode logic to instead properly treat the value as the string length. (cherry picked from commit 178c0c9ae47d28388687bcdcb035c792dd22a0e1) --- docs/changelog/132018.yaml | 5 +++++ .../index/mapper/IgnoredSourceFieldMapper.java | 8 ++++++-- .../index/mapper/IgnoredSourceFieldMapperTests.java | 11 +++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 docs/changelog/132018.yaml diff --git a/docs/changelog/132018.yaml b/docs/changelog/132018.yaml new file mode 100644 index 0000000000000..9032707df8542 --- /dev/null +++ b/docs/changelog/132018.yaml @@ -0,0 +1,5 @@ +pr: 132018 +summary: Fix decoding of non-ascii field names in ignored source +area: Mapping +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java index d8d8200baac31..812192d79cdce 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java @@ -181,8 +181,12 @@ static NameValue decode(Object field) { int encodedSize = ByteUtils.readIntLE(bytes, 0); int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET; int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET; - String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8); - BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4); + + String decoded = new String(bytes, 4, bytes.length - 4, StandardCharsets.UTF_8); 
+ String name = decoded.substring(0, nameSize); + int nameByteCount = name.getBytes(StandardCharsets.UTF_8).length; + + BytesRef value = new BytesRef(bytes, 4 + nameByteCount, bytes.length - nameByteCount - 4); return new NameValue(name, parentOffset, value, null); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java index 628b64de19bd1..a98ae5219fc44 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java @@ -10,12 +10,14 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.index.DirectoryReader; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Nullable; import org.elasticsearch.search.lookup.SourceFilter; import org.elasticsearch.test.FieldMaskingReader; import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.json.JsonXContent; import org.hamcrest.Matchers; import java.io.IOException; @@ -122,6 +124,15 @@ public void testIgnoredString() throws IOException { ); } + public void testIgnoredStringFullUnicode() throws IOException { + String value = randomUnicodeOfCodepointLengthBetween(5, 20); + String fieldName = randomUnicodeOfCodepointLength(5); + + String expected = Strings.toString(JsonXContent.contentBuilder().startObject().field(fieldName, value).endObject()); + + assertEquals(expected, getSyntheticSourceWithFieldLimit(b -> b.field(fieldName, value))); + } + public void testIgnoredInt() throws IOException { int value = randomInt(); assertEquals("{\"my_value\":" + value + "}", getSyntheticSourceWithFieldLimit(b -> b.field("my_value", value)));