diff --git a/docs/changelog/143501.yaml b/docs/changelog/143501.yaml new file mode 100644 index 0000000000000..5922048cfe010 --- /dev/null +++ b/docs/changelog/143501.yaml @@ -0,0 +1,6 @@ +pr: 143501 +summary: Expose byte offsets on XContentParser via getCurrentLocation() +area: Infra/Core +type: enhancement +issues: + - 142873 diff --git a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java index af96e7a8ed34d..9aea0a44dc619 100644 --- a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java +++ b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java @@ -57,7 +57,7 @@ public void allowDuplicateKeys(boolean allowDuplicateKeys) { private static XContentLocation getLocation(JsonProcessingException e) { JsonLocation loc = e.getLocation(); if (loc != null) { - return new XContentLocation(loc.getLineNr(), loc.getColumnNr()); + return new XContentLocation(loc.getLineNr(), loc.getColumnNr(), loc.getByteOffset()); } else { return null; } @@ -311,7 +311,16 @@ public XContentLocation getTokenLocation() { if (loc == null) { return null; } - return new XContentLocation(loc.getLineNr(), loc.getColumnNr()); + return new XContentLocation(loc.getLineNr(), loc.getColumnNr(), loc.getByteOffset()); + } + + @Override + public XContentLocation getCurrentLocation() { + JsonLocation loc = parser.getCurrentLocation(); + if (loc == null) { + return null; + } + return new XContentLocation(loc.getLineNr(), loc.getColumnNr(), loc.getByteOffset()); } @Override diff --git a/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/JsonXContentParserByteOffsetTests.java b/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/JsonXContentParserByteOffsetTests.java new file mode 100644 index 0000000000000..bf960e1e0e047 --- 
/dev/null +++ b/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/JsonXContentParserByteOffsetTests.java @@ -0,0 +1,604 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.xcontent.provider.json; + +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentLocation; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentParser.Token; +import org.elasticsearch.xcontent.XContentParserConfiguration; +import org.elasticsearch.xcontent.XContentType; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +public class JsonXContentParserByteOffsetTests extends ESTestCase { + + public void testTokenLocationByteOffsets() throws IOException { + byte[] json = "{\"key\":\"val\"}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + // START_OBJECT at byte 0 + assertEquals(Token.START_OBJECT, parser.nextToken()); + XContentLocation loc = parser.getTokenLocation(); + assertEquals(0L, loc.byteOffset()); + + // FIELD_NAME "key" at byte 1 + assertEquals(Token.FIELD_NAME, parser.nextToken()); + loc = parser.getTokenLocation(); + assertEquals(1L, loc.byteOffset()); + + // VALUE_STRING "val" at byte 7 + assertEquals(Token.VALUE_STRING, parser.nextToken()); + loc = parser.getTokenLocation(); + 
assertEquals(7L, loc.byteOffset()); + + // END_OBJECT at byte 12 + assertEquals(Token.END_OBJECT, parser.nextToken()); + loc = parser.getTokenLocation(); + assertEquals(12L, loc.byteOffset()); + } + } + + public void testGetCurrentLocationAdvancesPastToken() throws IOException { + byte[] json = "{\"a\":1}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); + XContentLocation current = parser.getCurrentLocation(); + // After consuming START_OBJECT '{', current location should be past it + assertTrue(current.byteOffset() > 0); + } + } + + public void testByteSlicingWithSkipChildren() throws IOException { + // The inner object {"b":2} starts at byte 5 and ends at byte 11 (inclusive) + byte[] json = "{\"a\":{\"b\":2},\"c\":3}".getBytes(StandardCharsets.UTF_8); + // 0123456789012345678 + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); // outer { + assertEquals(Token.FIELD_NAME, parser.nextToken()); // "a" + assertEquals(Token.START_OBJECT, parser.nextToken()); // inner { + + long startOffset = parser.getTokenLocation().byteOffset(); + assertEquals(5L, startOffset); + + parser.skipChildren(); + + long endOffset = parser.getCurrentLocation().byteOffset(); + // After skipChildren() from START_OBJECT, getCurrentLocation should point past '}' + assertTrue(endOffset > startOffset); + + byte[] slice = Arrays.copyOfRange(json, (int) startOffset, (int) endOffset); + String sliced = new String(slice, StandardCharsets.UTF_8); + assertEquals("{\"b\":2}", sliced); + } + } + + public void testByteSlicingArray() throws IOException { + byte[] json = "{\"a\":[1,2,3]}".getBytes(StandardCharsets.UTF_8); + // 0123456789012 + try (XContentParser parser = 
XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); + assertEquals(Token.FIELD_NAME, parser.nextToken()); + assertEquals(Token.START_ARRAY, parser.nextToken()); + + long startOffset = parser.getTokenLocation().byteOffset(); + assertEquals(5L, startOffset); + + parser.skipChildren(); + + long endOffset = parser.getCurrentLocation().byteOffset(); + byte[] slice = Arrays.copyOfRange(json, (int) startOffset, (int) endOffset); + String sliced = new String(slice, StandardCharsets.UTF_8); + assertEquals("[1,2,3]", sliced); + } + } + + public void testNestedObjectByteSlicing() throws IOException { + byte[] json = "{\"a\":{\"b\":{\"c\":true}}}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); // outer + assertEquals(Token.FIELD_NAME, parser.nextToken()); // "a" + assertEquals(Token.START_OBJECT, parser.nextToken()); // {"b":{"c":true}} + + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + + byte[] slice = Arrays.copyOfRange(json, (int) startOffset, (int) endOffset); + String sliced = new String(slice, StandardCharsets.UTF_8); + assertEquals("{\"b\":{\"c\":true}}", sliced); + } + } + + public void testGetCurrentLocationIsNotNull() throws IOException { + byte[] json = "{\"x\":42}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); + assertNotNull(parser.getCurrentLocation()); + assertTrue(parser.getCurrentLocation().byteOffset() >= 0); + } + } + + public void testScalarValueSlicing() throws IOException { + byte[] json = "{\"n\":12345}".getBytes(StandardCharsets.UTF_8); + // 
01234567890 + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); + assertEquals(Token.FIELD_NAME, parser.nextToken()); + assertEquals(Token.VALUE_NUMBER, parser.nextToken()); + + long startOffset = parser.getTokenLocation().byteOffset(); + long endOffset = parser.getCurrentLocation().byteOffset(); + + byte[] slice = Arrays.copyOfRange(json, (int) startOffset, (int) endOffset); + String sliced = new String(slice, StandardCharsets.UTF_8); + assertEquals("12345", sliced); + } + } + + public void testMultiByteUtf8ObjectSlicing() throws IOException { + // Multi-byte UTF-8: \u00e9 = 2 bytes (c3 a9), \u2603 = 3 bytes (e2 98 83) + // Verify byte offsets are counted in bytes, not characters + byte[] json = "{\"k\":{\"x\":\"\u00e9\u2603\"}}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); + assertEquals(Token.FIELD_NAME, parser.nextToken()); + assertEquals(Token.START_OBJECT, parser.nextToken()); + + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + + byte[] slice = Arrays.copyOfRange(json, (int) startOffset, (int) endOffset); + String sliced = new String(slice, StandardCharsets.UTF_8); + assertEquals("{\"x\":\"\u00e9\u2603\"}", sliced); + } + } + + public void testSurrogatePairObjectSlicing() throws IOException { + // U+1F389 (🎉) = 4 bytes in UTF-8 (f0 9f 8e 89), 2 chars in Java (surrogate pair) + // This is the case most likely to expose byte-vs-character offset confusion + byte[] json = "{\"k\":{\"x\":\"\uD83C\uDF89\"}}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, 
parser.nextToken()); + assertEquals(Token.FIELD_NAME, parser.nextToken()); + assertEquals(Token.START_OBJECT, parser.nextToken()); + + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + + byte[] slice = Arrays.copyOfRange(json, (int) startOffset, (int) endOffset); + String sliced = new String(slice, StandardCharsets.UTF_8); + assertEquals("{\"x\":\"\uD83C\uDF89\"}", sliced); + } + } + + public void testEmptyObjectSlicing() throws IOException { + byte[] json = "{\"a\":{}}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); + assertEquals(Token.FIELD_NAME, parser.nextToken()); + assertEquals(Token.START_OBJECT, parser.nextToken()); + + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + + byte[] slice = Arrays.copyOfRange(json, (int) startOffset, (int) endOffset); + assertEquals("{}", new String(slice, StandardCharsets.UTF_8)); + } + } + + public void testEmptyArraySlicing() throws IOException { + byte[] json = "{\"a\":[]}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); + assertEquals(Token.FIELD_NAME, parser.nextToken()); + assertEquals(Token.START_ARRAY, parser.nextToken()); + + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + + byte[] slice = Arrays.copyOfRange(json, (int) startOffset, (int) endOffset); + assertEquals("[]", new String(slice, StandardCharsets.UTF_8)); + } + } + + public void testBooleanValueSlicing() throws IOException { + byte[] json = 
"{\"b\":false}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); + assertEquals(Token.FIELD_NAME, parser.nextToken()); + assertEquals(Token.VALUE_BOOLEAN, parser.nextToken()); + + long startOffset = parser.getTokenLocation().byteOffset(); + long endOffset = parser.getCurrentLocation().byteOffset(); + + byte[] slice = Arrays.copyOfRange(json, (int) startOffset, (int) endOffset); + assertEquals("false", new String(slice, StandardCharsets.UTF_8)); + } + } + + public void testNullValueSlicing() throws IOException { + byte[] json = "{\"n\":null}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); + assertEquals(Token.FIELD_NAME, parser.nextToken()); + assertEquals(Token.VALUE_NULL, parser.nextToken()); + + long startOffset = parser.getTokenLocation().byteOffset(); + long endOffset = parser.getCurrentLocation().byteOffset(); + + byte[] slice = Arrays.copyOfRange(json, (int) startOffset, (int) endOffset); + assertEquals("null", new String(slice, StandardCharsets.UTF_8)); + } + } + + public void testMultiLineJsonByteOffsets() throws IOException { + String multiLine = "{\n \"a\": 1,\n \"b\": 2\n}"; + byte[] json = multiLine.getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); + assertEquals(0L, parser.getTokenLocation().byteOffset()); + + assertEquals(Token.FIELD_NAME, parser.nextToken()); // "a" + long aFieldOffset = parser.getTokenLocation().byteOffset(); + assertEquals(4L, aFieldOffset); // after "{\n " + + assertEquals(Token.VALUE_NUMBER, parser.nextToken()); // 1 + assertEquals(Token.FIELD_NAME, 
parser.nextToken()); // "b" + long bFieldOffset = parser.getTokenLocation().byteOffset(); + // "b" starts on line 3: "{\n \"a\": 1,\n " = 14 bytes + assertEquals(14L, bFieldOffset); + } + } + + public void testGetCurrentLocationAfterEveryTokenType() throws IOException { + byte[] json = "{\"s\":\"v\",\"n\":42,\"b\":true,\"z\":null,\"a\":[1],\"o\":{}}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + Token token; + while ((token = parser.nextToken()) != null) { + XContentLocation current = parser.getCurrentLocation(); + assertNotNull("getCurrentLocation() should not be null after " + token, current); + assertTrue( + "byte offset should be non-negative after " + token + " but was " + current.byteOffset(), + current.byteOffset() >= 0 + ); + } + } + } + + // ===== Streaming JSON navigation + byte-offset slicing ===== + // These tests validate end-to-end scenarios: navigate to a value by key or path using standard + // streaming token walking, then extract it via byte-offset slicing instead of copyCurrentStructure(). + + /** + * Navigate to an object field by key, skipping non-matching fields, then byte-slice the value. 
+ */ + public void testExtractObjectByKeyByteSlice() throws IOException { + byte[] json = "{\"skip\":1,\"target\":{\"nested\":true},\"after\":2}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + parser.nextToken(); // START_OBJECT (root) + + // Walk fields to find "target" + Token token; + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == Token.FIELD_NAME) { + String fieldName = parser.currentName(); + parser.nextToken(); // advance to value + if (fieldName.equals("target")) { + // Found it — byte-slice the object value + assertEquals(Token.START_OBJECT, parser.currentToken()); + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + String sliced = new String(Arrays.copyOfRange(json, (int) startOffset, (int) endOffset), StandardCharsets.UTF_8); + assertEquals("{\"nested\":true}", sliced); + return; + } else { + parser.skipChildren(); + } + } + } + fail("field 'target' not found"); + } + } + + /** + * Navigate to an array field by key, then byte-slice the entire array. 
+ */ + public void testExtractArrayByKeyByteSlice() throws IOException { + byte[] json = "{\"x\":\"skip\",\"items\":[1,{\"a\":2},3]}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + parser.nextToken(); // START_OBJECT (root) + + Token token; + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == Token.FIELD_NAME) { + String fieldName = parser.currentName(); + parser.nextToken(); + if (fieldName.equals("items")) { + assertEquals(Token.START_ARRAY, parser.currentToken()); + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + String sliced = new String(Arrays.copyOfRange(json, (int) startOffset, (int) endOffset), StandardCharsets.UTF_8); + assertEquals("[1,{\"a\":2},3]", sliced); + return; + } else { + parser.skipChildren(); + } + } + } + fail("field 'items' not found"); + } + } + + /** + * Navigate a nested object path (a → b → c), skipping non-matching fields at each level, then byte-slice. 
+ */ + public void testExtractNestedPathByteSlice() throws IOException { + byte[] json = "{\"a\":{\"x\":0,\"b\":{\"c\":{\"deep\":42}}}}".getBytes(StandardCharsets.UTF_8); + String[] path = { "a", "b", "c" }; + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + parser.nextToken(); // START_OBJECT (root) + + // Navigate path segments: at each level, walk fields to find the matching key + for (String segment : path) { + assertEquals(Token.START_OBJECT, parser.currentToken()); + boolean found = false; + Token token; + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == Token.FIELD_NAME) { + String fieldName = parser.currentName(); + parser.nextToken(); + if (fieldName.equals(segment)) { + found = true; + break; + } else { + parser.skipChildren(); + } + } + } + assertTrue("segment '" + segment + "' not found", found); + } + + // At the target: extract via byte slice + assertEquals(Token.START_OBJECT, parser.currentToken()); + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + String sliced = new String(Arrays.copyOfRange(json, (int) startOffset, (int) endOffset), StandardCharsets.UTF_8); + assertEquals("{\"deep\":42}", sliced); + } + } + + /** + * Walk an array to a specific index, skipping preceding elements, then byte-slice the target element. 
+ */ + public void testExtractArrayIndexByteSlice() throws IOException { + byte[] json = "{\"arr\":[\"skip\",{\"target\":true},[3,4]]}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + parser.nextToken(); // START_OBJECT + parser.nextToken(); // FIELD_NAME "arr" + parser.nextToken(); // START_ARRAY + + // Walk array to index 1 + int targetIndex = 1; + int currentIndex = 0; + while (parser.nextToken() != Token.END_ARRAY) { + if (currentIndex == targetIndex) { + assertEquals(Token.START_OBJECT, parser.currentToken()); + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + String sliced = new String(Arrays.copyOfRange(json, (int) startOffset, (int) endOffset), StandardCharsets.UTF_8); + assertEquals("{\"target\":true}", sliced); + return; + } + parser.skipChildren(); + currentIndex++; + } + fail("index " + targetIndex + " not found"); + } + } + + /** + * Navigate to scalar values by key and extract them using parser.text() / parser.booleanValue(). + * Numbers, booleans, and nulls can also be byte-sliced; strings cannot because Jackson's + * getCurrentLocation() for strings points past the opening quote, not the closing quote. + * This is fine — only objects and arrays benefit from byte slicing in practice. 
+ */ + public void testExtractScalarsByKey() throws IOException { + byte[] json = "{\"s\":\"hello\",\"n\":42,\"b\":true,\"z\":null}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + parser.nextToken(); // START_OBJECT + + Token token; + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == Token.FIELD_NAME) { + String fieldName = parser.currentName(); + parser.nextToken(); + + // Scalars: use parser.text() / parser.booleanValue() + switch (fieldName) { + case "s" -> { + assertEquals(Token.VALUE_STRING, parser.currentToken()); + assertEquals("hello", parser.text()); + } + case "n" -> { + assertEquals(Token.VALUE_NUMBER, parser.currentToken()); + assertEquals("42", parser.text()); + // Numbers can also be byte-sliced + long start = parser.getTokenLocation().byteOffset(); + long end = parser.getCurrentLocation().byteOffset(); + assertEquals("42", new String(Arrays.copyOfRange(json, (int) start, (int) end), StandardCharsets.UTF_8)); + } + case "b" -> { + assertEquals(Token.VALUE_BOOLEAN, parser.currentToken()); + assertTrue(parser.booleanValue()); + // Booleans can also be byte-sliced + long start = parser.getTokenLocation().byteOffset(); + long end = parser.getCurrentLocation().byteOffset(); + assertEquals("true", new String(Arrays.copyOfRange(json, (int) start, (int) end), StandardCharsets.UTF_8)); + } + case "z" -> { + assertEquals(Token.VALUE_NULL, parser.currentToken()); + // Nulls can also be byte-sliced + long start = parser.getTokenLocation().byteOffset(); + long end = parser.getCurrentLocation().byteOffset(); + assertEquals("null", new String(Arrays.copyOfRange(json, (int) start, (int) end), StandardCharsets.UTF_8)); + } + } + } + } + } + } + + /** + * Proves byte-offset slicing produces identical output to copyCurrentStructure(). + * Both approaches are run on the same input and their results compared. 
+ */ + public void testByteSliceMatchesCopyCurrentStructure() throws IOException { + byte[] json = "{\"data\":{\"users\":[{\"name\":\"Alice\",\"age\":30},{\"name\":\"Bob\"}],\"count\":2}}".getBytes( + StandardCharsets.UTF_8 + ); + + // First pass: extract via copyCurrentStructure + String fromCopy; + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + parser.nextToken(); // START_OBJECT + parser.nextToken(); // FIELD_NAME "data" + parser.nextToken(); // START_OBJECT (the value) + try (XContentBuilder builder = XContentBuilder.builder(XContentType.JSON.xContent())) { + builder.copyCurrentStructure(parser); + fromCopy = BytesReference.bytes(builder).utf8ToString(); + } + } + + // Second pass: extract via byte-offset slicing + String fromSlice; + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + parser.nextToken(); // START_OBJECT + parser.nextToken(); // FIELD_NAME "data" + parser.nextToken(); // START_OBJECT (the value) + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + fromSlice = new String(Arrays.copyOfRange(json, (int) startOffset, (int) endOffset), StandardCharsets.UTF_8); + } + + assertEquals(fromCopy, fromSlice); + } + + /** + * Combined navigation: object key → array index → object key, then byte-slice. + * Validates that mixed object/array navigation produces correct byte ranges. 
+ */ + public void testExtractNestedPathWithArrayIndex() throws IOException { + byte[] json = "{\"a\":[0,1,{\"k\":{\"found\":true}}]}".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + parser.nextToken(); // START_OBJECT (root) + + // Step 1: find field "a" + while (parser.nextToken() != Token.END_OBJECT) { + if (parser.currentToken() == Token.FIELD_NAME && parser.currentName().equals("a")) { + parser.nextToken(); // START_ARRAY + break; + } + parser.nextToken(); + parser.skipChildren(); + } + assertEquals(Token.START_ARRAY, parser.currentToken()); + + // Step 2: walk array to index 2 + int currentIndex = 0; + while (parser.nextToken() != Token.END_ARRAY) { + if (currentIndex == 2) { + break; + } + parser.skipChildren(); + currentIndex++; + } + assertEquals(Token.START_OBJECT, parser.currentToken()); + + // Step 3: find field "k" + while (parser.nextToken() != Token.END_OBJECT) { + if (parser.currentToken() == Token.FIELD_NAME && parser.currentName().equals("k")) { + parser.nextToken(); // the value + break; + } + parser.nextToken(); + parser.skipChildren(); + } + assertEquals(Token.START_OBJECT, parser.currentToken()); + + // Extract via byte slice + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + String sliced = new String(Arrays.copyOfRange(json, (int) startOffset, (int) endOffset), StandardCharsets.UTF_8); + assertEquals("{\"found\":true}", sliced); + } + } + + /** + * Navigate and byte-slice from pretty-printed JSON. + * Byte offsets account for whitespace; the sliced result includes it verbatim. 
+ */ + public void testExtractFromPrettyPrintedJson() throws IOException { + String pretty = "{\n \"info\": {\n \"version\": 1,\n \"name\": \"test\"\n }\n}"; + byte[] json = pretty.getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + parser.nextToken(); // START_OBJECT + + // Find field "info" + while (parser.nextToken() != Token.END_OBJECT) { + if (parser.currentToken() == Token.FIELD_NAME && parser.currentName().equals("info")) { + parser.nextToken(); + break; + } + parser.nextToken(); + parser.skipChildren(); + } + assertEquals(Token.START_OBJECT, parser.currentToken()); + + long startOffset = parser.getTokenLocation().byteOffset(); + parser.skipChildren(); + long endOffset = parser.getCurrentLocation().byteOffset(); + String sliced = new String(Arrays.copyOfRange(json, (int) startOffset, (int) endOffset), StandardCharsets.UTF_8); + + // The slice preserves original whitespace — still valid JSON, just formatted + // differently than what copyCurrentStructure() would produce + assertEquals("{\n \"version\": 1,\n \"name\": \"test\"\n }", sliced); + } + } + + public void testStringTokenLocationAtOpeningQuote() throws IOException { + byte[] json = "{\"s\":\"hello\"}".getBytes(StandardCharsets.UTF_8); + // 0123456789012 + try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + assertEquals(Token.START_OBJECT, parser.nextToken()); + assertEquals(Token.FIELD_NAME, parser.nextToken()); + assertEquals(Token.VALUE_STRING, parser.nextToken()); + + long startOffset = parser.getTokenLocation().byteOffset(); + // Token location for VALUE_STRING points to the opening quote + assertEquals(5L, startOffset); + assertEquals('"', (char) json[(int) startOffset]); + } + } + +} diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/FilterXContentParser.java 
b/libs/x-content/src/main/java/org/elasticsearch/xcontent/FilterXContentParser.java index 4ff1185dc560c..9a9c33c1e11b9 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/FilterXContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/FilterXContentParser.java @@ -228,6 +228,11 @@ public XContentLocation getTokenLocation() { return delegate().getTokenLocation(); } + @Override + public XContentLocation getCurrentLocation() { + return delegate().getCurrentLocation(); + } + @Override public T namedObject(Class categoryClass, String name, Object context) throws IOException { return delegate().namedObject(categoryClass, name, context); diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentLocation.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentLocation.java index b337e99fc4106..e731952f875e4 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentLocation.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentLocation.java @@ -14,10 +14,38 @@ * in some XContent e.g. JSON. Locations are typically used to communicate the * position of a parsing error to end users and consequently have line and * column numbers starting from 1. + * + *

The optional {@code byteOffset} field holds the absolute byte position + * within the source stream ({@code -1} when not available). Byte offsets are + * used for programmatic byte-range slicing and are not included in the + * human-readable {@link #toString()} output. */ -public record XContentLocation(int lineNumber, int columnNumber) { +public record XContentLocation(int lineNumber, int columnNumber, long byteOffset) { - public static final XContentLocation UNKNOWN = new XContentLocation(-1, -1); + public static final XContentLocation UNKNOWN = new XContentLocation(-1, -1, -1L); + + /** + * Backward-compatible constructor that sets {@code byteOffset} to {@code -1} + * (not available). + */ + public XContentLocation(int lineNumber, int columnNumber) { + this(lineNumber, columnNumber, -1L); + } + + /** Returns {@code true} if the line number is valid (1-based, so must be ≥ 1). */ + public boolean hasValidLineNumber() { + return lineNumber >= 1; + } + + /** Returns {@code true} if the column number is valid (1-based, so must be ≥ 1). */ + public boolean hasValidColumnNumber() { + return columnNumber >= 1; + } + + /** Returns {@code true} if the byte offset is available (non-negative). */ + public boolean hasValidByteOffset() { + return byteOffset >= 0; + } @Override public String toString() { diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentParser.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentParser.java index c8ca681722e23..1f334c3969108 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentParser.java @@ -206,6 +206,30 @@ Map map(Supplier> mapFactory, CheckedFunctionFor scalar tokens (strings, numbers, booleans, null), {@code nextToken()} + * fully consumes the value, so this returns the position just past it. 
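+ * For structural tokens ({@code START_OBJECT}, {@code START_ARRAY}), + * only the opening delimiter has been consumed. Exception: the JSON (Jackson) parser reads string values lazily, so after {@code VALUE_STRING} the position can precede the closing quote. + * + *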

To get the byte range of a composite value (object or array) or a non-string scalar, + * use the pattern: + *

{@code
+     * long start = parser.getTokenLocation().byteOffset();
+     * parser.skipChildren();  // no-op for scalars
+     * long end = parser.getCurrentLocation().byteOffset();
+     * }
+ * + * @return the current read position, or null if cannot be determined + * @see #getTokenLocation() + */ + XContentLocation getCurrentLocation(); + // TODO remove context entirely when it isn't needed /** * Parse an object by name. diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/MapXContentParser.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/MapXContentParser.java index 8893ed7450c81..df85bb8716fb5 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/MapXContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/MapXContentParser.java @@ -214,6 +214,11 @@ public XContentLocation getTokenLocation() { return new XContentLocation(0, 0); } + @Override + public XContentLocation getCurrentLocation() { + return new XContentLocation(0, 0); + } + @Override public boolean isClosed() { return closed; diff --git a/libs/x-content/src/test/java/org/elasticsearch/xcontent/MapXContentParserTests.java b/libs/x-content/src/test/java/org/elasticsearch/xcontent/MapXContentParserTests.java index ca72a504c3046..eab8c66a7950f 100644 --- a/libs/x-content/src/test/java/org/elasticsearch/xcontent/MapXContentParserTests.java +++ b/libs/x-content/src/test/java/org/elasticsearch/xcontent/MapXContentParserTests.java @@ -131,6 +131,29 @@ public void testParseBooleanStringValue() throws IOException { } } + public void testLocationReturnsZeros() throws IOException { + try ( + MapXContentParser parser = new MapXContentParser( + xContentRegistry(), + LoggingDeprecationHandler.INSTANCE, + Map.of("key", "value"), + randomFrom(XContentType.values()) + ) + ) { + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + XContentLocation tokenLoc = parser.getTokenLocation(); + assertEquals(0, tokenLoc.lineNumber()); + assertEquals(0, tokenLoc.columnNumber()); + assertFalse(tokenLoc.hasValidLineNumber()); + assertFalse(tokenLoc.hasValidColumnNumber()); + 
assertFalse(tokenLoc.hasValidByteOffset()); + XContentLocation currentLoc = parser.getCurrentLocation(); + assertEquals(0, currentLoc.lineNumber()); + assertEquals(0, currentLoc.columnNumber()); + assertFalse(currentLoc.hasValidByteOffset()); + } + } + private void compareTokens(CheckedConsumer consumer) throws IOException { for (XContentType xContentType : EnumSet.allOf(XContentType.class)) { logger.info("--> testing with xcontent type: {}", xContentType); diff --git a/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentLocationTests.java b/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentLocationTests.java new file mode 100644 index 0000000000000..3a0c63a6bc869 --- /dev/null +++ b/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentLocationTests.java @@ -0,0 +1,73 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.xcontent; + +import org.elasticsearch.test.ESTestCase; + +public class XContentLocationTests extends ESTestCase { + + public void testTwoArgConstructorDefaultsByteOffset() { + XContentLocation loc = new XContentLocation(1, 5); + assertEquals(1, loc.lineNumber()); + assertEquals(5, loc.columnNumber()); + assertEquals(-1L, loc.byteOffset()); + } + + public void testThreeArgConstructorPreservesAllFields() { + XContentLocation loc = new XContentLocation(3, 10, 42L); + assertEquals(3, loc.lineNumber()); + assertEquals(10, loc.columnNumber()); + assertEquals(42L, loc.byteOffset()); + } + + public void testUnknownHasByteOffsetMinusOne() { + assertEquals(-1, XContentLocation.UNKNOWN.lineNumber()); + assertEquals(-1, XContentLocation.UNKNOWN.columnNumber()); + assertEquals(-1L, XContentLocation.UNKNOWN.byteOffset()); + } + + public void testEqualityIncludesByteOffset() { + XContentLocation a = new XContentLocation(1, 1, 0L); + XContentLocation b = new XContentLocation(1, 1, 99L); + XContentLocation c = new XContentLocation(1, 1, 0L); + assertNotEquals(a, b); + assertEquals(a, c); + assertEquals(a.hashCode(), c.hashCode()); + } + + public void testHasValidLineNumber() { + assertTrue(new XContentLocation(1, 5, 0L).hasValidLineNumber()); + assertTrue(new XContentLocation(100, 1).hasValidLineNumber()); + assertFalse(new XContentLocation(0, 1).hasValidLineNumber()); + assertFalse(new XContentLocation(-1, 1).hasValidLineNumber()); + assertFalse(XContentLocation.UNKNOWN.hasValidLineNumber()); + } + + public void testHasValidColumnNumber() { + assertTrue(new XContentLocation(1, 1, 0L).hasValidColumnNumber()); + assertTrue(new XContentLocation(1, 99).hasValidColumnNumber()); + assertFalse(new XContentLocation(1, 0).hasValidColumnNumber()); + assertFalse(new XContentLocation(1, -1).hasValidColumnNumber()); + assertFalse(XContentLocation.UNKNOWN.hasValidColumnNumber()); + } + + public void testHasValidByteOffset() { + assertTrue(new 
XContentLocation(1, 1, 0L).hasValidByteOffset()); + assertTrue(new XContentLocation(1, 1, 999L).hasValidByteOffset()); + assertFalse(new XContentLocation(1, 1).hasValidByteOffset()); + assertFalse(new XContentLocation(1, 1, -1L).hasValidByteOffset()); + assertFalse(XContentLocation.UNKNOWN.hasValidByteOffset()); + } + + public void testToStringOmitsByteOffset() { + XContentLocation loc = new XContentLocation(5, 12, 100L); + assertEquals("5:12", loc.toString()); + } +} diff --git a/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java b/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java index 5aa3b1e140074..0d485837af500 100644 --- a/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java +++ b/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java @@ -670,6 +670,80 @@ public void testJsonIncludeSourceOnParserError() throws IOException { assertThat(parseException.getMessage(), not(containsString(source))); } + public void testYamlTokenLocationReturnsMinusOneByteOffset() throws IOException { + byte[] yaml = "key: value\n".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.YAML.xContent().createParser(XContentParserConfiguration.EMPTY, yaml)) { + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + assertEquals(-1L, parser.getTokenLocation().byteOffset()); + } + } + + public void testYamlGetCurrentLocationReturnsMinusOneByteOffset() throws IOException { + byte[] yaml = "key: value\n".getBytes(StandardCharsets.UTF_8); + try (XContentParser parser = XContentType.YAML.xContent().createParser(XContentParserConfiguration.EMPTY, yaml)) { + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + XContentLocation current = parser.getCurrentLocation(); + assertNotNull(current); + assertEquals(-1L, current.byteOffset()); + } + } + + public void testCborHasByteOffsets() throws IOException { + byte[] json = 
"{\"k\":1}".getBytes(StandardCharsets.UTF_8); + byte[] cbor; + try (var builder = XContentBuilder.builder(XContentType.CBOR.xContent())) { + try (XContentParser jsonParser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + builder.copyCurrentStructure(jsonParser); + } + cbor = BytesReference.bytes(builder).toBytesRef().bytes; + } + try (XContentParser parser = XContentType.CBOR.xContent().createParser(XContentParserConfiguration.EMPTY, cbor)) { + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + XContentLocation tokenLoc = parser.getTokenLocation(); + assertTrue(tokenLoc.byteOffset() >= 0); + + XContentLocation currentLoc = parser.getCurrentLocation(); + assertNotNull(currentLoc); + assertTrue(currentLoc.byteOffset() > tokenLoc.byteOffset()); + } + } + + public void testSmileHasByteOffsets() throws IOException { + byte[] json = "{\"k\":1}".getBytes(StandardCharsets.UTF_8); + byte[] smile; + try (var builder = XContentBuilder.builder(XContentType.SMILE.xContent())) { + try (XContentParser jsonParser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + builder.copyCurrentStructure(jsonParser); + } + smile = BytesReference.bytes(builder).toBytesRef().bytes; + } + try (XContentParser parser = XContentType.SMILE.xContent().createParser(XContentParserConfiguration.EMPTY, smile)) { + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + XContentLocation tokenLoc = parser.getTokenLocation(); + assertTrue(tokenLoc.byteOffset() >= 0); + + XContentLocation currentLoc = parser.getCurrentLocation(); + assertNotNull(currentLoc); + assertTrue(currentLoc.byteOffset() > tokenLoc.byteOffset()); + } + } + + public void testFilterXContentParserDelegatesGetCurrentLocation() throws IOException { + byte[] json = "{\"a\":1}".getBytes(StandardCharsets.UTF_8); + try (XContentParser inner = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) { + 
XContentParser wrapper = new FilterXContentParserWrapper(inner); + assertEquals(XContentParser.Token.START_OBJECT, wrapper.nextToken()); + + XContentLocation tokenLoc = wrapper.getTokenLocation(); + assertEquals(inner.getTokenLocation(), tokenLoc); + assertEquals(0L, tokenLoc.byteOffset()); + + XContentLocation currentLoc = wrapper.getCurrentLocation(); + assertEquals(inner.getCurrentLocation(), currentLoc); + assertTrue(currentLoc.byteOffset() > 0); + } + } + private XContentParser createParser(XContent xContent, XContentParserConfiguration config, String content) throws IOException { return randomBoolean() ? xContent.createParser(config, content) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java index bd7cd95ac5c50..5abc9cd18eac1 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java @@ -761,9 +761,15 @@ public String currentName() throws IOException { @Override public XContentLocation getTokenLocation() { - // return fixed token location: it's not possible to match the token location while parsing through the object structure, + // return fixed location: it's not possible to match the real location while parsing through the object structure, // because completion metadata have been rewritten hence they won't match the incoming document return locationOffset; } + + @Override + public XContentLocation getCurrentLocation() { + // same as getTokenLocation() — real positions are not available for rewritten metadata + return locationOffset; + } } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java index 59205eef9b2d1..533ad28420adc 100644 --- 
a/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java @@ -339,6 +339,14 @@ public XContentLocation getTokenLocation() { return currentLocation; } + @Override + public XContentLocation getCurrentLocation() { + if (state == State.PARSING_ORIGINAL_CONTENT) { + return super.getCurrentLocation(); + } + return currentLocation; + } + @Override public Token currentToken() { return switch (state) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java index a4f86676a597a..0428e37698190 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java @@ -890,6 +890,7 @@ public void testMultiFieldParserSimpleValue() throws IOException { // we don't check currentToken here because it returns START_OBJECT that is inconsistent with returning a value assertEquals("text", multiFieldParser.textOrNull()); assertEquals(documentParser.getTokenLocation(), multiFieldParser.getTokenLocation()); + assertEquals(documentParser.getTokenLocation(), multiFieldParser.getCurrentLocation()); assertEquals(documentParser.currentName(), multiFieldParser.currentName()); } @@ -918,6 +919,7 @@ public void testMultiFieldParserCompletionSubfield() throws IOException { assertEquals(expectedParser.currentToken(), token); assertEquals(expectedParser.currentToken(), multiFieldParser.currentToken()); assertEquals(expectedTokenLocation, multiFieldParser.getTokenLocation()); + assertEquals(expectedTokenLocation, multiFieldParser.getCurrentLocation()); assertEquals(documentParser.nextToken(), multiFieldParser.currentToken()); assertEquals(documentParser.currentName(), multiFieldParser.currentName()); } diff --git 
a/server/src/test/java/org/elasticsearch/index/mapper/DotExpandingXContentParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DotExpandingXContentParserTests.java index b38c65c1710d6..53cf7d76cbadc 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DotExpandingXContentParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DotExpandingXContentParserTests.java @@ -450,4 +450,82 @@ public void testParseList() throws Exception { assertEquals("one", list.get(0)); assertEquals("two", list.get(1)); } + + public void testGetCurrentLocation() throws IOException { + // Mirrors testGetTokenLocation structure. getCurrentLocation() tracks the parser cursor, + // not the token start. For synthetic tokens (expanded dots), it returns the cached location + // (same as getTokenLocation). For original content tokens, it delegates to the underlying parser. + String jsonInput = """ + {"first.dot":{"second.dot":"value", + "value":null}}\ + """; + XContentParser expectedParser = createParser(JsonXContent.jsonXContent, jsonInput); + XContentParser dotExpandedParser = DotExpandingXContentParser.expandDots( + createParser(JsonXContent.jsonXContent, jsonInput), + new ContentPath() + ); + + assertNotNull(dotExpandedParser.getCurrentLocation()); + // START_OBJECT - original content + assertEquals(XContentParser.Token.START_OBJECT, dotExpandedParser.nextToken()); + assertEquals(XContentParser.Token.START_OBJECT, expectedParser.nextToken()); + assertEquals(expectedParser.getCurrentLocation(), dotExpandedParser.getCurrentLocation()); + // FIELD_NAME "first" - synthetic, getCurrentLocation == getTokenLocation (cached) + assertEquals(XContentParser.Token.FIELD_NAME, expectedParser.nextToken()); + assertEquals(XContentParser.Token.FIELD_NAME, dotExpandedParser.nextToken()); + assertEquals("first", dotExpandedParser.currentName()); + assertEquals(dotExpandedParser.getTokenLocation(), dotExpandedParser.getCurrentLocation()); + // START_OBJECT - 
synthetic + assertEquals(XContentParser.Token.START_OBJECT, dotExpandedParser.nextToken()); + assertEquals(dotExpandedParser.getTokenLocation(), dotExpandedParser.getCurrentLocation()); + // FIELD_NAME "dot" - synthetic + assertEquals(XContentParser.Token.FIELD_NAME, dotExpandedParser.nextToken()); + assertEquals("dot", dotExpandedParser.currentName()); + assertEquals(dotExpandedParser.getTokenLocation(), dotExpandedParser.getCurrentLocation()); + // START_OBJECT - original content + assertEquals(XContentParser.Token.START_OBJECT, expectedParser.nextToken()); + assertEquals(XContentParser.Token.START_OBJECT, dotExpandedParser.nextToken()); + assertEquals(expectedParser.getCurrentLocation(), dotExpandedParser.getCurrentLocation()); + // FIELD_NAME "second" - synthetic + assertEquals(XContentParser.Token.FIELD_NAME, expectedParser.nextToken()); + assertEquals(XContentParser.Token.FIELD_NAME, dotExpandedParser.nextToken()); + assertEquals("second", dotExpandedParser.currentName()); + assertEquals(dotExpandedParser.getTokenLocation(), dotExpandedParser.getCurrentLocation()); + // START_OBJECT - synthetic + assertEquals(XContentParser.Token.START_OBJECT, dotExpandedParser.nextToken()); + assertEquals(dotExpandedParser.getTokenLocation(), dotExpandedParser.getCurrentLocation()); + // FIELD_NAME "dot" - synthetic + assertEquals(XContentParser.Token.FIELD_NAME, dotExpandedParser.nextToken()); + assertEquals("dot", dotExpandedParser.currentName()); + assertEquals(dotExpandedParser.getTokenLocation(), dotExpandedParser.getCurrentLocation()); + // VALUE_STRING "value" - original content + assertEquals(XContentParser.Token.VALUE_STRING, expectedParser.nextToken()); + assertEquals(XContentParser.Token.VALUE_STRING, dotExpandedParser.nextToken()); + assertEquals(expectedParser.getCurrentLocation(), dotExpandedParser.getCurrentLocation()); + // END_OBJECT - synthetic + assertEquals(XContentParser.Token.END_OBJECT, dotExpandedParser.nextToken()); + 
assertEquals(dotExpandedParser.getTokenLocation(), dotExpandedParser.getCurrentLocation()); + // FIELD_NAME "value" - original content + assertEquals(XContentParser.Token.FIELD_NAME, expectedParser.nextToken()); + assertEquals(XContentParser.Token.FIELD_NAME, dotExpandedParser.nextToken()); + assertEquals("value", dotExpandedParser.currentName()); + assertEquals(expectedParser.getCurrentLocation(), dotExpandedParser.getCurrentLocation()); + // VALUE_NULL - original content + assertEquals(XContentParser.Token.VALUE_NULL, expectedParser.nextToken()); + assertEquals(XContentParser.Token.VALUE_NULL, dotExpandedParser.nextToken()); + assertEquals(expectedParser.getCurrentLocation(), dotExpandedParser.getCurrentLocation()); + // END_OBJECT - original content (inner `}`) + assertEquals(XContentParser.Token.END_OBJECT, dotExpandedParser.nextToken()); + assertEquals(XContentParser.Token.END_OBJECT, expectedParser.nextToken()); + assertEquals(expectedParser.getCurrentLocation(), dotExpandedParser.getCurrentLocation()); + // END_OBJECT - synthetic (closing "first.dot" expansion) + assertEquals(XContentParser.Token.END_OBJECT, dotExpandedParser.nextToken()); + assertEquals(dotExpandedParser.getTokenLocation(), dotExpandedParser.getCurrentLocation()); + // END_OBJECT - original content (outer `}`) + assertEquals(XContentParser.Token.END_OBJECT, dotExpandedParser.nextToken()); + assertEquals(XContentParser.Token.END_OBJECT, expectedParser.nextToken()); + assertEquals(expectedParser.getCurrentLocation(), dotExpandedParser.getCurrentLocation()); + assertNull(dotExpandedParser.nextToken()); + assertNull(expectedParser.nextToken()); + } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldMapperTests.java index 72f7e0b05e685..5bdbf5eed3950 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldMapperTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldMapperTests.java @@ -436,6 +436,8 @@ public void testGeoHashMultiFieldParser() throws IOException { assertEquals(expectedParser.currentToken(), parser.currentToken()); assertEquals(expectedParser.currentName(), parser.currentName()); assertEquals(expectedParser.getTokenLocation(), parser.getTokenLocation()); + // getCurrentLocation() delegates to docParser, whose cursor differs from expectedParser's + assertEquals(docParser.getCurrentLocation(), parser.getCurrentLocation()); assertEquals(expectedParser.textOrNull(), parser.textOrNull()); expectThrows(UnsupportedOperationException.class, parser::nextToken); } diff --git a/test/yaml-rest-runner/src/main/java/org/elasticsearch/test/rest/yaml/ParameterizableYamlXContentParser.java b/test/yaml-rest-runner/src/main/java/org/elasticsearch/test/rest/yaml/ParameterizableYamlXContentParser.java index 786953b505f9e..7669aa3425c76 100644 --- a/test/yaml-rest-runner/src/main/java/org/elasticsearch/test/rest/yaml/ParameterizableYamlXContentParser.java +++ b/test/yaml-rest-runner/src/main/java/org/elasticsearch/test/rest/yaml/ParameterizableYamlXContentParser.java @@ -274,6 +274,11 @@ public XContentLocation getTokenLocation() { return delegate.getTokenLocation(); } + @Override + public XContentLocation getCurrentLocation() { + return delegate.getCurrentLocation(); + } + @Override public T namedObject(Class categoryClass, String name, Object context) throws IOException { return getXContentRegistry().parseNamedObject(categoryClass, name, this, context);