Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/143501.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 143501
summary: Expose byte offsets on XContentParser via getCurrentLocation()
area: Infra/Core
type: enhancement
issues:
- 142873
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public void allowDuplicateKeys(boolean allowDuplicateKeys) {
private static XContentLocation getLocation(JsonProcessingException e) {
JsonLocation loc = e.getLocation();
if (loc != null) {
return new XContentLocation(loc.getLineNr(), loc.getColumnNr());
return new XContentLocation(loc.getLineNr(), loc.getColumnNr(), loc.getByteOffset());
} else {
return null;
}
Expand Down Expand Up @@ -311,7 +311,16 @@ public XContentLocation getTokenLocation() {
if (loc == null) {
return null;
}
return new XContentLocation(loc.getLineNr(), loc.getColumnNr());
return new XContentLocation(loc.getLineNr(), loc.getColumnNr(), loc.getByteOffset());
}

/**
 * Reports how far the wrapped parser has read, converted from its {@code JsonLocation}
 * into an {@link XContentLocation} carrying line, column and byte offset.
 *
 * @return the current read location, or {@code null} when the wrapped parser supplies none
 */
@Override
public XContentLocation getCurrentLocation() {
    final JsonLocation current = parser.getCurrentLocation();
    return current == null
        ? null
        : new XContentLocation(current.getLineNr(), current.getColumnNr(), current.getByteOffset());
}

@Override
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,11 @@ public XContentLocation getTokenLocation() {
return delegate().getTokenLocation();
}

/**
 * Forwards to the parser returned by {@code delegate()} and hands its result back unchanged.
 */
@Override
public XContentLocation getCurrentLocation() {
return delegate().getCurrentLocation();
}

@Override
public <T> T namedObject(Class<T> categoryClass, String name, Object context) throws IOException {
return delegate().namedObject(categoryClass, name, context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,38 @@
* in some XContent e.g. JSON. Locations are typically used to communicate the
* position of a parsing error to end users and consequently have line and
* column numbers starting from 1.
*
* <p>The optional {@code byteOffset} field holds the absolute byte position
* within the source stream ({@code -1} when not available). Byte offsets are
* used for programmatic byte-range slicing and are not included in the
* human-readable {@link #toString()} output.
*/
public record XContentLocation(int lineNumber, int columnNumber) {
public record XContentLocation(int lineNumber, int columnNumber, long byteOffset) {

public static final XContentLocation UNKNOWN = new XContentLocation(-1, -1);
public static final XContentLocation UNKNOWN = new XContentLocation(-1, -1, -1L);

/**
 * Backward-compatible constructor that sets {@code byteOffset} to {@code -1}
 * (not available).
 *
 * @param lineNumber   line number to record
 * @param columnNumber column number to record
 */
public XContentLocation(int lineNumber, int columnNumber) {
this(lineNumber, columnNumber, -1L);
}

/**
 * Tells whether {@code lineNumber} is usable. Line numbers are 1-based, so zero and
 * negative values are reported as invalid.
 *
 * @return {@code true} when the line number is strictly positive
 */
public boolean hasValidLineNumber() {
    return lineNumber > 0;
}

/**
 * Tells whether {@code columnNumber} is usable. Column numbers are 1-based, so zero and
 * negative values are reported as invalid.
 *
 * @return {@code true} when the column number is strictly positive
 */
public boolean hasValidColumnNumber() {
    return columnNumber > 0;
}

/**
 * Tells whether a byte offset was recorded. Offsets start at zero, so only negative
 * values mean "not available".
 *
 * @return {@code true} when the byte offset is zero or greater
 */
public boolean hasValidByteOffset() {
    return byteOffset > -1L;
}

@Override
public String toString() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,30 @@ <T> Map<String, T> map(Supplier<Map<String, T>> mapFactory, CheckedFunction<XCon
*/
XContentLocation getTokenLocation();

/**
 * Returns the location of the last processed input unit (byte or character).
 * This tracks the parser's current read position — how far it has consumed
 * into the underlying stream — not necessarily the end of the current value.
 * The semantics match Jackson's {@code JsonParser.currentLocation()}.
 *
 * <p>For scalar tokens (strings, numbers, booleans, null), {@code nextToken()}
 * fully consumes the value, so this returns the position just past it.
 * For structural tokens ({@code START_OBJECT}, {@code START_ARRAY}),
 * only the opening delimiter has been consumed.
 *
 * <p>To get the byte range of an arbitrary value (scalar or composite),
 * use the pattern:
 * <pre>{@code
 * long start = parser.getTokenLocation().byteOffset();
 * parser.skipChildren(); // no-op for scalars
 * long end = parser.getCurrentLocation().byteOffset();
 * }</pre>
 *
 * <p>The byte offset of the returned location is {@code -1} when it is not
 * available, so callers of the pattern above should check
 * {@link XContentLocation#hasValidByteOffset()} on both locations before
 * slicing.
 *
 * @return the current read position, or null if cannot be determined
 * @see #getTokenLocation()
 */
XContentLocation getCurrentLocation();

// TODO remove context entirely when it isn't needed
/**
* Parse an object by name.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,11 @@ public XContentLocation getTokenLocation() {
return new XContentLocation(0, 0);
}

/**
 * Always reports line 0, column 0. The two-argument {@code XContentLocation} constructor
 * used here leaves the byte offset at {@code -1} (not available).
 */
@Override
public XContentLocation getCurrentLocation() {
return new XContentLocation(0, 0);
}

@Override
public boolean isClosed() {
return closed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,29 @@ public void testParseBooleanStringValue() throws IOException {
}
}

/**
 * Checks that a {@code MapXContentParser} reports line 0 / column 0 and no usable byte offset
 * from both {@code getTokenLocation()} and {@code getCurrentLocation()}.
 */
public void testLocationReturnsZeros() throws IOException {
    try (
        MapXContentParser mapParser = new MapXContentParser(
            xContentRegistry(),
            LoggingDeprecationHandler.INSTANCE,
            Map.of("key", "value"),
            randomFrom(XContentType.values())
        )
    ) {
        final XContentParser.Token first = mapParser.nextToken();
        assertEquals(XContentParser.Token.START_OBJECT, first);

        // Position of the token that was just returned.
        final XContentLocation atToken = mapParser.getTokenLocation();
        assertEquals(0, atToken.lineNumber());
        assertEquals(0, atToken.columnNumber());
        assertFalse(atToken.hasValidLineNumber());
        assertFalse(atToken.hasValidColumnNumber());
        assertFalse(atToken.hasValidByteOffset());

        // Read position of the parser.
        final XContentLocation atCursor = mapParser.getCurrentLocation();
        assertEquals(0, atCursor.lineNumber());
        assertEquals(0, atCursor.columnNumber());
        assertFalse(atCursor.hasValidByteOffset());
    }
}

private void compareTokens(CheckedConsumer<XContentBuilder, IOException> consumer) throws IOException {
for (XContentType xContentType : EnumSet.allOf(XContentType.class)) {
logger.info("--> testing with xcontent type: {}", xContentType);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.xcontent;

import org.elasticsearch.test.ESTestCase;

/**
 * Unit tests for the {@link XContentLocation} record: its constructors, the {@code UNKNOWN}
 * constant, the validity helpers, equality, and the string form.
 */
public class XContentLocationTests extends ESTestCase {

    /** The two-argument constructor leaves the byte offset at -1. */
    public void testTwoArgConstructorDefaultsByteOffset() {
        final XContentLocation location = new XContentLocation(1, 5);
        assertEquals(1, location.lineNumber());
        assertEquals(5, location.columnNumber());
        assertEquals(-1L, location.byteOffset());
    }

    /** The three-argument constructor stores every component as given. */
    public void testThreeArgConstructorPreservesAllFields() {
        final XContentLocation location = new XContentLocation(3, 10, 42L);
        assertEquals(3, location.lineNumber());
        assertEquals(10, location.columnNumber());
        assertEquals(42L, location.byteOffset());
    }

    /** {@code UNKNOWN} uses -1 for all three components. */
    public void testUnknownHasByteOffsetMinusOne() {
        final XContentLocation unknown = XContentLocation.UNKNOWN;
        assertEquals(-1, unknown.lineNumber());
        assertEquals(-1, unknown.columnNumber());
        assertEquals(-1L, unknown.byteOffset());
    }

    /** Two locations that differ only in byte offset are not equal. */
    public void testEqualityIncludesByteOffset() {
        final XContentLocation base = new XContentLocation(1, 1, 0L);
        final XContentLocation otherOffset = new XContentLocation(1, 1, 99L);
        final XContentLocation twin = new XContentLocation(1, 1, 0L);
        assertNotEquals(base, otherOffset);
        assertEquals(base, twin);
        assertEquals(base.hashCode(), twin.hashCode());
    }

    /** Line numbers are valid from 1 upwards. */
    public void testHasValidLineNumber() {
        // valid
        assertTrue(new XContentLocation(1, 5, 0L).hasValidLineNumber());
        assertTrue(new XContentLocation(100, 1).hasValidLineNumber());
        // invalid
        assertFalse(new XContentLocation(0, 1).hasValidLineNumber());
        assertFalse(new XContentLocation(-1, 1).hasValidLineNumber());
        assertFalse(XContentLocation.UNKNOWN.hasValidLineNumber());
    }

    /** Column numbers are valid from 1 upwards. */
    public void testHasValidColumnNumber() {
        // valid
        assertTrue(new XContentLocation(1, 1, 0L).hasValidColumnNumber());
        assertTrue(new XContentLocation(1, 99).hasValidColumnNumber());
        // invalid
        assertFalse(new XContentLocation(1, 0).hasValidColumnNumber());
        assertFalse(new XContentLocation(1, -1).hasValidColumnNumber());
        assertFalse(XContentLocation.UNKNOWN.hasValidColumnNumber());
    }

    /** Byte offsets are valid from 0 upwards; the two-argument constructor yields an invalid one. */
    public void testHasValidByteOffset() {
        // valid
        assertTrue(new XContentLocation(1, 1, 0L).hasValidByteOffset());
        assertTrue(new XContentLocation(1, 1, 999L).hasValidByteOffset());
        // invalid
        assertFalse(new XContentLocation(1, 1).hasValidByteOffset());
        assertFalse(new XContentLocation(1, 1, -1L).hasValidByteOffset());
        assertFalse(XContentLocation.UNKNOWN.hasValidByteOffset());
    }

    /** The string form is {@code line:column}; the byte offset is left out. */
    public void testToStringOmitsByteOffset() {
        final XContentLocation location = new XContentLocation(5, 12, 100L);
        final String rendered = location.toString();
        assertEquals("5:12", rendered);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,80 @@ public void testJsonIncludeSourceOnParserError() throws IOException {
assertThat(parseException.getMessage(), not(containsString(source)));
}

/** The YAML parser's token location is expected to carry a byte offset of -1. */
public void testYamlTokenLocationReturnsMinusOneByteOffset() throws IOException {
    final byte[] source = "key: value\n".getBytes(StandardCharsets.UTF_8);
    try (XContentParser yamlParser = XContentType.YAML.xContent().createParser(XContentParserConfiguration.EMPTY, source)) {
        final XContentParser.Token first = yamlParser.nextToken();
        assertEquals(XContentParser.Token.START_OBJECT, first);

        final XContentLocation atToken = yamlParser.getTokenLocation();
        assertEquals(-1L, atToken.byteOffset());
    }
}

/** The YAML parser's current location must be non-null and carry a byte offset of -1. */
public void testYamlGetCurrentLocationReturnsMinusOneByteOffset() throws IOException {
    final byte[] source = "key: value\n".getBytes(StandardCharsets.UTF_8);
    try (XContentParser yamlParser = XContentType.YAML.xContent().createParser(XContentParserConfiguration.EMPTY, source)) {
        final XContentParser.Token first = yamlParser.nextToken();
        assertEquals(XContentParser.Token.START_OBJECT, first);

        final XContentLocation atCursor = yamlParser.getCurrentLocation();
        assertNotNull(atCursor);
        assertEquals(-1L, atCursor.byteOffset());
    }
}

/**
 * Verifies that the CBOR parser reports byte offsets: after reading {@code START_OBJECT} the
 * token offset is non-negative and the current read position lies strictly beyond it.
 */
public void testCborHasByteOffsets() throws IOException {
    byte[] json = "{\"k\":1}".getBytes(StandardCharsets.UTF_8);
    byte[] cbor;
    try (var builder = XContentBuilder.builder(XContentType.CBOR.xContent())) {
        try (XContentParser jsonParser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) {
            builder.copyCurrentStructure(jsonParser);
        }
        // Copy exactly the encoded bytes. Reading the bytes field of toBytesRef() would hand over the
        // whole backing array and ignore the ref's offset and length.
        cbor = BytesReference.toBytes(BytesReference.bytes(builder));
    }
    try (XContentParser parser = XContentType.CBOR.xContent().createParser(XContentParserConfiguration.EMPTY, cbor)) {
        assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
        XContentLocation tokenLoc = parser.getTokenLocation();
        assertTrue(tokenLoc.byteOffset() >= 0);

        XContentLocation currentLoc = parser.getCurrentLocation();
        assertNotNull(currentLoc);
        assertTrue(currentLoc.byteOffset() > tokenLoc.byteOffset());
    }
}

/**
 * Verifies that the SMILE parser reports byte offsets: after reading {@code START_OBJECT} the
 * token offset is non-negative and the current read position lies strictly beyond it.
 */
public void testSmileHasByteOffsets() throws IOException {
    byte[] json = "{\"k\":1}".getBytes(StandardCharsets.UTF_8);
    byte[] smile;
    try (var builder = XContentBuilder.builder(XContentType.SMILE.xContent())) {
        try (XContentParser jsonParser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, json)) {
            builder.copyCurrentStructure(jsonParser);
        }
        // Copy exactly the encoded bytes. Reading the bytes field of toBytesRef() would hand over the
        // whole backing array and ignore the ref's offset and length.
        smile = BytesReference.toBytes(BytesReference.bytes(builder));
    }
    try (XContentParser parser = XContentType.SMILE.xContent().createParser(XContentParserConfiguration.EMPTY, smile)) {
        assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
        XContentLocation tokenLoc = parser.getTokenLocation();
        assertTrue(tokenLoc.byteOffset() >= 0);

        XContentLocation currentLoc = parser.getCurrentLocation();
        assertNotNull(currentLoc);
        assertTrue(currentLoc.byteOffset() > tokenLoc.byteOffset());
    }
}

/**
 * Wraps a JSON parser in a {@code FilterXContentParserWrapper} and checks that both location
 * accessors on the wrapper return what the wrapped parser reports.
 */
public void testFilterXContentParserDelegatesGetCurrentLocation() throws IOException {
    final byte[] source = "{\"a\":1}".getBytes(StandardCharsets.UTF_8);
    try (XContentParser wrapped = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, source)) {
        final XContentParser filtered = new FilterXContentParserWrapper(wrapped);
        final XContentParser.Token first = filtered.nextToken();
        assertEquals(XContentParser.Token.START_OBJECT, first);

        // Token position: same as the wrapped parser's, and at the very start of the input.
        final XContentLocation tokenViaFilter = filtered.getTokenLocation();
        assertEquals(wrapped.getTokenLocation(), tokenViaFilter);
        assertEquals(0L, tokenViaFilter.byteOffset());

        // Read position: same as the wrapped parser's, and already past the first byte.
        final XContentLocation cursorViaFilter = filtered.getCurrentLocation();
        assertEquals(wrapped.getCurrentLocation(), cursorViaFilter);
        assertTrue(cursorViaFilter.byteOffset() > 0);
    }
}

private XContentParser createParser(XContent xContent, XContentParserConfiguration config, String content) throws IOException {
return randomBoolean()
? xContent.createParser(config, content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -761,9 +761,15 @@ public String currentName() throws IOException {

@Override
public XContentLocation getTokenLocation() {
// return fixed location: it's not possible to match the real location while parsing through the object structure,
// because completion metadata have been rewritten hence they won't match the incoming document
return locationOffset;
}

@Override
public XContentLocation getCurrentLocation() {
// same as getTokenLocation(): both return the fixed locationOffset, because real positions
// are not available for rewritten metadata
return locationOffset;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,14 @@ public XContentLocation getTokenLocation() {
return currentLocation;
}

/**
 * Returns the superclass's read position while the original content is being parsed, and the
 * stored {@code currentLocation} in every other state.
 */
@Override
public XContentLocation getCurrentLocation() {
    return state == State.PARSING_ORIGINAL_CONTENT ? super.getCurrentLocation() : currentLocation;
}

@Override
public Token currentToken() {
return switch (state) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,7 @@ public void testMultiFieldParserSimpleValue() throws IOException {
// we don't check currentToken here because it returns START_OBJECT that is inconsistent with returning a value
assertEquals("text", multiFieldParser.textOrNull());
assertEquals(documentParser.getTokenLocation(), multiFieldParser.getTokenLocation());
assertEquals(documentParser.getTokenLocation(), multiFieldParser.getCurrentLocation());
assertEquals(documentParser.currentName(), multiFieldParser.currentName());
}

Expand Down Expand Up @@ -918,6 +919,7 @@ public void testMultiFieldParserCompletionSubfield() throws IOException {
assertEquals(expectedParser.currentToken(), token);
assertEquals(expectedParser.currentToken(), multiFieldParser.currentToken());
assertEquals(expectedTokenLocation, multiFieldParser.getTokenLocation());
assertEquals(expectedTokenLocation, multiFieldParser.getCurrentLocation());
assertEquals(documentParser.nextToken(), multiFieldParser.currentToken());
assertEquals(documentParser.currentName(), multiFieldParser.currentName());
}
Expand Down
Loading