Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
# Issue: https://github.com/opensearch-project/sql/issues/4896
# ArrayIndexOutOfBoundsException when querying index with dot-containing field names
#
# Root cause: JSON field names containing dots (e.g., ".", "..", "a...", ".a")
# were incorrectly split as path separators, causing crashes or data corruption.
#
# This can happen when an index has a disabled object field (enabled: false),
# which allows storing documents without validating inner field names.
#
# Fix: Treat JSON field names as literal strings, not dot-separated paths.
# Dots in field names are literal characters, not path separators.

setup:
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: true
# Create index with disabled object field
- do:
indices.create:
index: test_disabled_object_4896
body:
mappings:
properties:
log:
type: object
enabled: false
"@timestamp":
type: date
message:
type: text

# Document 1: Single dot field name "."
- do:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change L35-L112 single doc index to bulk index

  - do:
      bulk:
        index: test
        refresh: true
        body:
          - '{"index": {}}'
          - '{"id": 1}'
          - '{"index": {}}'
          - '{"id": 2}'
          - '{"index": {}}'
          - '{"id": 3}'

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

index:
index: test_disabled_object_4896
id: 1
body:
"@timestamp": "2025-11-26T17:10:00.000Z"
message: "single dot"
log:
".": "single dot value"

# Document 2: Multiple dots field name ".."
- do:
index:
index: test_disabled_object_4896
id: 2
body:
"@timestamp": "2025-11-26T17:11:00.000Z"
message: "multiple dots"
log:
"..": "double dot value"

# Document 3: Trailing dots field name "a..."
- do:
index:
index: test_disabled_object_4896
id: 3
body:
"@timestamp": "2025-11-26T17:12:00.000Z"
message: "trailing dots"
log:
"a...": "trailing dots value"

# Document 4: Leading dot field name ".a"
- do:
index:
index: test_disabled_object_4896
id: 4
body:
"@timestamp": "2025-11-26T17:13:00.000Z"
message: "leading dot"
log:
".a": "leading dot value"

# Document 5: Middle dots field name "a..b"
- do:
index:
index: test_disabled_object_4896
id: 5
body:
"@timestamp": "2025-11-26T17:14:00.000Z"
message: "middle dots"
log:
"a..b": "middle dots value"

# Document 6: Multiple unusual fields in same object
- do:
index:
index: test_disabled_object_4896
id: 6
body:
"@timestamp": "2025-11-26T17:15:00.000Z"
message: "mixed fields"
log:
".": "dot1"
"..": "dot2"
"normal": "normal value"

# Document 7: Malformed top-level field name "log." (trailing dot)
- do:
index:
index: test_disabled_object_4896
id: 7
refresh: true
body:
"@timestamp": "2025-11-26T17:16:00.000Z"
message: "malformed top-level"
"log.":
"nested": "value in log."

---
# Runs after each test: undoes what `setup` changed.
teardown:
# Turn the Calcite engine setting back off (setup switched it to true).
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: false
# Remove the index created in setup.
- do:
indices.delete:
index: test_disabled_object_4896

---
# Document 1 stores the key "." inside the disabled `log` object; the query must
# return that object with the "." key and its value intact.
"Single dot field name returns actual value":
# The `do` step below sends custom headers, so the `headers` feature is listed here.
- skip:
features:
- headers

- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_disabled_object_4896 | where message = 'single dot' | fields log
# Exactly one document has message 'single dot'.
- match: { "total": 1 }
- match: { "datarows.0.0": { ".": "single dot value" } }

---
# Document 2 stores the key ".." inside `log`; the key must come back unchanged.
"Multiple dots field name returns actual value":
# The `do` step below sends custom headers, so the `headers` feature is listed here.
- skip:
features:
- headers

- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_disabled_object_4896 | where message = 'multiple dots' | fields log
# Exactly one document has message 'multiple dots'.
- match: { "total": 1 }
- match: { "datarows.0.0": { "..": "double dot value" } }

---
# Document 3 stores the key "a..." inside `log`; the trailing dots are part of the name.
"Trailing dots field name preserves original name":
# The `do` step below sends custom headers, so the `headers` feature is listed here.
- skip:
features:
- headers

# Field "a..." should NOT be truncated to "a"
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_disabled_object_4896 | where message = 'trailing dots' | fields log
# Exactly one document has message 'trailing dots'.
- match: { "total": 1 }
- match: { "datarows.0.0": { "a...": "trailing dots value" } }

---
"Leading dot field name preserves original name":
- skip:
features:
- headers

# Field ".a" should NOT create nested structure
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_disabled_object_4896 | where message = 'leading dot' | fields log
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the result when using `fields log.a`? `log.a` should not exist, right? The user should use `fields log.'.a'` instead.

Copy link
Contributor Author

@ahkcs ahkcs Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I have added test cases to verify this behavior
fields log.a → returns null
fields log.'.a' → returns "leading dot value"

- match: { "total": 1 }
- match: { "datarows.0.0": { ".a": "leading dot value" } }

---
# Document 5 stores the key "a..b" inside `log`; it must be returned as one flat key.
"Middle dots field name preserves original name":
# The `do` step below sends custom headers, so the `headers` feature is listed here.
- skip:
features:
- headers

# Field "a..b" should NOT create nested structure a -> "" -> b
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_disabled_object_4896 | where message = 'middle dots' | fields log
# Exactly one document has message 'middle dots'.
- match: { "total": 1 }
- match: { "datarows.0.0": { "a..b": "middle dots value" } }

---
# Document 6 mixes the keys ".", ".." and "normal" in one `log` object; all three
# must be present in the returned object.
"Multiple unusual fields coexist with normal fields":
# The `do` step below sends custom headers, so the `headers` feature is listed here.
- skip:
features:
- headers

- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_disabled_object_4896 | where message = 'mixed fields' | fields log
# Exactly one document has message 'mixed fields'.
- match: { "total": 1 }
- match: { "datarows.0.0": { ".": "dot1", "..": "dot2", "normal": "normal value" } }

---
# Document 7 has a top-level key "log." (trailing dot) next to the mapped fields.
# NOTE(review): only the row count is asserted below; nothing in this test checks
# that the "log." name itself appears in the response. Consider adding a match on
# the schema or datarows once the expected shape is confirmed.
"Malformed top-level field name preserves original name":
# The `do` step below sends custom headers, so the `headers` feature is listed here.
- skip:
features:
- headers

# Top-level field "log." should be preserved, not truncated to "log"
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_disabled_object_4896 | where message = 'malformed top-level'
# Exactly one document has message 'malformed top-level'.
- match: { "total": 1 }

---
# Unfiltered scan over the whole index: every document indexed in setup (ids 1-7)
# must be returned, i.e. none of the dot-containing keys aborts the query.
"Query all documents with unusual field names succeeds":
# The `do` step below sends custom headers, so the `headers` feature is listed here.
- skip:
features:
- headers

# All 7 documents should be queryable without crash
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_disabled_object_4896
- match: { "total": 7 }
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,9 @@ private ExprValue parseStruct(Content content, String prefix, boolean supportArr
entry ->
populateValueRecursive(
result,
new JsonPath(entry.getKey()),
// Use literal() because entry.getKey() is a JSON field name where dots are
// literal characters, not path separators (e.g., ".", "..", "a...", ".a")
JsonPath.literal(entry.getKey()),
parse(
entry.getValue(),
makeField(prefix, entry.getKey()),
Expand Down Expand Up @@ -411,11 +413,30 @@ static void populateValueRecursive(ExprTupleValue result, JsonPath path, ExprVal
static class JsonPath {
private final List<String> paths;

public JsonPath(String rawPath) {
this.paths = List.of(rawPath.split("\\."));
/**
* Create a JsonPath from a literal field name (no splitting by dots). Use this when the field
* name comes directly from JSON object keys, where dots are literal characters in the field
* name, not path separators.
*
* @param fieldName The literal field name (e.g., ".", "..", "a...", ".a")
* @return A JsonPath with a single element containing the literal field name
*/
public static JsonPath literal(String fieldName) {
return new JsonPath(List.of(fieldName));
}

public JsonPath(List<String> paths) {
/**
* Create a JsonPath by splitting a dot-separated path into components. Use this when the path
* represents a nested field structure (e.g., "log.json.time" → ["log", "json", "time"]).
*
* @param path The dot-separated path
* @return A JsonPath with components split by dots
*/
public static JsonPath fromPath(String path) {
return new JsonPath(List.of(path.split("\\.")));
}

private JsonPath(List<String> paths) {
this.paths = paths;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -999,15 +999,15 @@ public void testPopulateValueRecursive() {
ExprTupleValue tupleValue = ExprTupleValue.empty();

OpenSearchExprValueFactory.populateValueRecursive(
tupleValue, new JsonPath("log.json.time"), ExprValueUtils.integerValue(100));
tupleValue, JsonPath.fromPath("log.json.time"), ExprValueUtils.integerValue(100));
ExprValue expectedValue =
ExprValueUtils.tupleValue(
Map.of("log", Map.of("json", new LinkedHashMap<>(Map.of("time", 100)))));
assertEquals(expectedValue, tupleValue);

OpenSearchExprValueFactory.populateValueRecursive(
tupleValue,
new JsonPath("log.json"),
JsonPath.fromPath("log.json"),
ExprValueUtils.tupleValue(new LinkedHashMap<>(Map.of("status", "SUCCESS"))));
expectedValue =
ExprValueUtils.tupleValue(
Expand All @@ -1025,7 +1025,7 @@ public void testPopulateValueRecursive() {

// update the conflict value with the latest
OpenSearchExprValueFactory.populateValueRecursive(
tupleValue, new JsonPath("log.json.status"), ExprValueUtils.stringValue("FAILED"));
tupleValue, JsonPath.fromPath("log.json.status"), ExprValueUtils.stringValue("FAILED"));
expectedValue =
ExprValueUtils.tupleValue(
Map.of(
Expand All @@ -1041,6 +1041,66 @@ public void testPopulateValueRecursive() {
assertEquals(expectedValue, tupleValue);
}

@Test
public void constructWithDotOnlyFieldNameReturnsValue() {
  // A struct key made of a single dot (possible with disabled object fields in OpenSearch)
  // must be kept as a literal key and resolve to its stored value rather than crash or
  // come back as null.
  final String json = "{\"structV\":{\".\":\"value\"}}";
  Map<String, ExprValue> structFields = tupleValue(json).get("structV").tupleValue();
  assertEquals(stringValue("value"), structFields.get("."));
}

@Test
public void constructWithMultipleDotsFieldNameReturnsValue() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

np: Could you verify what happens if the malformed field is not at the top level? E.g., a...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to have more comprehensive test cases for both UT and yaml test

// Field name ".." (multiple dots) should also return the actual value
Map<String, ExprValue> result = tupleValue("{\"structV\":{\"..\":\"value\"}}");
ExprValue structValue = result.get("structV");
assertEquals(stringValue("value"), structValue.tupleValue().get(".."));
}

@Test
public void constructWithDotFieldAlongsideValidFieldsReturnsAll() {
  // A "." key must not hide its sibling keys, nor fields outside the struct.
  final String json =
      "{\"structV\":{\".\":\"dotValue\",\"id\":1,\"state\":\"WA\"},\"stringV\":\"test\"}";
  Map<String, ExprValue> row = tupleValue(json);

  // The top-level string next to the struct is parsed as usual.
  assertEquals(stringValue("test"), row.get("stringV"));

  // Every key inside structV is present, including the dot-only one.
  Map<String, ExprValue> structFields = row.get("structV").tupleValue();
  assertEquals(stringValue("dotValue"), structFields.get("."));
  assertEquals(integerValue(1), structFields.get("id"));
  assertEquals(stringValue("WA"), structFields.get("state"));
}

@Test
public void constructWithTrailingDotsFieldNameReturnsValue() {
  // The key "a..." must be stored under its full name, not shortened to "a".
  final String json = "{\"structV\":{\"a...\":\"value\"}}";
  Map<String, ExprValue> structFields = tupleValue(json).get("structV").tupleValue();
  assertEquals(stringValue("value"), structFields.get("a..."));
}

@Test
public void constructWithLeadingDotsFieldNameReturnsValue() {
  // The key ".a" (leading dot) must be looked up under exactly that name.
  final String json = "{\"structV\":{\".a\":\"value\"}}";
  Map<String, ExprValue> structFields = tupleValue(json).get("structV").tupleValue();
  assertEquals(stringValue("value"), structFields.get(".a"));
}

@Test
public void constructWithMiddleDotsFieldNameReturnsValue() {
  // The key "a..b" (consecutive dots in the middle) must be looked up under exactly that name.
  final String json = "{\"structV\":{\"a..b\":\"value\"}}";
  Map<String, ExprValue> structFields = tupleValue(json).get("structV").tupleValue();
  assertEquals(stringValue("value"), structFields.get("a..b"));
}

public Map<String, ExprValue> tupleValue(String jsonString) {
final ExprValue construct = exprValueFactory.construct(jsonString, false);
return construct.tupleValue();
Expand Down
Loading