diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4896.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4896.yml new file mode 100644 index 00000000000..9cae4b629c5 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4896.yml @@ -0,0 +1,215 @@ +# Issue: https://github.com/opensearch-project/sql/issues/4896 +# ArrayIndexOutOfBoundsException when querying index with dot-containing field names +# +# Root cause: JSON field names containing dots (e.g., ".", "..", "a...", ".a") +# were incorrectly split as path separators, causing crashes or data corruption. +# +# This can happen when an index has a disabled object field (enabled: false), +# which allows storing documents without validating inner field names. +# +# Fix: Treat JSON field names as literal strings, not dot-separated paths. +# Dots in field names are literal characters, not path separators. + +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + # Create index with disabled object field + - do: + indices.create: + index: test_disabled_object_4896 + body: + mappings: + properties: + log: + type: object + enabled: false + "@timestamp": + type: date + message: + type: text + + # Bulk index all test documents + - do: + bulk: + index: test_disabled_object_4896 + refresh: true + body: + # Document 1: Single dot field name "." + - '{"index": {"_id": "1"}}' + - '{"@timestamp": "2025-11-26T17:10:00.000Z", "message": "single dot", "log": {".": "single dot value"}}' + # Document 2: Multiple dots field name ".." + - '{"index": {"_id": "2"}}' + - '{"@timestamp": "2025-11-26T17:11:00.000Z", "message": "multiple dots", "log": {"..": "double dot value"}}' + # Document 3: Trailing dots field name "a..." + - '{"index": {"_id": "3"}}' + - '{"@timestamp": "2025-11-26T17:12:00.000Z", "message": "trailing dots", "log": {"a...": "trailing dots value"}}' + # Document 4: Leading dot field name ".a" + - '{"index": {"_id": "4"}}' + - '{"@timestamp": "2025-11-26T17:13:00.000Z", "message": "leading dot", "log": {".a": "leading dot value"}}' + # Document 5: Middle dots field name "a..b" + - '{"index": {"_id": "5"}}' + - '{"@timestamp": "2025-11-26T17:14:00.000Z", "message": "middle dots", "log": {"a..b": "middle dots value"}}' + # Document 6: Multiple unusual fields in same object + - '{"index": {"_id": "6"}}' + - '{"@timestamp": "2025-11-26T17:15:00.000Z", "message": "mixed fields", "log": {".": "dot1", "..": "dot2", "normal": "normal value"}}' + # Document 7: Malformed top-level field name "log." (trailing dot) + - '{"index": {"_id": "7"}}' + - '{"@timestamp": "2025-11-26T17:16:00.000Z", "message": "malformed top-level", "log.": {"nested": "value in log."}}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + - do: + indices.delete: + index: test_disabled_object_4896 + +--- +"Single dot field name returns actual value": + - skip: + features: + - headers + + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_disabled_object_4896 | where message = 'single dot' | fields log + - match: { "total": 1 } + - match: { "datarows.0.0": { ".": "single dot value" } } + +--- +"Multiple dots field name returns actual value": + - skip: + features: + - headers + + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_disabled_object_4896 | where message = 'multiple dots' | fields log + - match: { "total": 1 } + - match: { "datarows.0.0": { "..": "double dot value" } } + +--- +"Trailing dots field name preserves original name": + - skip: + features: + - headers + + # Field "a..." should NOT be truncated to "a" + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_disabled_object_4896 | where message = 'trailing dots' | fields log + - match: { "total": 1 } + - match: { "datarows.0.0": { "a...": "trailing dots value" } } + +--- +"Leading dot field name preserves original name": + - skip: + features: + - headers + + # Field ".a" should NOT create nested structure + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_disabled_object_4896 | where message = 'leading dot' | fields log + - match: { "total": 1 } + - match: { "datarows.0.0": { ".a": "leading dot value" } } + + # Querying "log.a" should NOT return the ".a" field value + # Because "log.a" means nested field "a" inside "log", not literal ".a" + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_disabled_object_4896 | where message = 'leading dot' | fields log.a + - match: { "total": 1 } + - match: { "datarows.0.0": null } + + # To access literal field ".a" inside log, use backticks: log.`.a` + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_disabled_object_4896 | where message = 'leading dot' | fields log.`.a` + - match: { "total": 1 } + - match: { "datarows.0.0": "leading dot value" } + +--- +"Middle dots field name preserves original name": + - skip: + features: + - headers + + # Field "a..b" should NOT create nested structure a -> "" -> b + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_disabled_object_4896 | where message = 'middle dots' | fields log + - match: { "total": 1 } + - match: { "datarows.0.0": { "a..b": "middle dots value" } } + +--- +"Multiple unusual fields coexist with normal fields": + - skip: + features: + - headers + + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_disabled_object_4896 | where message = 'mixed fields' | fields log + - match: { "total": 1 } + - match: { "datarows.0.0": { ".": "dot1", "..": "dot2", "normal": "normal value" } } + +--- +"Malformed top-level field name preserves original name": + - skip: + features: + - headers + + # Top-level field "log." should be preserved, not truncated to "log" + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_disabled_object_4896 | where message = 'malformed top-level' + - match: { "total": 1 } + +--- +"Query all documents with unusual field names succeeds": + - skip: + features: + - headers + + # All 7 documents should be queryable without crash + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_disabled_object_4896 + - match: { "total": 7 } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index fc5610d73f0..666e1b13c7f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -376,7 +376,10 @@ private ExprValue parseStruct(Content content, String prefix, boolean supportArr entry -> populateValueRecursive( result, - new JsonPath(entry.getKey()), + // Use fromPath() to split dot-separated field names into nested paths + // (e.g., "log.json" -> ["log", "json"] for flattening support). + // fromPath() handles edge cases like "." or ".." that would otherwise crash. + JsonPath.fromPath(entry.getKey()), parse( entry.getValue(), makeField(prefix, entry.getKey()), @@ -411,11 +414,53 @@ static void populateValueRecursive(ExprTupleValue result, JsonPath path, ExprVal static class JsonPath { private final List paths; - public JsonPath(String rawPath) { - this.paths = List.of(rawPath.split("\\.")); + /** + * Create a JsonPath from a literal field name (no splitting by dots). Use this when the field + * name comes directly from JSON object keys, where dots are literal characters in the field + * name, not path separators. + * + * @param fieldName The literal field name (e.g., ".", "..", "a...", ".a") + * @return A JsonPath with a single element containing the literal field name + */ + public static JsonPath literal(String fieldName) { + return new JsonPath(List.of(fieldName)); } - public JsonPath(List paths) { + /** + * Create a JsonPath by splitting a dot-separated path into components. Use this when the path + * represents a nested field structure (e.g., "log.json.time" → ["log", "json", "time"]). + * + *

Handles edge cases: + * + *

+ * + * @param path The dot-separated path + * @return A JsonPath with components split by dots, or literal if splitting would produce empty + * keys + */ + public static JsonPath fromPath(String path) { + // Use -1 limit to preserve trailing empty strings (e.g., "a..." -> ["a", "", "", ""]) + String[] parts = path.split("\\.", -1); + // Handle edge cases where splitting would produce problematic results: + // 1. Empty array (dot-only field names like "." or "..") + // 2. Array with empty strings (e.g., ".a" -> ["", "a"], "a..." -> ["a", "", "", ""]) + // In these cases, treat the original string as a literal field name + if (parts.length == 0) { + return new JsonPath(List.of(path)); + } + for (String part : parts) { + if (part.isEmpty()) { + return new JsonPath(List.of(path)); + } + } + return new JsonPath(List.of(parts)); + } + + private JsonPath(List paths) { this.paths = paths; } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 32ba07d4d53..5ada90aa1f0 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -8,6 +8,7 @@ import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.model.ExprValueUtils.booleanValue; @@ -999,7 +1000,7 @@ public void testPopulateValueRecursive() { ExprTupleValue tupleValue = ExprTupleValue.empty(); OpenSearchExprValueFactory.populateValueRecursive( - tupleValue, new JsonPath("log.json.time"), ExprValueUtils.integerValue(100)); + tupleValue, JsonPath.fromPath("log.json.time"), ExprValueUtils.integerValue(100)); ExprValue expectedValue = ExprValueUtils.tupleValue( Map.of("log", Map.of("json", new LinkedHashMap<>(Map.of("time", 100))))); @@ -1007,7 +1008,7 @@ public void testPopulateValueRecursive() { OpenSearchExprValueFactory.populateValueRecursive( tupleValue, - new JsonPath("log.json"), + JsonPath.fromPath("log.json"), ExprValueUtils.tupleValue(new LinkedHashMap<>(Map.of("status", "SUCCESS")))); expectedValue = ExprValueUtils.tupleValue( @@ -1025,7 +1026,7 @@ public void testPopulateValueRecursive() { // update the conflict value with the latest OpenSearchExprValueFactory.populateValueRecursive( - tupleValue, new JsonPath("log.json.status"), ExprValueUtils.stringValue("FAILED")); + tupleValue, JsonPath.fromPath("log.json.status"), ExprValueUtils.stringValue("FAILED")); expectedValue = ExprValueUtils.tupleValue( Map.of( @@ -1041,6 +1042,137 @@ public void testPopulateValueRecursive() { assertEquals(expectedValue, tupleValue); } + @Test + public void constructWithDotOnlyFieldNameReturnsValue() { + // Field name "." (single dot) should return the actual value, not crash or return null + // This can happen with disabled object fields in OpenSearch + Map result = tupleValue("{\"structV\":{\".\":\"value\"}}"); + ExprValue structValue = result.get("structV"); + // The "." field should contain the actual value + assertEquals(stringValue("value"), structValue.tupleValue().get(".")); + } + + @Test + public void constructWithMultipleDotsFieldNameReturnsValue() { + // Field name ".." (multiple dots) should also return the actual value + Map result = tupleValue("{\"structV\":{\"..\":\"value\"}}"); + ExprValue structValue = result.get("structV"); + assertEquals(stringValue("value"), structValue.tupleValue().get("..")); + } + + @Test + public void constructWithDotFieldAlongsideValidFieldsReturnsAll() { + // Both dot field and valid fields should be returned + Map result = + tupleValue( + "{\"structV\":{\".\":\"dotValue\",\"id\":1,\"state\":\"WA\"},\"stringV\":\"test\"}"); + + // stringV field should be returned normally + assertEquals(stringValue("test"), result.get("stringV")); + + // All fields inside structV should be returned + ExprValue structValue = result.get("structV"); + assertEquals(stringValue("dotValue"), structValue.tupleValue().get(".")); + assertEquals(integerValue(1), structValue.tupleValue().get("id")); + assertEquals(stringValue("WA"), structValue.tupleValue().get("state")); + } + + @Test + public void constructWithTrailingDotsFieldNameReturnsValue() { + // Field name "a..." (with trailing dots) should preserve the original name + Map result = tupleValue("{\"structV\":{\"a...\":\"value\"}}"); + ExprValue structValue = result.get("structV"); + // The field should be stored under "a...", not "a" + assertEquals(stringValue("value"), structValue.tupleValue().get("a...")); + } + + @Test + public void constructWithLeadingDotsFieldNameReturnsValue() { + // Field name ".a" (with leading dot) should preserve the original name + Map result = tupleValue("{\"structV\":{\".a\":\"value\"}}"); + ExprValue structValue = result.get("structV"); + assertEquals(stringValue("value"), structValue.tupleValue().get(".a")); + } + + @Test + public void constructWithMiddleDotsFieldNameReturnsValue() { + // Field name "a..b" (with consecutive dots in middle) should preserve the original name + Map result = tupleValue("{\"structV\":{\"a..b\":\"value\"}}"); + ExprValue structValue = result.get("structV"); + assertEquals(stringValue("value"), structValue.tupleValue().get("a..b")); + } + + @Test + public void constructWithTopLevelTrailingDotFieldName() { + // Top-level field name "field." should be preserved + Map result = tupleValue("{\"field.\":\"value\"}"); + assertEquals(stringValue("value"), result.get("field.")); + } + + @Test + public void constructWithTopLevelLeadingDotFieldName() { + // Top-level field name ".field" should be preserved + Map result = tupleValue("{\".field\":\"value\"}"); + assertEquals(stringValue("value"), result.get(".field")); + } + + @Test + public void constructWithMultipleMalformedFieldsAtSameLevel() { + // Multiple malformed fields at same level should all be preserved + Map result = + tupleValue("{\"structV\":{\".\":\"v1\",\"..\":\"v2\",\"...\":\"v3\"}}"); + ExprValue structValue = result.get("structV"); + assertEquals(stringValue("v1"), structValue.tupleValue().get(".")); + assertEquals(stringValue("v2"), structValue.tupleValue().get("..")); + assertEquals(stringValue("v3"), structValue.tupleValue().get("...")); + } + + @Test + public void constructWithMalformedFieldContainingValue() { + // Malformed field name "a." containing a value should be preserved + Map result = tupleValue("{\"structV\":{\"a.\":\"value\"}}"); + ExprValue structValue = result.get("structV"); + ExprValue aValue = structValue.tupleValue().get("a."); + assertNotNull(aValue); + assertEquals(stringValue("value"), aValue); + } + + @Test + public void constructWithVariousMalformedFieldPatterns() { + // Test various malformed field name patterns + Map result = + tupleValue("{\"structV\":{\"a.b.\":\"v1\",\".a.b\":\"v2\",\"a..b\":\"v3\"}}"); + ExprValue structValue = result.get("structV"); + assertEquals(stringValue("v1"), structValue.tupleValue().get("a.b.")); + assertEquals(stringValue("v2"), structValue.tupleValue().get(".a.b")); + assertEquals(stringValue("v3"), structValue.tupleValue().get("a..b")); + } + + @Test + public void jsonPathLiteralPreservesFieldName() { + // JsonPath.literal() should preserve the exact field name + JsonPath path = JsonPath.literal("a..b"); + assertEquals(1, path.getPaths().size()); + assertEquals("a..b", path.getRootPath()); + } + + @Test + public void jsonPathLiteralWithDotOnlyFieldName() { + // JsonPath.literal() with "." should create single-element path + JsonPath path = JsonPath.literal("."); + assertEquals(1, path.getPaths().size()); + assertEquals(".", path.getRootPath()); + } + + @Test + public void jsonPathFromPathSplitsByDots() { + // JsonPath.fromPath() should split by dots for nested paths + JsonPath path = JsonPath.fromPath("a.b.c"); + assertEquals(3, path.getPaths().size()); + assertEquals("a", path.getRootPath()); + assertEquals(List.of("b", "c"), path.getChildPath().getPaths()); + } + public Map tupleValue(String jsonString) { final ExprValue construct = exprValueFactory.construct(jsonString, false); return construct.tupleValue();