diff --git a/docs/user/ppl/limitations/limitations.rst b/docs/user/ppl/limitations/limitations.rst index f9c620ff18b..41d3a007d23 100644 --- a/docs/user/ppl/limitations/limitations.rst +++ b/docs/user/ppl/limitations/limitations.rst @@ -106,3 +106,27 @@ For the following functionalities, the query will be forwarded to the V2 query e * ``show datasources`` and command * Commands with ``fetch_size`` parameter + +Malformed Field Names in Object Fields +====================================== + +OpenSearch normally rejects field names containing problematic dot patterns (such as ``.``, ``..``, ``.a``, ``a.``, or ``a..b``). However, when an object field has ``enabled: false``, OpenSearch bypasses field name validation and allows storing documents with any field names. + +If a document contains malformed field names inside an object field, PPL ignores those malformed field names. Other valid fields in the document are returned normally. + +**Example of affected data:** + +.. code-block:: json + + { + "log": { + ".": "value1", + ".a": "value2", + "a.": "value3", + "a..b": "value4" + } + } + +When ``log`` is an object field with ``enabled: false``, subfields with malformed names are ignored. + +**Recommendation:** Avoid using field names that contain leading dots, trailing dots, consecutive dots, or consist only of dots. This aligns with OpenSearch's default field naming requirements. 
diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4896.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4896.yml new file mode 100644 index 00000000000..724957e7af6 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4896.yml @@ -0,0 +1,204 @@ +# Issue: https://github.com/opensearch-project/sql/issues/4896 +# ArrayIndexOutOfBoundsException when querying index with malformed field names in disabled object +# +# Root cause: When a document has a field name with problematic dot patterns (e.g., ".", "..", ".a", +# "a.", "a..b"), the JsonPath parsing logic fails because String.split("\\.") produces empty strings. +# +# This can happen when an index has a disabled object field (enabled: false), which allows storing +# documents without validating inner field names. Normal OpenSearch indices reject such field names. +# +# Fix: The query engine now detects malformed field names and returns null for those fields, +# allowing the rest of the document to be processed normally. 
+ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + # Create index with disabled object field to allow unusual field names + - do: + indices.create: + index: test_malformed_fields_4896 + body: + mappings: + properties: + log: + type: object + enabled: false + "@timestamp": + type: date + message: + type: text + status: + type: keyword + + # Use bulk indexing to create all test documents + - do: + bulk: + index: test_malformed_fields_4896 + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"@timestamp": "2025-11-26T17:10:00.000Z", "message": "single dot test", "status": "ok", "log": {".": "dot only value", "valid": "normal value"}}' + - '{"index": {"_id": "2"}}' + - '{"@timestamp": "2025-11-26T17:11:00.000Z", "message": "double dot test", "status": "ok", "log": {"..": "double dot value", "valid": "normal value"}}' + - '{"index": {"_id": "3"}}' + - '{"@timestamp": "2025-11-26T17:12:00.000Z", "message": "triple dot test", "status": "ok", "log": {"...": "triple dot value", "valid": "normal value"}}' + - '{"index": {"_id": "4"}}' + - '{"@timestamp": "2025-11-26T17:13:00.000Z", "message": "leading dot test", "status": "ok", "log": {".a": "leading dot value", "valid": "normal value"}}' + - '{"index": {"_id": "5"}}' + - '{"@timestamp": "2025-11-26T17:14:00.000Z", "message": "trailing dot test", "status": "ok", "log": {"a.": "trailing dot value", "valid": "normal value"}}' + - '{"index": {"_id": "6"}}' + - '{"@timestamp": "2025-11-26T17:15:00.000Z", "message": "consecutive dots test", "status": "ok", "log": {"a..b": "consecutive dots value", "valid": "normal value"}}' + - '{"index": {"_id": "7"}}' + - '{"@timestamp": "2025-11-26T17:16:00.000Z", "message": "multiple malformed test", "status": "ok", "log": {".": "dot1", "..": "dot2", ".leading": "dot3", "trailing.": "dot4", "mid..dle": "dot5", "valid1": "normal1", "valid2": "normal2"}}' + - '{"index": {"_id": "8"}}' + - '{"@timestamp": "2025-11-26T17:17:00.000Z", "message": 
"valid nested test", "status": "ok", "log": {"nested.field": "nested value"}}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + - do: + indices.delete: + index: test_malformed_fields_4896 + +--- +"Query all documents with unusual field names succeeds": + - skip: + features: + - headers + # Before the fix: ArrayIndexOutOfBoundsException: Index 0 out of bounds for length 0 + # After the fix: Query succeeds for all documents + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | fields @timestamp, message, status | sort @timestamp + - match: { "total": 8 } + - match: { "datarows.0.0": "2025-11-26 17:10:00" } + - match: { "datarows.0.1": "single dot test" } + - match: { "datarows.7.0": "2025-11-26 17:17:00" } + - match: { "datarows.7.1": "valid nested test" } + +--- +"Single dot field name returns null for malformed field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "single dot test" | fields log + - match: { "total": 1 } + # The "." field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + +--- +"Multiple dots field name returns null for malformed field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "double dot test" | fields log + - match: { "total": 1 } + # The ".." field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "triple dot test" | fields log + - match: { "total": 1 } + # The "..." 
field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + +--- +"Leading dot field name returns null for malformed field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "leading dot test" | fields log + - match: { "total": 1 } + # The ".a" field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + +--- +"Trailing dot field name returns null for malformed field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "trailing dot test" | fields log + - match: { "total": 1 } + # The "a." field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + +--- +"Consecutive dots field name returns null for malformed field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "consecutive dots test" | fields log + - match: { "total": 1 } + # The "a..b" field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + +--- +"Multiple malformed fields coexist with valid fields": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "multiple malformed test" | fields log + - match: { "total": 1 } + # All malformed fields return null, only valid fields remain + - match: { "datarows.0.0": {"valid1": "normal1", "valid2": "normal2"} } + +--- +"Valid nested field still works (issue #3477 compatibility)": + - skip: + features: + - headers + # This tests that the fix for #4896 doesn't break the flattening behavior from #3477 + - 
do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "valid nested test" | fields log + - match: { "total": 1 } + # Valid nested field "nested.field" is properly expanded to nested structure + - match: { "datarows.0.0": {"nested": {"field": "nested value"}} } + diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index 06f5d2dc400..4e6032d4f1d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -74,6 +74,7 @@ /** Construct ExprValue from OpenSearch response. */ public class OpenSearchExprValueFactory { + /** The Mapping of Field and ExprType. */ private final Map typeMapping; @@ -368,18 +369,58 @@ private ExprValue parseStruct(Content content, String prefix, boolean supportArr content .map() .forEachRemaining( - entry -> + entry -> { + String fieldKey = entry.getKey(); + String fullFieldPath = makeField(prefix, fieldKey); + // Check for malformed field names before creating JsonPath. + // See isFieldNameMalformed() for details on what constitutes a malformed field name. + if (isFieldNameMalformed(fieldKey)) { + result.tupleValue().put(fieldKey, ExprNullValue.of()); + } else { populateValueRecursive( result, - new JsonPath(entry.getKey()), - parse( - entry.getValue(), - makeField(prefix, entry.getKey()), - type(makeField(prefix, entry.getKey())), - supportArrays))); + new JsonPath(fieldKey), + parse(entry.getValue(), fullFieldPath, type(fullFieldPath), supportArrays)); + } + }); return result; } + /** + * Check if a field name is malformed and cannot be processed by JsonPath. + * + *

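<p>A field name is malformed if it contains dot patterns that would cause String.split("\\.") + * to produce empty strings. This includes: + * + * <ul> + * <li>Dot-only names, such as "." or ".." + * <li>Leading dots, such as ".a" + * <li>Trailing dots, such as "a." + * <li>Consecutive dots, such as "a..b" + * </ul>

+ * + * <p>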

Such field names can occur in disabled object fields (enabled: false) which bypass + * OpenSearch's field name validation. Normal OpenSearch indices reject these field names. + * + * @param fieldName The field name to check. + * @return true if the field name is malformed, false otherwise. + */ + static boolean isFieldNameMalformed(String fieldName) { + // Use -1 limit to preserve trailing empty strings (e.g., "a." -> ["a", ""]) + String[] parts = fieldName.split("\\.", -1); + // With a negative limit, split() never returns an empty array: a dot-only name such as + // "." yields only empty parts (["", ""]) and an empty name yields [""], so no separate + // length check is needed. + // + // Check for empty parts which indicate leading, trailing, or consecutive dots + for (String part : parts) { + if (part.isEmpty()) { + return true; + } + } + return false; + } + /** * Populate the current ExprTupleValue recursively. * diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 8b6d791639d..485310431ec 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -1075,6 +1075,114 @@ private ExprValue constructFromObjectWithArraySupport(String fieldName, Object v return exprValueFactory.construct(fieldName, value, true); } + // ==================== Malformed Field Name Tests ==================== + // Tests for issue #4896: ArrayIndexOutOfBoundsException with dot-containing field names + + @Test + public void isFieldNameMalformed_dotOnlyFieldNames() { + // Single dot + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed(".")); + // Multiple dots + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("..")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("...")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("....")); + } +
+ @Test + public void isFieldNameMalformed_leadingDots() { + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed(".a")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("..a")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed(".field")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("..field.name")); + } + + @Test + public void isFieldNameMalformed_trailingDots() { + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("a.")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("a..")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("field.")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("field.name..")); + } + + @Test + public void isFieldNameMalformed_consecutiveDots() { + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("a..b")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("a...b")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("field..name")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("a..b..c")); + } + + @Test + public void isFieldNameMalformed_validFieldNames() { + // Simple field names + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("a")); + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("field")); + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("fieldName")); + // Nested field names (valid dot usage) + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("a.b")); + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("log.json")); + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("a.b.c")); + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("field.name.value")); + } + + @Test + public void constructStructWithDotOnlyFieldName_returnsNull() { + // Test that a single dot field name returns null + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\".\":\"value\"}}").get("structV").tupleValue().get(".")); + } + + @Test + 
public void constructStructWithMultipleDotFieldName_returnsNull() { + // Test that multiple dot field names return null + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"..\":\"value\"}}").get("structV").tupleValue().get("..")); + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"...\":\"value\"}}").get("structV").tupleValue().get("...")); + } + + @Test + public void constructStructWithLeadingDotFieldName_returnsNull() { + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\".a\":\"value\"}}").get("structV").tupleValue().get(".a")); + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"..a\":\"value\"}}").get("structV").tupleValue().get("..a")); + } + + @Test + public void constructStructWithTrailingDotFieldName_returnsNull() { + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"a.\":\"value\"}}").get("structV").tupleValue().get("a.")); + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"a..\":\"value\"}}").get("structV").tupleValue().get("a..")); + } + + @Test + public void constructStructWithConsecutiveDotsFieldName_returnsNull() { + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"a..b\":\"value\"}}").get("structV").tupleValue().get("a..b")); + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"a...b\":\"value\"}}").get("structV").tupleValue().get("a...b")); + } + + @Test + public void constructStructWithMalformedAndValidFields_preservesValidFields() { + // Test that valid fields are preserved when malformed fields are present + Map structValue = + tupleValue("{\"structV\":{\".\":\"bad\",\"good\":\"value\"}}").get("structV").tupleValue(); + assertEquals(nullValue(), structValue.get(".")); + assertEquals(stringValue("value"), structValue.get("good")); + } + @EqualsAndHashCode(callSuper = false) @ToString private static class TestType extends OpenSearchDataType {