Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions docs/user/ppl/limitations/limitations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,27 @@ For the following functionalities, the query will be forwarded to the V2 query e
* ``show datasources`` command

* Commands with ``fetch_size`` parameter

Malformed Field Names in Object Fields
======================================

OpenSearch normally rejects field names containing problematic dot patterns (such as ``.``, ``..``, ``.a``, ``a.``, or ``a..b``). However, when an object field has ``enabled: false``, OpenSearch bypasses field name validation and allows storing documents with any field names.

If a document contains malformed field names inside an object field, PPL skips those fields: their values are omitted from the result and the query does not fail. Other valid fields in the document are returned normally.

**Example of affected data:**

.. code-block:: json

{
"log": {
".": "value1",
".a": "value2",
"a.": "value3",
"a..b": "value4"
}
}

When ``log`` is an object field with ``enabled: false``, subfields with malformed names are ignored.

**Recommendation:** Avoid using field names that contain leading dots, trailing dots, consecutive dots, or consist only of dots. This aligns with OpenSearch's default field naming requirements.
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
# Issue: https://github.com/opensearch-project/sql/issues/4896
# ArrayIndexOutOfBoundsException when querying index with malformed field names in disabled object
#
# Root cause: When a document has a field name with problematic dot patterns (e.g., ".", "..", ".a",
# "a.", "a..b"), the JsonPath parsing logic fails because String.split("\\.") produces empty path
# segments (or, for dot-only names, an empty array).
#
# This can happen when an index has a disabled object field (enabled: false), which allows storing
# documents without validating inner field names. Normal OpenSearch indices reject such field names.
#
# Fix: The query engine now detects malformed field names and returns null for those fields,
# allowing the rest of the document to be processed normally.

# Shared fixture for every test in this file: sets plugins.calcite.enabled to true, creates the
# test index, and bulk-loads eight documents (ids 1-8).
setup:
# Set plugins.calcite.enabled to true as a transient setting
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: true
# Create index with disabled object field to allow unusual field names
- do:
indices.create:
index: test_malformed_fields_4896
body:
mappings:
properties:
# "log" is mapped as an object with enabled: false; the malformed keys below live inside it
log:
type: object
enabled: false
"@timestamp":
type: date
message:
type: text
status:
type: keyword

# Use bulk indexing to create all test documents
# Each document is identified in the tests by its unique "message" value.
# Docs 1-6 pair exactly one malformed key with the well-formed key "valid" inside "log";
# doc 7 mixes five malformed keys with two well-formed ones; doc 8 holds only the
# well-formed dotted key "nested.field".
- do:
bulk:
index: test_malformed_fields_4896
# NOTE(review): presumably makes the documents searchable before the tests run - confirm
refresh: true
body:
- '{"index": {"_id": "1"}}'
- '{"@timestamp": "2025-11-26T17:10:00.000Z", "message": "single dot test", "status": "ok", "log": {".": "dot only value", "valid": "normal value"}}'
- '{"index": {"_id": "2"}}'
- '{"@timestamp": "2025-11-26T17:11:00.000Z", "message": "double dot test", "status": "ok", "log": {"..": "double dot value", "valid": "normal value"}}'
- '{"index": {"_id": "3"}}'
- '{"@timestamp": "2025-11-26T17:12:00.000Z", "message": "triple dot test", "status": "ok", "log": {"...": "triple dot value", "valid": "normal value"}}'
- '{"index": {"_id": "4"}}'
- '{"@timestamp": "2025-11-26T17:13:00.000Z", "message": "leading dot test", "status": "ok", "log": {".a": "leading dot value", "valid": "normal value"}}'
- '{"index": {"_id": "5"}}'
- '{"@timestamp": "2025-11-26T17:14:00.000Z", "message": "trailing dot test", "status": "ok", "log": {"a.": "trailing dot value", "valid": "normal value"}}'
- '{"index": {"_id": "6"}}'
- '{"@timestamp": "2025-11-26T17:15:00.000Z", "message": "consecutive dots test", "status": "ok", "log": {"a..b": "consecutive dots value", "valid": "normal value"}}'
- '{"index": {"_id": "7"}}'
- '{"@timestamp": "2025-11-26T17:16:00.000Z", "message": "multiple malformed test", "status": "ok", "log": {".": "dot1", "..": "dot2", ".leading": "dot3", "trailing.": "dot4", "mid..dle": "dot5", "valid1": "normal1", "valid2": "normal2"}}'
- '{"index": {"_id": "8"}}'
- '{"@timestamp": "2025-11-26T17:17:00.000Z", "message": "valid nested test", "status": "ok", "log": {"nested.field": "nested value"}}'

---
# Cleanup: sets plugins.calcite.enabled to false and deletes the test index.
teardown:
# Set the transient setting to false (an explicit value, not a reset to whatever was there before)
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: false
# Remove the index created in setup
- do:
indices.delete:
index: test_malformed_fields_4896

---
# Smoke test: projects only the top-level fields (@timestamp, message, status) of every document,
# sorted by @timestamp, and checks the total plus the first and last rows.
"Query all documents with unusual field names succeeds":
# NOTE(review): presumably skips the test on runners lacking the "headers" feature - confirm
- skip:
features:
- headers
# Before the fix: ArrayIndexOutOfBoundsException: Index 0 out of bounds for length 0
# After the fix: Query succeeds for all documents
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_malformed_fields_4896 | fields @timestamp, message, status | sort @timestamp
# All eight documents indexed in setup must be returned
- match: { "total": 8 }
# Row 0 is the earliest document (id 1); the timestamp is expected as "yyyy-MM-dd HH:mm:ss"
- match: { "datarows.0.0": "2025-11-26 17:10:00" }
- match: { "datarows.0.1": "single dot test" }
# Row 7 is the latest document (id 8)
- match: { "datarows.7.0": "2025-11-26 17:17:00" }
- match: { "datarows.7.1": "valid nested test" }

---
# Selects the document whose "log" object contains the key "." next to "valid" and expects
# the returned object to hold only "valid".
"Single dot field name returns null for malformed field":
- skip:
features:
- headers
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_malformed_fields_4896 | where message = "single dot test" | fields log
# Exactly one document carries this message
- match: { "total": 1 }
# The "." field returns null, so log contains only the valid field
- match: { "datarows.0.0": {"valid": "normal value"} }

---
# Covers dot-only keys longer than one character: runs the same check twice, once for the
# document with key ".." and once for the document with key "...".
"Multiple dots field name returns null for malformed field":
- skip:
features:
- headers
# Case 1: key ".."
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_malformed_fields_4896 | where message = "double dot test" | fields log
- match: { "total": 1 }
# The ".." field returns null, so log contains only the valid field
- match: { "datarows.0.0": {"valid": "normal value"} }

# Case 2: key "..."
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_malformed_fields_4896 | where message = "triple dot test" | fields log
- match: { "total": 1 }
# The "..." field returns null, so log contains only the valid field
- match: { "datarows.0.0": {"valid": "normal value"} }

---
# Selects the document whose "log" object contains the leading-dot key ".a" next to "valid"
# and expects the returned object to hold only "valid".
"Leading dot field name returns null for malformed field":
- skip:
features:
- headers
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_malformed_fields_4896 | where message = "leading dot test" | fields log
# Exactly one document carries this message
- match: { "total": 1 }
# The ".a" field returns null, so log contains only the valid field
- match: { "datarows.0.0": {"valid": "normal value"} }

---
# Selects the document whose "log" object contains the trailing-dot key "a." next to "valid"
# and expects the returned object to hold only "valid".
"Trailing dot field name returns null for malformed field":
- skip:
features:
- headers
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_malformed_fields_4896 | where message = "trailing dot test" | fields log
# Exactly one document carries this message
- match: { "total": 1 }
# The "a." field returns null, so log contains only the valid field
- match: { "datarows.0.0": {"valid": "normal value"} }

---
# Selects the document whose "log" object contains the key "a..b" (two adjacent dots in the
# middle) next to "valid" and expects the returned object to hold only "valid".
"Consecutive dots field name returns null for malformed field":
- skip:
features:
- headers
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_malformed_fields_4896 | where message = "consecutive dots test" | fields log
# Exactly one document carries this message
- match: { "total": 1 }
# The "a..b" field returns null, so log contains only the valid field
- match: { "datarows.0.0": {"valid": "normal value"} }

---
# Selects the document whose "log" object mixes five malformed keys (".", "..", ".leading",
# "trailing.", "mid..dle") with two well-formed keys and expects only the well-formed ones back.
"Multiple malformed fields coexist with valid fields":
- skip:
features:
- headers
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_malformed_fields_4896 | where message = "multiple malformed test" | fields log
# Exactly one document carries this message
- match: { "total": 1 }
# All malformed fields return null, only valid fields remain
- match: { "datarows.0.0": {"valid1": "normal1", "valid2": "normal2"} }

---
# Regression guard: a well-formed dotted key ("nested.field") must not be treated as malformed;
# it is still expected to come back as a nested object.
"Valid nested field still works (issue #3477 compatibility)":
- skip:
features:
- headers
# This tests that the fix for #4896 doesn't break the flattening behavior from #3477
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=test_malformed_fields_4896 | where message = "valid nested test" | fields log
# Exactly one document carries this message
- match: { "total": 1 }
# Valid nested field "nested.field" is properly expanded to nested structure
- match: { "datarows.0.0": {"nested": {"field": "nested value"}} }

Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@

/** Construct ExprValue from OpenSearch response. */
public class OpenSearchExprValueFactory {

/** The Mapping of Field and ExprType. */
private final Map<String, OpenSearchDataType> typeMapping;

Expand Down Expand Up @@ -368,18 +369,58 @@ private ExprValue parseStruct(Content content, String prefix, boolean supportArr
content
.map()
.forEachRemaining(
entry ->
entry -> {
String fieldKey = entry.getKey();
String fullFieldPath = makeField(prefix, fieldKey);
// Check for malformed field names before creating JsonPath.
// See isFieldNameMalformed() for details on what constitutes a malformed field name.
if (isFieldNameMalformed(fieldKey)) {
result.tupleValue().put(fieldKey, ExprNullValue.of());
} else {
populateValueRecursive(
result,
new JsonPath(entry.getKey()),
parse(
entry.getValue(),
makeField(prefix, entry.getKey()),
type(makeField(prefix, entry.getKey())),
supportArrays)));
new JsonPath(fieldKey),
parse(entry.getValue(), fullFieldPath, type(fullFieldPath), supportArrays));
}
});
return result;
}

/**
 * Check if a field name is malformed and cannot be processed by JsonPath.
 *
 * <p>A field name is malformed if splitting it on "." would yield at least one empty path
 * segment. This includes:
 *
 * <ul>
 *   <li>The empty string: ""
 *   <li>Dot-only field names: ".", "..", "..."
 *   <li>Leading dots: ".a", "..a"
 *   <li>Trailing dots: "a.", "a.."
 *   <li>Consecutive dots: "a..b", "a...b"
 * </ul>
 *
 * <p>Such field names can occur in disabled object fields (enabled: false) which bypass
 * OpenSearch's field name validation. Normal OpenSearch indices reject these field names.
 *
 * @param fieldName The field name to check; must not be null.
 * @return true if the field name is malformed, false otherwise.
 */
static boolean isFieldNameMalformed(String fieldName) {
  // The empty name is a single empty segment.
  if (fieldName.isEmpty()) {
    return true;
  }
  // An empty segment appears exactly when the name starts with a dot, ends with a dot, or has
  // two adjacent dots. Dot-only names (".", "..", ...) are already caught by the leading-dot
  // check, so no separate "empty array" case is needed.
  return fieldName.charAt(0) == '.'
      || fieldName.charAt(fieldName.length() - 1) == '.'
      || fieldName.contains("..");
}

/**
* Populate the current ExprTupleValue recursively.
*
Expand Down
Loading
Loading