opensearch-project · ahkcs · Dec 3, 2025 · Dec 3, 2025 · Dec 3, 2025 · Dec 3, 2025
@@ -0,0 +1,215 @@
+# Issue: https://github.com/opensearch-project/sql/issues/4896
+# ArrayIndexOutOfBoundsException when querying index with dot-containing field names
+#
+# Root cause: JSON field names containing dots (e.g., ".", "..", "a...", ".a")
+# were split on every dot as if it were a path separator, causing crashes or data corruption.
+#
+# This can happen when an index has a disabled object field (enabled: false),
+# which allows storing documents without validating inner field names.
+#
+# Fix: A field name is split on dots only when every resulting segment is non-empty
+# (e.g., "log.json" -> log, json); names like ".", "..", "a...", ".a", "a..b" stay literal keys.
+
+setup:
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled: true
+  # Create index with disabled object field
+  - do:
+      indices.create:
+        index: test_disabled_object_4896
+        body:
+          mappings:
+            properties:
+              log:
+                type: object
+                enabled: false
+              "@timestamp":
+                type: date
+              message:
+                type: text
+
+  # Bulk index all test documents
+  - do:
+      bulk:
+        index: test_disabled_object_4896
+        refresh: true
+        body:
+          # Document 1: Single dot field name "."
+          - '{"index": {"_id": "1"}}'
+          - '{"@timestamp": "2025-11-26T17:10:00.000Z", "message": "single dot", "log": {".": "single dot value"}}'
+          # Document 2: Multiple dots field name ".."
+          - '{"index": {"_id": "2"}}'
+          - '{"@timestamp": "2025-11-26T17:11:00.000Z", "message": "multiple dots", "log": {"..": "double dot value"}}'
+          # Document 3: Trailing dots field name "a..."
+          - '{"index": {"_id": "3"}}'
+          - '{"@timestamp": "2025-11-26T17:12:00.000Z", "message": "trailing dots", "log": {"a...": "trailing dots value"}}'
+          # Document 4: Leading dot field name ".a"
+          - '{"index": {"_id": "4"}}'
+          - '{"@timestamp": "2025-11-26T17:13:00.000Z", "message": "leading dot", "log": {".a": "leading dot value"}}'
+          # Document 5: Middle dots field name "a..b"
+          - '{"index": {"_id": "5"}}'
+          - '{"@timestamp": "2025-11-26T17:14:00.000Z", "message": "middle dots", "log": {"a..b": "middle dots value"}}'
+          # Document 6: Multiple unusual fields in same object
+          - '{"index": {"_id": "6"}}'
+          - '{"@timestamp": "2025-11-26T17:15:00.000Z", "message": "mixed fields", "log": {".": "dot1", "..": "dot2", "normal": "normal value"}}'
+          # Document 7: Malformed top-level field name "log." (trailing dot)
+          - '{"index": {"_id": "7"}}'
+          - '{"@timestamp": "2025-11-26T17:16:00.000Z", "message": "malformed top-level", "log.": {"nested": "value in log."}}'
+
+---
+teardown:
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled: false
+  - do:
+      indices.delete:
+        index: test_disabled_object_4896
+
+---
+"Single dot field name returns actual value":
+  - skip:
+      features:
+        - headers
+
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_disabled_object_4896 | where message = 'single dot' | fields log
+  - match: { "total": 1 }
+  - match: { "datarows.0.0": { ".": "single dot value" } }
+
+---
+"Multiple dots field name returns actual value":
+  - skip:
+      features:
+        - headers
+
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_disabled_object_4896 | where message = 'multiple dots' | fields log
+  - match: { "total": 1 }
+  - match: { "datarows.0.0": { "..": "double dot value" } }
+
+---
+"Trailing dots field name preserves original name":
+  - skip:
+      features:
+        - headers
+
+  # Field "a..." should NOT be truncated to "a"
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_disabled_object_4896 | where message = 'trailing dots' | fields log
+  - match: { "total": 1 }
+  - match: { "datarows.0.0": { "a...": "trailing dots value" } }
+
+---
+"Leading dot field name preserves original name":
+  - skip:
+      features:
+        - headers
+
+  # Field ".a" should NOT create nested structure
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_disabled_object_4896 | where message = 'leading dot' | fields log
+  - match: { "total": 1 }
+  - match: { "datarows.0.0": { ".a": "leading dot value" } }
+
+  # Querying "log.a" should NOT return the ".a" field value
+  # Because "log.a" means nested field "a" inside "log", not literal ".a"
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_disabled_object_4896 | where message = 'leading dot' | fields log.a
+  - match: { "total": 1 }
+  - match: { "datarows.0.0": null }
+
+  # To access literal field ".a" inside log, use backticks: log.`.a`
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_disabled_object_4896 | where message = 'leading dot' | fields log.`.a`
+  - match: { "total": 1 }
+  - match: { "datarows.0.0": "leading dot value" }
+
+---
+"Middle dots field name preserves original name":
+  - skip:
+      features:
+        - headers
+
+  # Field "a..b" should NOT create nested structure a -> "" -> b
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_disabled_object_4896 | where message = 'middle dots' | fields log
+  - match: { "total": 1 }
+  - match: { "datarows.0.0": { "a..b": "middle dots value" } }
+
+---
+"Multiple unusual fields coexist with normal fields":
+  - skip:
+      features:
+        - headers
+
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_disabled_object_4896 | where message = 'mixed fields' | fields log
+  - match: { "total": 1 }
+  - match: { "datarows.0.0": { ".": "dot1", "..": "dot2", "normal": "normal value" } }
+
+---
+"Malformed top-level field name preserves original name":
+  - skip:
+      features:
+        - headers
+
+  # Top-level field "log." should be preserved, not truncated to "log".
+  # NOTE(review): only the row count is asserted below; the preserved name itself is not checked.
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_disabled_object_4896 | where message = 'malformed top-level'
+  - match: { "total": 1 }
+
+---
+"Query all documents with unusual field names succeeds":
+  - skip:
+      features:
+        - headers
+
+  # All 7 documents should be queryable without crash
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=test_disabled_object_4896
+  - match: { "total": 7 }
@@ -376,7 +376,10 @@ private ExprValue parseStruct(Content content, String prefix, boolean supportArr
             entry ->
                 populateValueRecursive(
                     result,
-                    new JsonPath(entry.getKey()),
+                    // Use fromPath() to split dot-separated field names into nested paths
+                    // (e.g., "log.json" -> ["log", "json"] for flattening support).
+                    // fromPath() handles edge cases like "." or ".." that would otherwise crash.
+                    JsonPath.fromPath(entry.getKey()),
                     parse(
                         entry.getValue(),
                         makeField(prefix, entry.getKey()),
@@ -411,11 +414,53 @@ static void populateValueRecursive(ExprTupleValue result, JsonPath path, ExprVal
   static class JsonPath {
     private final List<String> paths;
 
-    public JsonPath(String rawPath) {
-      this.paths = List.of(rawPath.split("\\."));
+    /**
+     * Wraps a field name in a single-component JsonPath without interpreting any dots in it. Meant
+     * for names in which a dot is an ordinary character rather than a separator.
+     *
+     * @param fieldName The field name to keep verbatim (e.g., ".", "..", "a...", ".a")
+     * @return A JsonPath whose only component is {@code fieldName}
+     */
+    public static JsonPath literal(String fieldName) {
+      List<String> soleComponent = List.of(fieldName);
+      return new JsonPath(soleComponent);
     }
 
-    public JsonPath(List<String> paths) {
+    /**
+     * Create a JsonPath by splitting a dot-separated path into components. Use this when the path
+     * represents a nested field structure (e.g., "log.json.time" → ["log", "json", "time"]).
+     *
+     * <p>If any component would be empty, the whole input is kept as one literal component instead
+     * of being split. With the {@code -1} limit, {@code split} never returns an empty array:
+     *
+     * <ul>
+     *   <li>Dot-only names: "." → ["", ""], ".." → ["", "", ""]
+     *   <li>Leading/trailing/consecutive dots: ".a" → ["", "a"], "a." → ["a", ""], "a..b" →
+     *       ["a", "", "b"]
+     *   <li>The empty string: "" → [""]
+     * </ul>
+     *
+     * @param path The dot-separated path
+     * @return A JsonPath with components split by dots, or a single literal component if splitting
+     *     would produce empty keys
+     */
+    public static JsonPath fromPath(String path) {
+      // The -1 limit keeps trailing empty strings (e.g., "a..." -> ["a", "", "", ""]); without it
+      // split() would silently drop them and "a..." would collapse to "a".
+      String[] parts = path.split("\\.", -1);
+      for (String part : parts) {
+        if (part.isEmpty()) {
+          // An empty component means the name is not a well-formed path; keep it verbatim.
+          return literal(path);
+        }
+      }
+      return new JsonPath(List.of(parts));
+    }
+
+    private JsonPath(List<String> paths) { // keeps the caller's list as-is; no defensive copy
       this.paths = paths;
     }