-
Notifications
You must be signed in to change notification settings - Fork 181
Fix parsing of dot-containing field names #4897
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,246 @@ | ||
| # Issue: https://github.com/opensearch-project/sql/issues/4896 | ||
| # ArrayIndexOutOfBoundsException when querying index with dot-containing field names | ||
| # | ||
| # Root cause: JSON field names containing dots (e.g., ".", "..", "a...", ".a") | ||
| # were incorrectly split as path separators, causing crashes or data corruption. | ||
| # | ||
| # This can happen when an index has a disabled object field (enabled: false), | ||
| # which allows storing documents without validating inner field names. | ||
| # | ||
| # Fix: Treat JSON field names as literal strings, not dot-separated paths. | ||
| # Dots in field names are literal characters, not path separators. | ||
|
|
||
| setup: | ||
| - do: | ||
| query.settings: | ||
| body: | ||
| transient: | ||
| plugins.calcite.enabled: true | ||
| # Create index with disabled object field | ||
| - do: | ||
| indices.create: | ||
| index: test_disabled_object_4896 | ||
| body: | ||
| mappings: | ||
| properties: | ||
| log: | ||
| type: object | ||
| enabled: false | ||
| "@timestamp": | ||
| type: date | ||
| message: | ||
| type: text | ||
|
|
||
| # Document 1: Single dot field name "." | ||
| - do: | ||
| index: | ||
| index: test_disabled_object_4896 | ||
| id: 1 | ||
| body: | ||
| "@timestamp": "2025-11-26T17:10:00.000Z" | ||
| message: "single dot" | ||
| log: | ||
| ".": "single dot value" | ||
|
|
||
| # Document 2: Multiple dots field name ".." | ||
| - do: | ||
| index: | ||
| index: test_disabled_object_4896 | ||
| id: 2 | ||
| body: | ||
| "@timestamp": "2025-11-26T17:11:00.000Z" | ||
| message: "multiple dots" | ||
| log: | ||
| "..": "double dot value" | ||
|
|
||
| # Document 3: Trailing dots field name "a..." | ||
| - do: | ||
| index: | ||
| index: test_disabled_object_4896 | ||
| id: 3 | ||
| body: | ||
| "@timestamp": "2025-11-26T17:12:00.000Z" | ||
| message: "trailing dots" | ||
| log: | ||
| "a...": "trailing dots value" | ||
|
|
||
| # Document 4: Leading dot field name ".a" | ||
| - do: | ||
| index: | ||
| index: test_disabled_object_4896 | ||
| id: 4 | ||
| body: | ||
| "@timestamp": "2025-11-26T17:13:00.000Z" | ||
| message: "leading dot" | ||
| log: | ||
| ".a": "leading dot value" | ||
|
|
||
| # Document 5: Middle dots field name "a..b" | ||
| - do: | ||
| index: | ||
| index: test_disabled_object_4896 | ||
| id: 5 | ||
| body: | ||
| "@timestamp": "2025-11-26T17:14:00.000Z" | ||
| message: "middle dots" | ||
| log: | ||
| "a..b": "middle dots value" | ||
|
|
||
| # Document 6: Multiple unusual fields in same object | ||
| - do: | ||
| index: | ||
| index: test_disabled_object_4896 | ||
| id: 6 | ||
| body: | ||
| "@timestamp": "2025-11-26T17:15:00.000Z" | ||
| message: "mixed fields" | ||
| log: | ||
| ".": "dot1" | ||
| "..": "dot2" | ||
| "normal": "normal value" | ||
|
|
||
| # Document 7: Malformed top-level field name "log." (trailing dot) | ||
| - do: | ||
| index: | ||
| index: test_disabled_object_4896 | ||
| id: 7 | ||
| refresh: true | ||
| body: | ||
| "@timestamp": "2025-11-26T17:16:00.000Z" | ||
| message: "malformed top-level" | ||
| "log.": | ||
| "nested": "value in log." | ||
|
|
||
| --- | ||
| teardown: | ||
| - do: | ||
| query.settings: | ||
| body: | ||
| transient: | ||
| plugins.calcite.enabled: false | ||
| - do: | ||
| indices.delete: | ||
| index: test_disabled_object_4896 | ||
|
|
||
| --- | ||
| "Single dot field name returns actual value": | ||
| - skip: | ||
| features: | ||
| - headers | ||
|
|
||
| - do: | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: source=test_disabled_object_4896 | where message = 'single dot' | fields log | ||
| - match: { "total": 1 } | ||
| - match: { "datarows.0.0": { ".": "single dot value" } } | ||
|
|
||
| --- | ||
| "Multiple dots field name returns actual value": | ||
| - skip: | ||
| features: | ||
| - headers | ||
|
|
||
| - do: | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: source=test_disabled_object_4896 | where message = 'multiple dots' | fields log | ||
| - match: { "total": 1 } | ||
| - match: { "datarows.0.0": { "..": "double dot value" } } | ||
|
|
||
| --- | ||
| "Trailing dots field name preserves original name": | ||
| - skip: | ||
| features: | ||
| - headers | ||
|
|
||
| # Field "a..." should NOT be truncated to "a" | ||
| - do: | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: source=test_disabled_object_4896 | where message = 'trailing dots' | fields log | ||
| - match: { "total": 1 } | ||
| - match: { "datarows.0.0": { "a...": "trailing dots value" } } | ||
|
|
||
| --- | ||
| "Leading dot field name preserves original name": | ||
| - skip: | ||
| features: | ||
| - headers | ||
|
|
||
| # Field ".a" should NOT create nested structure | ||
| - do: | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: source=test_disabled_object_4896 | where message = 'leading dot' | fields log | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is results if using
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I have added test cases to verify this behavior |
||
| - match: { "total": 1 } | ||
| - match: { "datarows.0.0": { ".a": "leading dot value" } } | ||
|
|
||
| --- | ||
| "Middle dots field name preserves original name": | ||
| - skip: | ||
| features: | ||
| - headers | ||
|
|
||
| # Field "a..b" should NOT create nested structure a -> "" -> b | ||
| - do: | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: source=test_disabled_object_4896 | where message = 'middle dots' | fields log | ||
| - match: { "total": 1 } | ||
| - match: { "datarows.0.0": { "a..b": "middle dots value" } } | ||
|
|
||
| --- | ||
| "Multiple unusual fields coexist with normal fields": | ||
| - skip: | ||
| features: | ||
| - headers | ||
|
|
||
| - do: | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: source=test_disabled_object_4896 | where message = 'mixed fields' | fields log | ||
| - match: { "total": 1 } | ||
| - match: { "datarows.0.0": { ".": "dot1", "..": "dot2", "normal": "normal value" } } | ||
|
|
||
| --- | ||
| "Malformed top-level field name preserves original name": | ||
| - skip: | ||
| features: | ||
| - headers | ||
|
|
||
| # Top-level field "log." should be preserved, not truncated to "log" | ||
| - do: | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: source=test_disabled_object_4896 | where message = 'malformed top-level' | ||
| - match: { "total": 1 } | ||
|
|
||
| --- | ||
| "Query all documents with unusual field names succeeds": | ||
| - skip: | ||
| features: | ||
| - headers | ||
|
|
||
| # All 7 documents should be queryable without crash | ||
| - do: | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: source=test_disabled_object_4896 | ||
| - match: { "total": 7 } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -999,15 +999,15 @@ public void testPopulateValueRecursive() { | |
| ExprTupleValue tupleValue = ExprTupleValue.empty(); | ||
|
|
||
| OpenSearchExprValueFactory.populateValueRecursive( | ||
| tupleValue, new JsonPath("log.json.time"), ExprValueUtils.integerValue(100)); | ||
| tupleValue, JsonPath.fromPath("log.json.time"), ExprValueUtils.integerValue(100)); | ||
| ExprValue expectedValue = | ||
| ExprValueUtils.tupleValue( | ||
| Map.of("log", Map.of("json", new LinkedHashMap<>(Map.of("time", 100))))); | ||
| assertEquals(expectedValue, tupleValue); | ||
|
|
||
| OpenSearchExprValueFactory.populateValueRecursive( | ||
| tupleValue, | ||
| new JsonPath("log.json"), | ||
| JsonPath.fromPath("log.json"), | ||
| ExprValueUtils.tupleValue(new LinkedHashMap<>(Map.of("status", "SUCCESS")))); | ||
| expectedValue = | ||
| ExprValueUtils.tupleValue( | ||
|
|
@@ -1025,7 +1025,7 @@ public void testPopulateValueRecursive() { | |
|
|
||
| // update the conflict value with the latest | ||
| OpenSearchExprValueFactory.populateValueRecursive( | ||
| tupleValue, new JsonPath("log.json.status"), ExprValueUtils.stringValue("FAILED")); | ||
| tupleValue, JsonPath.fromPath("log.json.status"), ExprValueUtils.stringValue("FAILED")); | ||
| expectedValue = | ||
| ExprValueUtils.tupleValue( | ||
| Map.of( | ||
|
|
@@ -1041,6 +1041,66 @@ public void testPopulateValueRecursive() { | |
| assertEquals(expectedValue, tupleValue); | ||
| } | ||
|
|
||
| @Test | ||
| public void constructWithDotOnlyFieldNameReturnsValue() { | ||
| // Field name "." (single dot) should return the actual value, not crash or return null | ||
| // This can happen with disabled object fields in OpenSearch | ||
| Map<String, ExprValue> result = tupleValue("{\"structV\":{\".\":\"value\"}}"); | ||
| ExprValue structValue = result.get("structV"); | ||
| // The "." field should contain the actual value | ||
| assertEquals(stringValue("value"), structValue.tupleValue().get(".")); | ||
| } | ||
|
|
||
| @Test | ||
| public void constructWithMultipleDotsFieldNameReturnsValue() { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. np: Could you verify what if the malformed field is not on top level? E.g.,
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated to have more comprehensive test cases for both UT and yaml test |
||
| // Field name ".." (multiple dots) should also return the actual value | ||
| Map<String, ExprValue> result = tupleValue("{\"structV\":{\"..\":\"value\"}}"); | ||
| ExprValue structValue = result.get("structV"); | ||
| assertEquals(stringValue("value"), structValue.tupleValue().get("..")); | ||
| } | ||
|
|
||
| @Test | ||
| public void constructWithDotFieldAlongsideValidFieldsReturnsAll() { | ||
| // Both dot field and valid fields should be returned | ||
| Map<String, ExprValue> result = | ||
| tupleValue( | ||
| "{\"structV\":{\".\":\"dotValue\",\"id\":1,\"state\":\"WA\"},\"stringV\":\"test\"}"); | ||
|
|
||
| // stringV field should be returned normally | ||
| assertEquals(stringValue("test"), result.get("stringV")); | ||
|
|
||
| // All fields inside structV should be returned | ||
| ExprValue structValue = result.get("structV"); | ||
| assertEquals(stringValue("dotValue"), structValue.tupleValue().get(".")); | ||
| assertEquals(integerValue(1), structValue.tupleValue().get("id")); | ||
| assertEquals(stringValue("WA"), structValue.tupleValue().get("state")); | ||
| } | ||
|
|
||
| @Test | ||
| public void constructWithTrailingDotsFieldNameReturnsValue() { | ||
| // Field name "a..." (with trailing dots) should preserve the original name | ||
| Map<String, ExprValue> result = tupleValue("{\"structV\":{\"a...\":\"value\"}}"); | ||
| ExprValue structValue = result.get("structV"); | ||
| // The field should be stored under "a...", not "a" | ||
| assertEquals(stringValue("value"), structValue.tupleValue().get("a...")); | ||
| } | ||
|
|
||
| @Test | ||
| public void constructWithLeadingDotsFieldNameReturnsValue() { | ||
| // Field name ".a" (with leading dot) should preserve the original name | ||
| Map<String, ExprValue> result = tupleValue("{\"structV\":{\".a\":\"value\"}}"); | ||
| ExprValue structValue = result.get("structV"); | ||
| assertEquals(stringValue("value"), structValue.tupleValue().get(".a")); | ||
| } | ||
|
|
||
| @Test | ||
| public void constructWithMiddleDotsFieldNameReturnsValue() { | ||
| // Field name "a..b" (with consecutive dots in middle) should preserve the original name | ||
| Map<String, ExprValue> result = tupleValue("{\"structV\":{\"a..b\":\"value\"}}"); | ||
| ExprValue structValue = result.get("structV"); | ||
| assertEquals(stringValue("value"), structValue.tupleValue().get("a..b")); | ||
| } | ||
|
|
||
| public Map<String, ExprValue> tupleValue(String jsonString) { | ||
| final ExprValue construct = exprValueFactory.construct(jsonString, false); | ||
| return construct.tupleValue(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Change L35-L112 single doc index to bulk index
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated