diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java index 2e9d62f0ace..8fdd93c12cd 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java @@ -11,6 +11,7 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DATATYPE_NUMERIC; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DATE_FORMATS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TELEMETRY; import static org.opensearch.sql.util.MatcherUtils.assertJsonEquals; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -41,6 +42,7 @@ public void init() throws Exception { loadIndex(Index.DATE_FORMATS); loadIndex(Index.DATA_TYPE_NUMERIC); loadIndex(Index.LOGS); + loadIndex(Index.TELEMETRY); loadIndex(Index.TIME_TEST_DATA); } @@ -1246,4 +1248,255 @@ public void testLimitAfterAggregation() throws IOException { verifySchema(response, schema("count()", "bigint"), schema("age", "int")); verifyDataRows(response, rows(1, 39), rows(2, 36), rows(1, 34)); } + + @Test + public void testFirstLastWithSimpleField() throws IOException { + // This should work - testing simple field first + JSONObject actual = + executeQuery( + String.format("source=%s | stats first(severityNumber)", TEST_INDEX_TELEMETRY)); + verifySchema(actual, schema("first(severityNumber)", "int")); + verifyDataRows(actual, rows(9)); + } + + @Test + public void testFirstLastWithDeepNestedField() throws IOException { + // This test should now work with the fix for ClassCastException + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats first(`resource.attributes.telemetry.sdk.language`)", + TEST_INDEX_TELEMETRY)); + verifySchema(actual, schema("first(`resource.attributes.telemetry.sdk.language`)", "string")); + verifyDataRows(actual, rows("java")); + } + + @Test + public void testLastWithDeepNestedField() throws IOException { + // This test should now work with the fix for ClassCastException + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats last(`resource.attributes.telemetry.sdk.language`)", + TEST_INDEX_TELEMETRY)); + verifySchema(actual, schema("last(`resource.attributes.telemetry.sdk.language`)", "string")); + verifyDataRows(actual, rows("rust")); + } + + @Test + public void testFirstLastWithDeepNestedFieldByGroup() throws IOException { + // This test should now work with the fix for ClassCastException + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats first(`resource.attributes.telemetry.sdk.language`) by" + + " severityNumber", + TEST_INDEX_TELEMETRY)); + verifySchema( + actual, + schema("first(`resource.attributes.telemetry.sdk.language`)", "string"), + schema("severityNumber", "int")); + verifyDataRows(actual, rows("java", 9), rows("python", 12), rows("go", 16)); + } + + @Test + public void testMinWithDeepNestedField() throws IOException { + // Test that min() works with deeply nested fields after the ClassCastException fix + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats min(`resource.attributes.telemetry.sdk.language`)", + TEST_INDEX_TELEMETRY)); + verifySchema(actual, schema("min(`resource.attributes.telemetry.sdk.language`)", "string")); + verifyDataRows( + actual, rows("go")); // Alphabetically first: go < java < javascript < python < rust + } + + @Test + public void testMaxWithDeepNestedField() throws IOException { + // Test that max() works with deeply nested fields after the ClassCastException fix + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats max(`resource.attributes.telemetry.sdk.language`)", + TEST_INDEX_TELEMETRY)); + verifySchema(actual, schema("max(`resource.attributes.telemetry.sdk.language`)", "string")); + verifyDataRows( + actual, rows("rust")); // Alphabetically last: go < java < javascript < python < rust + } + + @Test + public void testMinMaxWithDeepNestedFieldByGroup() throws IOException { + // Test that min() and max() work with deeply nested fields and grouping + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats min(`resource.attributes.telemetry.sdk.language`) by" + + " severityNumber | sort severityNumber", + TEST_INDEX_TELEMETRY)); + verifySchema( + actual, + schema("min(`resource.attributes.telemetry.sdk.language`)", "string"), + schema("severityNumber", "int")); + // severityNumber 9: java, javascript -> min = java + // severityNumber 12: python, rust -> min = python + // severityNumber 16: go -> min = go + verifyDataRows(actual, rows("java", 9), rows("python", 12), rows("go", 16)); + } + + @Test + public void testMinMaxMultipleNestedFields() throws IOException { + // Test min/max with multiple nested field aggregations in one query + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats min(`resource.attributes.telemetry.sdk.language`) as min_lang," + + " max(`resource.attributes.telemetry.sdk.language`) as max_lang", + TEST_INDEX_TELEMETRY)); + verifySchema(actual, schema("min_lang", "string"), schema("max_lang", "string")); + verifyDataRows(actual, rows("go", "rust")); + } + + @Test + public void testMinWithIntegerNestedField() throws IOException { + // Test that min() works with deeply nested integer fields + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats min(`resource.attributes.telemetry.sdk.version`)", + TEST_INDEX_TELEMETRY)); + verifySchema(actual, schema("min(`resource.attributes.telemetry.sdk.version`)", "int")); + verifyDataRows(actual, rows(10)); // Minimum version is 10 + } + + @Test + public void testMaxWithIntegerNestedField() throws IOException { + // Test that max() works with deeply nested integer fields + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats max(`resource.attributes.telemetry.sdk.version`)", + TEST_INDEX_TELEMETRY)); + verifySchema(actual, schema("max(`resource.attributes.telemetry.sdk.version`)", "int")); + verifyDataRows(actual, rows(14)); // Maximum version is 14 + } + + @Test + public void testMinMaxIntegerNestedFieldsByGroup() throws IOException { + // Test min/max on integer nested fields with grouping + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats min(`resource.attributes.telemetry.sdk.version`) as min_ver," + + " max(`resource.attributes.telemetry.sdk.version`) as max_ver by" + + " severityNumber", + TEST_INDEX_TELEMETRY)); + verifySchema( + actual, + schema("min_ver", "int"), + schema("max_ver", "int"), + schema("severityNumber", "int")); + // severityNumber 9: versions 10, 12 -> min=10, max=12 + // severityNumber 12: versions 11, 14 -> min=11, max=14 + // severityNumber 16: version 13 -> min=13, max=13 + verifyDataRows(actual, rows(10, 12, 9), rows(11, 14, 12), rows(13, 13, 16)); + } + + @Test + public void testFirstLastWithIntegerNestedField() throws IOException { + // Test first/last with deeply nested integer fields + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats first(`resource.attributes.telemetry.sdk.version`) as first_ver," + + " last(`resource.attributes.telemetry.sdk.version`) as last_ver", + TEST_INDEX_TELEMETRY)); + verifySchema(actual, schema("first_ver", "int"), schema("last_ver", "int")); + verifyDataRows(actual, rows(10, 14)); + } + + @Test + public void testFirstLastWithBooleanNestedField() throws IOException { + // Test first/last with deeply nested boolean fields + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats first(`resource.attributes.telemetry.sdk.enabled`) as" + + " first_enabled, last(`resource.attributes.telemetry.sdk.enabled`) as" + + " last_enabled", + TEST_INDEX_TELEMETRY)); + verifySchema(actual, schema("first_enabled", "boolean"), schema("last_enabled", "boolean")); + verifyDataRows(actual, rows(true, true)); // First record is true, last record is true + } + + @Test + public void testCountWithBooleanNestedFieldGroupBy() throws IOException { + // Test count aggregation grouped by boolean nested field + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats count() as cnt by `resource.attributes.telemetry.sdk.enabled`", + TEST_INDEX_TELEMETRY)); + verifySchema( + actual, + schema("cnt", "bigint"), + schema("resource.attributes.telemetry.sdk.enabled", "boolean")); + verifyDataRows(actual, rows(2L, false), rows(3L, true)); // 2 false, 3 true values + } + + @Test + public void testMinMaxWithBooleanNestedField() throws IOException { + // Test min/max with deeply nested boolean fields + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats min(`resource.attributes.telemetry.sdk.enabled`) as min_enabled," + + " max(`resource.attributes.telemetry.sdk.enabled`) as max_enabled", + TEST_INDEX_TELEMETRY)); + verifySchema(actual, schema("min_enabled", "boolean"), schema("max_enabled", "boolean")); + verifyDataRows(actual, rows(false, true)); // Min is false, max is true + } + + @Test + public void testBooleanNestedFieldByGroup() throws IOException { + // Test boolean nested fields with grouping by other fields + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats count() as cnt," + + " first(`resource.attributes.telemetry.sdk.enabled`) as enabled by" + + " severityNumber", + TEST_INDEX_TELEMETRY)); + verifySchema( + actual, + schema("cnt", "bigint"), + schema("enabled", "boolean"), + schema("severityNumber", "int")); + // severityNumber 9: java (true), javascript (true) -> 2 records, first is true + // severityNumber 12: python (false), rust (true) -> 2 records, first is false + // severityNumber 16: go (false) -> 1 record, first is false + verifyDataRows(actual, rows(2L, true, 9), rows(2L, false, 12), rows(1L, false, 16)); + } + + @Test + public void testMixedTypesNestedFieldAggregations() throws IOException { + // Test aggregating multiple nested field types in one query + JSONObject actual = + executeQuery( + String.format( + "source=%s | stats min(`resource.attributes.telemetry.sdk.version`) as min_ver," + + " max(`resource.attributes.telemetry.sdk.version`) as max_ver," + + " min(`resource.attributes.telemetry.sdk.enabled`) as min_enabled," + + " max(`resource.attributes.telemetry.sdk.enabled`) as max_enabled," + + " first(`resource.attributes.telemetry.sdk.language`) as first_lang", + TEST_INDEX_TELEMETRY)); + verifySchema( + actual, + schema("min_ver", "int"), + schema("max_ver", "int"), + schema("min_enabled", "boolean"), + schema("max_enabled", "boolean"), + schema("first_lang", "string")); + verifyDataRows(actual, rows(10, 14, false, true, "java")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index f91125458bc..2ff0a8061b1 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -660,6 +660,11 @@ public enum Index { "_doc", getDeepNestedIndexMapping(), "src/test/resources/deep_nested_index_data.json"), + TELEMETRY( + TestsConstants.TEST_INDEX_TELEMETRY, + "_doc", + getMappingFile("telemetry_test_mapping.json"), + "src/test/resources/telemetry_test_data.json"), DATA_TYPE_NUMERIC( TestsConstants.TEST_INDEX_DATATYPE_NUMERIC, "_doc", diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index df11332fd44..3a5135f9ec2 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -47,6 +47,7 @@ public class TestsConstants { public static final String TEST_INDEX_DATE = TEST_INDEX + "_date"; public static final String TEST_INDEX_DATE_TIME = TEST_INDEX + "_datetime"; public static final String TEST_INDEX_DEEP_NESTED = TEST_INDEX + "_deep_nested"; + public static final String TEST_INDEX_TELEMETRY = TEST_INDEX + "_telemetry"; public static final String TEST_INDEX_STRINGS = TEST_INDEX + "_strings"; public static final String TEST_INDEX_DATATYPE_NUMERIC = TEST_INDEX + "_datatypes_numeric"; public static final String TEST_INDEX_DATATYPE_NONNUMERIC = TEST_INDEX + "_datatypes_nonnumeric"; diff --git a/integ-test/src/test/resources/indexDefinitions/telemetry_test_mapping.json b/integ-test/src/test/resources/indexDefinitions/telemetry_test_mapping.json new file mode 100644 index 00000000000..8937a29de07 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/telemetry_test_mapping.json @@ -0,0 +1,39 @@ +{ + "mappings": { + "properties": { + "resource": { + "properties": { + "attributes": { + "properties": { + "telemetry": { + "properties": { + "sdk": { + "properties": { + "language": { + "type": "keyword", + "ignore_above": 256 + }, + "name": { + "type": "keyword", + "ignore_above": 256 + }, + "version": { + "type": "integer" + }, + "enabled": { + "type": "boolean" + } + } + } + } + } + } + } + } + }, + "severityNumber": { + "type": "integer" + } + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/telemetry_test_data.json b/integ-test/src/test/resources/telemetry_test_data.json new file mode 100644 index 00000000000..1ae4a9a6a96 --- /dev/null +++ b/integ-test/src/test/resources/telemetry_test_data.json @@ -0,0 +1,10 @@ +{"index":{"_id":"1"}} +{"resource": {"attributes": {"telemetry": {"sdk": {"language": "java", "name": "opentelemetry", "version": 10, "enabled": true}}}}, "severityNumber": 9} +{"index":{"_id":"2"}} +{"resource": {"attributes": {"telemetry": {"sdk": {"language": "python", "name": "opentelemetry", "version": 11, "enabled": false}}}}, "severityNumber": 12} +{"index":{"_id":"3"}} +{"resource": {"attributes": {"telemetry": {"sdk": {"language": "javascript", "name": "opentelemetry", "version": 12, "enabled": true}}}}, "severityNumber": 9} +{"index":{"_id":"4"}} +{"resource": {"attributes": {"telemetry": {"sdk": {"language": "go", "name": "opentelemetry", "version": 13, "enabled": false}}}}, "severityNumber": 16} +{"index":{"_id":"5"}} +{"resource": {"attributes": {"telemetry": {"sdk": {"language": "rust", "name": "opentelemetry", "version": 14, "enabled": true}}}}, "severityNumber": 12} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index d813ffdb962..311f5ccf7d8 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -182,7 +182,8 @@ public ExprValue construct(String jsonString, boolean supportArrays) { * @return ExprValue */ public ExprValue construct(String field, Object value, boolean supportArrays) { - return parse(new ObjectContent(value), field, type(field), supportArrays); + Object extractedValue = extractFinalPrimitiveValue(value); + return parse(new ObjectContent(extractedValue), field, type(field), supportArrays); } private ExprValue parse( @@ -526,4 +527,26 @@ private ExprValue parseInnerArrayValue( private String makeField(String path, String field) { return path.equalsIgnoreCase(TOP_PATH) ? field : String.join(".", path, field); } + + /** + * Recursively extracts the final primitive value from nested Map structures. For example: + * {attributes={telemetry={sdk={language=java}}}} -> "java" + * + * @param value The value to extract from + * @return The extracted primitive value, or the original value if extraction is not possible + */ + @SuppressWarnings("unchecked") + private Object extractFinalPrimitiveValue(Object value) { + if (value == null || !(value instanceof Map)) { + return value; + } + + Map map = (Map) value; + if (map.size() == 1) { + Object singleValue = map.values().iterator().next(); + return extractFinalPrimitiveValue(singleValue); + } + + return value; + } }