diff --git a/core/build.gradle b/core/build.gradle index b5d2a7da9f0..b8630a2ef90 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -54,6 +54,7 @@ dependencies { api "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" api "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}" api "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}" + api "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:${versions.jackson}" api group: 'com.google.code.gson', name: 'gson', version: '2.8.9' api group: 'com.tdunning', name: 't-digest', version: '3.3' api "net.minidev:json-smart:${versions.json_smart}" diff --git a/core/src/main/java/org/opensearch/sql/utils/YamlFormatter.java b/core/src/main/java/org/opensearch/sql/utils/YamlFormatter.java new file mode 100644 index 00000000000..8df2bbb6919 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/utils/YamlFormatter.java @@ -0,0 +1,46 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.utils; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator; + +/** + * YAML formatter utility class. Attributes are sorted alphabetically for consistent output. Check + * {@link YamlFormatterTest} for the actual formatting behavior. + */ +public class YamlFormatter { + + private static final ObjectMapper YAML_MAPPER; + + static { + YAMLFactory yamlFactory = new YAMLFactory(); + yamlFactory.disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER); + yamlFactory.enable(YAMLGenerator.Feature.MINIMIZE_QUOTES); // Enable smart quoting + yamlFactory.enable( + YAMLGenerator.Feature.ALWAYS_QUOTE_NUMBERS_AS_STRINGS); // Quote numeric strings + yamlFactory.enable(YAMLGenerator.Feature.INDENT_ARRAYS_WITH_INDICATOR); + YAML_MAPPER = new ObjectMapper(yamlFactory); + YAML_MAPPER.enable(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS); + } + + /** + * Formats any object into YAML format. + * + * @param object the object to format + * @return YAML-formatted string representation + */ + public static String formatToYaml(Object object) { + try { + return YAML_MAPPER.writeValueAsString(object); + } catch (JsonProcessingException e) { + throw new RuntimeException("Failed to format object to YAML", e); + } + } +} diff --git a/core/src/test/java/org/opensearch/sql/utils/YamlFormatter.java b/core/src/test/java/org/opensearch/sql/utils/YamlFormatter.java new file mode 100644 index 00000000000..7dc939e7e5a --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/utils/YamlFormatter.java @@ -0,0 +1,43 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.utils; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator; + +/** + * YAML formatter utility class. Attributes are sorted alphabetically for consistent output. Check + * {@link YamlFormatterTest} for the actual formatting behavior. + */ +public class YamlFormatter { + + private static final ObjectMapper YAML_MAPPER = initObjectMapper(); + + private static ObjectMapper initObjectMapper() { + YAMLFactory yamlFactory = new YAMLFactory(); + yamlFactory.disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER); + yamlFactory.enable(YAMLGenerator.Feature.MINIMIZE_QUOTES); // Enable smart quoting + yamlFactory.enable( + YAMLGenerator.Feature.ALWAYS_QUOTE_NUMBERS_AS_STRINGS); // Quote numeric strings + yamlFactory.enable(YAMLGenerator.Feature.INDENT_ARRAYS_WITH_INDICATOR); + + ObjectMapper mapper = new ObjectMapper(yamlFactory); + mapper.enable(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS); + return mapper; + } + + /** Formats any object into YAML. It will always use LF as line break regardless of OS. */ + public static String formatToYaml(Object object) { + try { + return YAML_MAPPER.writer().withDefaultPrettyPrinter().writeValueAsString(object); + } catch (JsonProcessingException e) { + throw new RuntimeException("Failed to format object to YAML", e); + } + } +} diff --git a/core/src/test/java/org/opensearch/sql/utils/YamlFormatterTest.java b/core/src/test/java/org/opensearch/sql/utils/YamlFormatterTest.java new file mode 100644 index 00000000000..c602303816d --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/utils/YamlFormatterTest.java @@ -0,0 +1,140 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import net.minidev.json.JSONObject; +import org.junit.jupiter.api.Test; + +class YamlFormatterTest { + @Test + void testAttributes() { + JSONObject json1 = new JSONObject(); + json1.put("attr1", null); + json1.put("attr2", "null"); + json1.put("attr3", 123); + json1.put("attr4", "123"); + + String actualYaml = YamlFormatter.formatToYaml(json1); + + String expectedYaml = "attr1: null\nattr2: \"null\"\nattr3: 123\nattr4: \"123\"\n"; + assertEquals(expectedYaml, actualYaml); + } + + @Test + void testJSONObjectConsistentOutput() { + JSONObject json1 = new JSONObject(); + json1.put("query", "SELECT * FROM users"); + json1.put("database", "test"); + json1.put("filters", new String[] {"active = true", "role = 'user'"}); + + JSONObject metadata1 = new JSONObject(); + metadata1.put("version", "1.0"); + metadata1.put("author", "system"); + json1.put("metadata", metadata1); + + // Create second JSONObject with same data but different insertion order + JSONObject json2 = new JSONObject(); + JSONObject metadata2 = new JSONObject(); + metadata2.put("author", "system"); + metadata2.put("version", "1.0"); + json2.put("metadata", metadata2); + + json2.put("filters", new String[] {"active = true", "role = 'user'"}); + json2.put("database", "test"); + json2.put("query", "SELECT * FROM users"); + + String yaml1 = YamlFormatter.formatToYaml(json1); + String yaml2 = YamlFormatter.formatToYaml(json2); + + String expectedYaml = + "database: test\n" + + "filters:\n" + + " - active = true\n" + + " - role = 'user'\n" + + "metadata:\n" + + " author: system\n" + + " version: \"1.0\"\n" + + "query: SELECT * FROM users\n"; + + assertEquals(expectedYaml, yaml1, "YAML output should match expected sorted format"); + assertEquals(yaml1, yaml2, "YAML output should be identical for same JSONObject data"); + assertTrue(yaml1.indexOf("database:") < yaml1.indexOf("filters:")); + assertTrue(yaml1.indexOf("filters:") < yaml1.indexOf("metadata:")); + assertTrue(yaml1.indexOf("metadata:") < yaml1.indexOf("query:")); + } + + @Test + void testMultiLineStrings() { + JSONObject json = new JSONObject(); + json.put( + "query", + "SELECT name, age, department\n" + + " FROM users u\n" + + " JOIN departments d ON u.dept_id = d.id\n" + + "WHERE u.active = true\n" + + "ORDER BY u.created_date DESC\n"); + json.put("singleLine", "Simple single line text"); + json.put("number", 42); + + // Create nested metadata object with multi-line description + JSONObject metadata = new JSONObject(); + metadata.put( + "description", "Multi-line description\nof the query purpose\nand expected results"); + metadata.put("author", "system"); + metadata.put("version", "2.0"); + json.put("metadata", metadata); + + String yaml = YamlFormatter.formatToYaml(json); + + // Expected complete YAML output with nested multi-line string + String expectedYaml = + "metadata:\n" + + " author: system\n" + + " description: |-\n" + + " Multi-line description\n" + + " of the query purpose\n" + + " and expected results\n" + + " version: \"2.0\"\n" + + "number: 42\n" + + "query: |\n" + + " SELECT name, age, department\n" + + " FROM users u\n" + + " JOIN departments d ON u.dept_id = d.id\n" + + " WHERE u.active = true\n" + + " ORDER BY u.created_date DESC\n" + + "singleLine: Simple single line text\n"; + + assertEquals( + expectedYaml, + yaml, + "YAML output should match expected format with nested multi-line strings"); + } + + @Test + void testFormatArbitraryObject() { + TestObject testObj = new TestObject("test", 42); + + String yaml = YamlFormatter.formatToYaml(testObj); + + assertNotNull(yaml); + assertTrue(yaml.contains("name:")); + assertTrue(yaml.contains("value: 42")); + } + + private static class TestObject { + public String name; + public int value; + + public TestObject(String name, int value) { + this.name = name; + this.value = value; + } + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index b3e7e70d0a3..61992ebfd35 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -10,6 +10,7 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STRINGS; import static org.opensearch.sql.util.MatcherUtils.assertJsonEqualsIgnoreId; +import static org.opensearch.sql.util.MatcherUtils.assertYamlEqualsJsonIgnoreId; import java.io.IOException; import java.util.Locale; @@ -40,8 +41,8 @@ public void supportSearchSargPushDown_singleRange() throws IOException { String query = "source=opensearch-sql_test_index_account | where age >= 1.0 and age < 10 | fields age"; var result = explainQueryToString(query); - String expected = loadExpectedPlan("explain_sarg_filter_push_single_range.json"); - assertJsonEqualsIgnoreId(expected, result); + String expected = loadExpectedPlan("explain_sarg_filter_push_single_range.yaml"); + assertYamlEqualsJsonIgnoreId(expected, result); } // Only for Calcite diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java index 4800bad4e06..ff86b52a783 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java @@ -30,6 +30,7 @@ import org.opensearch.sql.common.setting.Settings.Key; import org.opensearch.sql.legacy.SQLIntegTestCase; import org.opensearch.sql.util.RetryProcessor; +import org.opensearch.sql.utils.YamlFormatter; /** OpenSearch Rest integration test base for PPL testing. */ public abstract class PPLIntegTestCase extends SQLIntegTestCase { @@ -58,6 +59,12 @@ protected String explainQueryToString(String query) throws IOException { return explainQueryToString(query, false); } + protected String explainQueryToYaml(String query) throws IOException { + String jsonResponse = explainQueryToString(query); + JSONObject jsonObject = jsonify(jsonResponse); + return YamlFormatter.formatToYaml(jsonObject); + } + protected String explainQueryToString(String query, boolean extended) throws IOException { Response response = client() diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index b7e1bf150aa..929397594f0 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -18,6 +18,7 @@ import static org.hamcrest.Matchers.hasItems; import static org.junit.Assert.assertEquals; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Strings; import com.google.gson.JsonParser; import java.math.BigDecimal; @@ -37,10 +38,12 @@ import org.json.JSONObject; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; +import org.opensearch.sql.utils.YamlFormatter; public class MatcherUtils { private static final Logger LOG = LogManager.getLogger(); + private static final ObjectMapper JSON_MAPPER = new ObjectMapper(); /** * Assert field value in object by a custom matcher and getter to access the field. @@ -422,4 +425,40 @@ private static String eliminateRelId(String s) { private static String eliminatePid(String s) { return s.replaceAll("pitId=[^,]+,", "pitId=*,"); } + + public static void assertYamlEqualsJsonIgnoreId(String expectedYaml, String actualJson) { + String cleanedYaml = cleanUpYaml(jsonToYaml(actualJson)); + assertYamlEquals(expectedYaml, cleanedYaml); + } + + public static void assertYamlEquals(String expected, String actual) { + String normalizedExpected = normalizeLineBreaks(expected).trim(); + String normalizedActual = normalizeLineBreaks(actual).trim(); + assertEquals( + formatMessage(normalizedExpected, normalizedActual), normalizedExpected, normalizedActual); + } + + private static String normalizeLineBreaks(String s) { + return s.replace("\r\n", "\n").replace("\r", "\n"); + } + + private static String cleanUpYaml(String s) { + return s.replaceAll("\"utcTimestamp\":\\d+", "\"utcTimestamp\": 0") + .replaceAll("rel#\\d+", "rel#") + .replaceAll("RelSubset#\\d+", "RelSubset#") + .replaceAll("pitId=[^,]+,", "pitId=*,"); + } + + private static String jsonToYaml(String json) { + try { + Object jsonObject = JSON_MAPPER.readValue(json, Object.class); + return YamlFormatter.formatToYaml(jsonObject); + } catch (Exception e) { + throw new RuntimeException("Failed to convert JSON to YAML", e); + } + } + + private static String formatMessage(String expected, String actual) { + return String.format("### Expected ###\n%s\n### Actual###\n%s\n", expected, actual); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_single_range.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_single_range.json deleted file mode 100644 index 8bf63d38a93..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_single_range.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8])\n LogicalFilter(condition=[SEARCH($8, Sarg[[1.0:DECIMAL(11, 1)..10:DECIMAL(11, 1))]:DECIMAL(11, 1))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], FILTER->SEARCH($0, Sarg[[1.0:DECIMAL(11, 1)..10:DECIMAL(11, 1))]:DECIMAL(11, 1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":1.0,\"to\":10.0,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_single_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_single_range.yaml new file mode 100644 index 00000000000..c1321360b07 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_single_range.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age=[$8]) + LogicalFilter(condition=[SEARCH($8, Sarg[[1.0:DECIMAL(11, 1)..10:DECIMAL(11, 1))]:DECIMAL(11, 1))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], FILTER->SEARCH($0, Sarg[[1.0:DECIMAL(11, 1)..10:DECIMAL(11, 1))]:DECIMAL(11, 1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":1.0,"to":10.0,"include_lower":true,"include_upper":false,"boost":1.0}}},"_source":{"includes":["age"],"excludes":[]},"sort":[{"_doc":{"order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sarg_filter_push_single_range.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sarg_filter_push_single_range.json deleted file mode 100644 index 959faa7c936..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sarg_filter_push_single_range.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8])\n LogicalFilter(condition=[SEARCH($8, Sarg[[1.0:DECIMAL(11, 1)..10:DECIMAL(11, 1))]:DECIMAL(11, 1))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=[Sarg[[1.0:DECIMAL(11, 1)..10:DECIMAL(11, 1))]:DECIMAL(11, 1)], expr#18=[SEARCH($t8, $t17)], age=[$t8], $condition=[$t18])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sarg_filter_push_single_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sarg_filter_push_single_range.yaml new file mode 100644 index 00000000000..834e267b63f --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sarg_filter_push_single_range.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age=[$8]) + LogicalFilter(condition=[SEARCH($8, Sarg[[1.0:DECIMAL(11, 1)..10:DECIMAL(11, 1))]:DECIMAL(11, 1))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[Sarg[[1.0:DECIMAL(11, 1)..10:DECIMAL(11, 1))]:DECIMAL(11, 1)], expr#18=[SEARCH($t8, $t17)], age=[$t8], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])