From 6b3678a56a497fc28acf2ba4549bfe5e8c7a7925 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Mon, 15 Dec 2025 18:15:00 +0800 Subject: [PATCH 1/9] Support pushdown dedup with expression Signed-off-by: Lantao Jin --- .../sql/calcite/remote/CalciteExplainIT.java | 26 +++- .../org/opensearch/sql/util/MatcherUtils.java | 10 ++ .../calcite/explain_dedup_expr1.yaml | 11 ++ .../calcite/explain_dedup_expr2.yaml | 11 ++ .../explain_dedup_expr2_alternative.yaml | 11 ++ .../calcite/explain_dedup_expr3.yaml | 11 ++ .../explain_dedup_expr3_alternative.yaml | 11 ++ .../calcite/explain_dedup_expr4.yaml | 12 ++ .../explain_dedup_expr4_alternative.yaml | 12 ++ .../calcite/explain_dedup_expr_complex.yaml | 13 ++ ...xplain_dedup_expr_complex_alternative.yaml | 13 ++ .../planner/rules/DedupPushdownRule.java | 142 ++++++++++++------ .../planner/rules/OpenSearchIndexRules.java | 3 +- .../response/agg/TopHitsParser.java | 7 +- .../scan/AbstractCalciteIndexScan.java | 11 +- .../storage/scan/context/PushDownContext.java | 14 ++ .../storage/scan/context/SortDigest.java | 16 ++ .../storage/script/CalciteScriptEngine.java | 3 + .../script/field/CalciteFieldScript.java | 45 ++++++ .../field/CalciteFieldScriptFactory.java | 37 +++++ .../field/CalciteFieldScriptLeafFactory.java | 51 +++++++ 21 files changed, 410 insertions(+), 60 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2_alternative.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3_alternative.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex_alternative.yaml create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortDigest.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScript.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptFactory.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 37230a3c402..d567b182a9c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2020,7 +2020,7 @@ public void testComplexDedup() throws IOException { + " dedup 2 gender, state")); } - @Ignore("https://github.com/opensearch-project/sql/issues/4789") + @Test public void testDedupExpr() throws IOException { enabledOnlyWhenPushdownIsEnabled(); String expected = loadExpectedPlan("explain_dedup_expr1.yaml"); @@ -2030,30 +2030,36 @@ public void testDedupExpr() throws IOException { "source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | dedup 1" + " new_gender")); expected = loadExpectedPlan("explain_dedup_expr2.yaml"); + String alternative = loadExpectedPlan("explain_dedup_expr2_alternative.yaml"); assertYamlEqualsIgnoreId( expected, + alternative, explainQueryYaml( "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + " eval new_gender = lower(gender), new_state = lower(state) | dedup 1 new_gender," + " new_state")); expected = loadExpectedPlan("explain_dedup_expr3.yaml"); + alternative = loadExpectedPlan("explain_dedup_expr3_alternative.yaml"); assertYamlEqualsIgnoreId( expected, + alternative, explainQueryYaml( "source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | eval" + " new_state = lower(state) | dedup 2 new_gender, new_state")); expected = loadExpectedPlan("explain_dedup_expr4.yaml"); + alternative = loadExpectedPlan("explain_dedup_expr4_alternative.yaml"); assertYamlEqualsIgnoreId( expected, + alternative, explainQueryYaml( "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + " eval new_gender = lower(gender) | eval new_state = lower(state) | sort gender," + " -state | dedup 2 new_gender, new_state")); } - @Ignore("https://github.com/opensearch-project/sql/issues/4789") + @Test public void testDedupRename() throws IOException { - // rename changes nothing, reuse the same yaml files of testDedupExpr() + // rename changes nothing, reuse the same yaml files enabledOnlyWhenPushdownIsEnabled(); String expected = loadExpectedPlan("explain_dedup_expr1.yaml"); assertYamlEqualsIgnoreId( @@ -2062,23 +2068,29 @@ public void testDedupRename() throws IOException { "source=opensearch-sql_test_index_account | eval tmp_gender = lower(gender) | rename" + " tmp_gender as new_gender | dedup 1 new_gender")); expected = loadExpectedPlan("explain_dedup_expr2.yaml"); + String alternative = loadExpectedPlan("explain_dedup_expr2_alternative.yaml"); assertYamlEqualsIgnoreId( expected, + alternative, explainQueryYaml( "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + " eval tmp_gender = lower(gender), tmp_state = lower(state) | rename tmp_gender" + " as new_gender | rename tmp_state as new_state | dedup 1 new_gender," + " new_state")); expected = loadExpectedPlan("explain_dedup_expr3.yaml"); + alternative = loadExpectedPlan("explain_dedup_expr3_alternative.yaml"); assertYamlEqualsIgnoreId( expected, + alternative, explainQueryYaml( "source=opensearch-sql_test_index_account | eval tmp_gender = lower(gender) | eval" + " tmp_state = lower(state) | rename tmp_gender as new_gender | rename tmp_state" + " as new_state | dedup 2 new_gender, new_state")); expected = loadExpectedPlan("explain_dedup_expr4.yaml"); + alternative = loadExpectedPlan("explain_dedup_expr4_alternative.yaml"); assertYamlEqualsIgnoreId( expected, + alternative, explainQueryYaml( "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + " eval tmp_gender = lower(gender) | eval tmp_state = lower(state) | rename" @@ -2086,12 +2098,14 @@ public void testDedupRename() throws IOException { + " -state | dedup 2 new_gender, new_state")); } - @Ignore("SortExprIndexScanRule not work?") - public void testDedupRename2() throws IOException { + @Test + public void testRenameDedupThenSortExpr() throws IOException { enabledOnlyWhenPushdownIsEnabled(); - String expected = loadExpectedPlan("explain_dedup_expr4.yaml"); + String expected = loadExpectedPlan("explain_dedup_expr_complex.yaml"); + String alternative = loadExpectedPlan("explain_dedup_expr_complex_alternative.yaml"); assertYamlEqualsIgnoreId( expected, + alternative, explainQueryYaml( "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + " eval tmp_gender = lower(gender) | eval tmp_state = lower(state) | rename" diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index 079e332ef32..9419b76678d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -447,6 +447,16 @@ public static void assertYamlEqualsIgnoreId(String expectedYaml, String actualYa assertYamlEquals(cleanUpYaml(expectedYaml), cleanUpYaml(actualYaml)); } + /** Due to the implementation of DSL, may both expected YAML files are correct. */ + public static void assertYamlEqualsIgnoreId( + String expectedYaml1, String expectedYaml2, String actualYaml) { + try { + assertYamlEquals(cleanUpYaml(expectedYaml1), cleanUpYaml(actualYaml)); + } catch (AssertionError e) { + assertYamlEquals(cleanUpYaml(expectedYaml2), cleanUpYaml(actualYaml)); + } + } + public static void assertYamlEquals(String expected, String actual) { String normalizedExpected = normalizeLineBreaks(expected).trim(); String normalizedActual = normalizeLineBreaks(actual).trim(); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml index e69de29bb2d..5f13847540f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new_gender=[$17]) + LogicalFilter(condition=[<=($18, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[$17], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $17)]) + LogicalFilter(condition=[IS NOT NULL($17)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{"LOWER($4)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml index e69de29bb2d..aa8c90ec119 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2_alternative.yaml new file mode 100644 index 00000000000..73472aa7762 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2_alternative.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml index e69de29bb2d..1803e419652 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new_gender=[$17], new_state=[$18]) + LogicalFilter(condition=[<=($19, 2)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[$17], new_state=[$18], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $17, $18)]) + LogicalFilter(condition=[AND(IS NOT NULL($17), IS NOT NULL($18))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{"LOWER($4)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"LOWER($7)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3_alternative.yaml new file mode 100644 index 00000000000..46337bb34c1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3_alternative.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new_gender=[$17], new_state=[$18]) + LogicalFilter(condition=[<=($19, 2)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[$17], new_state=[$18], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $17, $18)]) + LogicalFilter(condition=[AND(IS NOT NULL($17), IS NOT NULL($18))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{"LOWER($7)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"LOWER($4)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml index e69de29bb2d..840cb578f8e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml new file mode 100644 index 00000000000..57b24111649 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex.yaml new file mode 100644 index 00000000000..469c2517b2c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) + LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[4 ASC FIRST, 5 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex_alternative.yaml new file mode 100644 index 00000000000..7b88b70d348 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex_alternative.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) + LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[4 ASC FIRST, 5 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index 70e446fa4bc..ea2d49db194 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -5,14 +5,17 @@ package org.opensearch.sql.opensearch.planner.rules; +import com.google.common.collect.Streams; import java.util.ArrayList; import java.util.List; import java.util.function.Predicate; +import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; @@ -45,7 +48,11 @@ protected void onMatchImpl(RelOptRuleCall call) { final LogicalProject projectWithWindow = call.rel(2); if (call.rels.length == 5) { final CalciteLogicalIndexScan scan = call.rel(4); - apply(call, finalProject, numOfDedupFilter, projectWithWindow, scan); + apply(call, finalProject, numOfDedupFilter, projectWithWindow, null, scan); + } else if (call.rels.length == 6) { + final LogicalProject projectWithExpr = call.rel(4); + final CalciteLogicalIndexScan scan = call.rel(5); + apply(call, finalProject, numOfDedupFilter, projectWithWindow, projectWithExpr, scan); } else { throw new AssertionError( String.format( @@ -59,6 +66,7 @@ protected void apply( LogicalProject finalProject, LogicalFilter numOfDedupFilter, LogicalProject projectWithWindow, + @Nullable LogicalProject projectWithExpr, CalciteLogicalIndexScan scan) { List windows = PlanUtils.getRexWindowFromProject(projectWithWindow); if (windows.size() != 1) { @@ -73,10 +81,16 @@ protected void apply( // TODO https://github.com/opensearch-project/sql/issues/4564 return; } - if (projectWithWindow.getProjects().stream() + List dedupColumnNames = + dedupColumns.stream() + .filter(rex -> rex.isA(SqlKind.INPUT_REF)) + .map(r -> ((RexInputRef) r).getIndex()) + .map(i -> projectWithWindow.getInput().getRowType().getFieldNames().get(i)) + .toList(); + if (projectWithExpr != null && projectWithWindow.getProjects().stream() .filter(rex -> !rex.isA(SqlKind.ROW_NUMBER)) .filter(Predicate.not(dedupColumns::contains)) - .anyMatch(rex -> !rex.isA(SqlKind.INPUT_REF))) { + .anyMatch(rex -> !projectWithExpr.getProjects().get(((RexInputRef) rex).getIndex()).isA(SqlKind.INPUT_REF))) { // TODO fallback to the approach of Collapse search // | eval new_age = age + 1 | fields gender, new_age | dedup 1 gender if (LOG.isDebugEnabled()) { @@ -111,71 +125,101 @@ && dedupColumnsContainRexCall(rexCallsExceptWindow, dedupColumns)) { // We convert the dedup pushdown to composite aggregate + top_hits: // Aggregate(literalAgg(dedupNumer), groups) // +- Project(groups, remaining) - // +- Scan - // Step 1: Initial a RelBuilder to build aggregate by pushing Scan and Project + // +- Scan or Project with expression + // Step 1: Initial a RelBuilder to build aggregate by pushing child Project or Scan RelBuilder relBuilder = call.builder(); - relBuilder.push(scan); - // To baseline the rowType, merge the fields() and projectWithWindow - List mergedRexList = new ArrayList<>(); - List mergedFieldNames = new ArrayList<>(); - List builderFields = relBuilder.fields(); - List projectFields = projectWithWindow.getProjects(); - List builderFieldNames = relBuilder.peek().getRowType().getFieldNames(); - List projectFieldNames = projectWithWindow.getRowType().getFieldNames(); + if (projectWithExpr == null) { + relBuilder.push(scan); + // To baseline the rowType, merge the fields() and projectWithWindow + List mergedRexList = new ArrayList<>(); + List mergedFieldNames = new ArrayList<>(); + List builderFields = relBuilder.fields(); + List projectFields = projectWithWindow.getProjects(); + List builderFieldNames = relBuilder.peek().getRowType().getFieldNames(); + List projectFieldNames = projectWithWindow.getRowType().getFieldNames(); - // Add existing fields with proper names - // For rename case: source = t | rename old as new | dedup new - for (RexNode field : builderFields) { - mergedRexList.add(field); - int projectIndex = projectFields.indexOf(field); - if (projectIndex >= 0) { - mergedFieldNames.add(projectFieldNames.get(projectIndex)); - } else { - mergedFieldNames.add(builderFieldNames.get(builderFields.indexOf(field))); - } - } - // Append new fields from project (excluding ROW_NUMBER and duplicates) - for (RexNode field : projectFields) { - if (!field.isA(SqlKind.ROW_NUMBER) && !builderFields.contains(field)) { + // Add existing fields with proper names + // For rename case: source = t | rename old as new | dedup new + for (RexNode field : builderFields) { mergedRexList.add(field); - mergedFieldNames.add(field.toString()); + int projectIndex = projectFields.indexOf(field); + if (projectIndex >= 0) { + mergedFieldNames.add(projectFieldNames.get(projectIndex)); + } else { + mergedFieldNames.add(builderFieldNames.get(builderFields.indexOf(field))); + } } + // Append new fields from project (excluding ROW_NUMBER and duplicates) + for (RexNode field : projectFields) { + if (!field.isA(SqlKind.ROW_NUMBER) && !builderFields.contains(field)) { + mergedRexList.add(field); + mergedFieldNames.add(field.toString()); + } + } + // Force add the project + relBuilder.project(mergedRexList, mergedFieldNames, true); + } else { + relBuilder.push(projectWithExpr); } - // Force add the project - relBuilder.project(mergedRexList, mergedFieldNames, true); - LogicalProject baseline = (LogicalProject) relBuilder.peek(); - Mapping mappingForDedupColumns = - PlanUtils.mapping(dedupColumns, relBuilder.peek().getRowType()); - + LogicalProject projectBeforeReorder = (LogicalProject) relBuilder.peek(); // Step 2: Push a Project which groups is first, then remaining finalOutput columns - List reordered = new ArrayList<>(PlanUtils.getInputRefs(dedupColumns)); - baseline.getProjects().stream() - .filter(Predicate.not(dedupColumns::contains)) - .forEach(reordered::add); - relBuilder.project(reordered); - // childProject includes all list of finalOutput columns - LogicalProject childProject = (LogicalProject) relBuilder.peek(); + List dedupColumnsAfterReorder = dedupColumns; + if (reorderRequired(dedupColumns)) { + Mapping mappingForDedupColumns = + PlanUtils.mapping(dedupColumns, relBuilder.peek().getRowType()); + List reordered = + new ArrayList<>(withAliases(relBuilder, dedupColumns, dedupColumnNames)); + List dedupColumnIndices = + dedupColumns.stream().map(r -> ((RexInputRef) r).getIndex()).toList(); + for (int i = 0; i < projectBeforeReorder.getProjects().size(); i++) { + if (!dedupColumnIndices.contains(i)) { + reordered.add(projectBeforeReorder.getProjects().get(i)); + } + } + relBuilder.project(reordered); + dedupColumnsAfterReorder = RexUtil.apply(mappingForDedupColumns, dedupColumns); + } + LogicalProject projectAfterReorder = (LogicalProject) relBuilder.peek(); // Step 3: Push an Aggregate + // The group keys should contain aliases. + RelBuilder.GroupKey groupKeys = + relBuilder.groupKey(withAliases(relBuilder, dedupColumnsAfterReorder, dedupColumnNames)); // We push down a LITERAL_AGG with dedupNumer for converting the dedup command to aggregate: // (1) Pass the dedupNumer to AggregateAnalyzer.processAggregateCalls() // (2) Distinguish it from an optimization operator and user defined aggregator. // (LITERAL_AGG is used in optimization normally, see {@link SqlKind#LITERAL_AGG}) - final List newDedupColumns = RexUtil.apply(mappingForDedupColumns, dedupColumns); - relBuilder.aggregate(relBuilder.groupKey(newDedupColumns), relBuilder.literalAgg(dedupNumer)); + relBuilder.aggregate(groupKeys, relBuilder.literalAgg(dedupNumer)); // add bucket_nullable = false hint PlanUtils.addIgnoreNullBucketHintToAggregate(relBuilder); // peek the aggregate after hint being added LogicalAggregate aggregate = (LogicalAggregate) relBuilder.build(); CalciteLogicalIndexScan newScan = - (CalciteLogicalIndexScan) scan.pushDownAggregate(aggregate, childProject); + (CalciteLogicalIndexScan) scan.pushDownAggregate(aggregate, projectAfterReorder); if (newScan != null) { - // Reorder back to original order + // Back to original project order call.transformTo(newScan.copyWithNewSchema(finalProject.getRowType())); } } + // Return true if the indices of dedup columns are not start with 0, 1, 2... + private boolean reorderRequired(List dedupColumns) { + for (int i = 0; i < dedupColumns.size(); i++) { + if (((RexInputRef) dedupColumns.get(i)).getIndex() != i) { + return true; + } + } + return false; + } + + private List withAliases( + RelBuilder relBuilder, List rexNodes, List aliases) { + return Streams.zip( + rexNodes.stream(), aliases.stream(), (key, alias) -> relBuilder.alias(key, alias)) + .toList(); + } + private static boolean dedupColumnsContainRexCall( List calls, List dedupColumns) { List dedupColumnIndicesFromCall = @@ -257,7 +301,9 @@ public interface Config extends OpenSearchRuleConfig { .oneInput( b3 -> b3.operand(LogicalFilter.class) - .predicate(Config::isNotNull) + .predicate( + PlanUtils + ::mayBeFilterFromBucketNonNull) .oneInput( b4 -> b4.operand(LogicalProject.class) @@ -291,9 +337,5 @@ private static boolean validDedupNumberChecker(LogicalFilter filter) { return filter.getCondition().isA(SqlKind.LESS_THAN_OR_EQUAL) && PlanUtils.containsRowNumberDedup(filter); } - - private static boolean isNotNull(LogicalFilter filter) { - return filter.getCondition().isA(SqlKind.IS_NOT_NULL); - } } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java index ef55868b202..37f45d4942c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java @@ -57,8 +57,7 @@ public class OpenSearchIndexRules { LIMIT_INDEX_SCAN, SORT_INDEX_SCAN, DEDUP_PUSH_DOWN, - // TODO https://github.com/opensearch-project/sql/issues/4789 - // DEDUP_EXPR_PUSH_DOWN, + DEDUP_EXPR_PUSH_DOWN, SORT_PROJECT_EXPR_TRANSPOSE, SORT_AGGREGATION_METRICS_RULE, RARE_TOP_PUSH_DOWN, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java index 0d662fcc385..ef3c37af1b0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java @@ -90,7 +90,12 @@ public List> parse(Aggregation agg) { // LinkedHashMap["name" -> "A", "category" -> "Y"] // ] return Arrays.stream(hits) - .>map(hit -> new LinkedHashMap<>(hit.getSourceAsMap())) + .>map( + hit -> { + Map map = new LinkedHashMap<>(hit.getSourceAsMap()); + hit.getFields().values().forEach(f -> map.put(f.getName(), f.getValue())); + return map; + }) .toList(); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 91e0b9be315..86a8f1748c8 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -67,6 +67,7 @@ import org.opensearch.sql.opensearch.storage.scan.context.PushDownOperation; import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; import org.opensearch.sql.opensearch.storage.scan.context.RareTopDigest; +import org.opensearch.sql.opensearch.storage.scan.context.SortDigest; import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; /** An abstract relational operator representing a scan of an OpenSearchIndex type. */ @@ -379,7 +380,7 @@ && isAnyCollationNameInAggregators(collationNames)) { builders.add(sortBuilder.order(order)); } action = (OSRequestBuilderAction) requestBuilder -> requestBuilder.pushDownSort(builders); - digest = builders.toString(); + digest = new SortDigest(builders); newScan.pushDownContext.add(PushDownType.SORT, digest, action); return newScan; } @@ -497,6 +498,14 @@ public boolean isProjectPushed() { return this.getPushDownContext().isProjectPushed(); } + public boolean isSortPushed() { + return this.getPushDownContext().isSortPushed(); + } + + public boolean isSortExprPushed() { + return this.getPushDownContext().isSortExprPushed(); + } + /** * Determine the appropriate ScriptSortType based on the expression's return type. * diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 4f84746ade4..27d39331705 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -8,6 +8,7 @@ import java.util.AbstractCollection; import java.util.ArrayDeque; import java.util.Iterator; +import java.util.List; import lombok.Getter; import org.jetbrains.annotations.NotNull; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; @@ -153,6 +154,19 @@ public Object getDigestByType(PushDownType type) { .orElse(null); } + /** + * Get all digest of a specific type. + * + * @param type The PushDownType to get the digest for + * @return The digest list + */ + public List getAllDigestByType(PushDownType type) { + return this.stream() + .filter(operation -> operation.type() == type) + .map(PushDownOperation::digest) + .toList(); + } + public OpenSearchRequestBuilder createRequestBuilder() { OpenSearchRequestBuilder newRequestBuilder = osIndex.createRequestBuilder(); if (operationsForRequestBuilder != null) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortDigest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortDigest.java new file mode 100644 index 00000000000..dc8f3ca6d9f --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortDigest.java @@ -0,0 +1,16 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan.context; + +import java.util.List; +import org.opensearch.search.sort.SortBuilder; + +public record SortDigest(List> builders) { + @Override + public String toString() { + return builders.toString(); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java index c1e4f5535bf..224d7019ec2 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java @@ -70,6 +70,7 @@ import org.checkerframework.checker.nullness.qual.Nullable; import org.opensearch.index.fielddata.ScriptDocValues; import org.opensearch.script.AggregationScript; +import org.opensearch.script.FieldScript; import org.opensearch.script.FilterScript; import org.opensearch.script.NumberSortScript; import org.opensearch.script.ScriptContext; @@ -78,6 +79,7 @@ import org.opensearch.search.lookup.SourceLookup; import org.opensearch.sql.data.model.ExprTimestampValue; import org.opensearch.sql.opensearch.storage.script.aggregation.CalciteAggregationScriptFactory; +import org.opensearch.sql.opensearch.storage.script.field.CalciteFieldScriptFactory; import org.opensearch.sql.opensearch.storage.script.filter.CalciteFilterScriptFactory; import org.opensearch.sql.opensearch.storage.script.sort.CalciteNumberSortScriptFactory; import org.opensearch.sql.opensearch.storage.script.sort.CalciteStringSortScriptFactory; @@ -110,6 +112,7 @@ public CalciteScriptEngine(RelOptCluster relOptCluster) { .put(AggregationScript.CONTEXT, CalciteAggregationScriptFactory::new) .put(NumberSortScript.CONTEXT, CalciteNumberSortScriptFactory::new) .put(StringSortScript.CONTEXT, CalciteStringSortScriptFactory::new) + .put(FieldScript.CONTEXT, CalciteFieldScriptFactory::new) .build(); @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScript.java new file mode 100644 index 00000000000..b709089e84f --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScript.java @@ -0,0 +1,45 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.field; + +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.script.FieldScript; +import org.opensearch.search.lookup.SearchLookup; +import org.opensearch.search.lookup.SourceLookup; +import org.opensearch.sql.opensearch.storage.script.core.CalciteScript; + +/** Calcite script executor that produce dynamic values. */ +@EqualsAndHashCode(callSuper = false) +public class CalciteFieldScript extends FieldScript { + + /** Calcite Script. */ + private final CalciteScript calciteScript; + + private final SourceLookup sourceLookup; + + private final Map parametersToIndex; + + public CalciteFieldScript( + Function1 function, + SearchLookup lookup, + LeafReaderContext context, + Map params, + Map parametersToIndex) { + super(params, lookup, context); + this.calciteScript = new CalciteScript(function, params); + this.sourceLookup = lookup.getLeafSearchLookup(context).source(); + this.parametersToIndex = parametersToIndex; + } + + @Override + public Object execute() { + return calciteScript.execute(this.getDoc(), this.sourceLookup, this.parametersToIndex)[0]; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptFactory.java new file mode 100644 index 00000000000..189b128fdf8 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptFactory.java @@ -0,0 +1,37 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.field; + +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.calcite.rel.type.RelDataType; +import org.opensearch.script.FieldScript; +import org.opensearch.search.lookup.SearchLookup; + +/** Calcite script factory that generates leaf factory. */ +@EqualsAndHashCode +public class CalciteFieldScriptFactory implements FieldScript.Factory { + + /** Generated code of calcite to execute. */ + private final Function1 function; + + public CalciteFieldScriptFactory(Function1 function, RelDataType type) { + this.function = function; + } + + @Override + public boolean isResultDeterministic() { + // This implies the results are cacheable + return true; + } + + @Override + public FieldScript.LeafFactory newFactory(Map params, SearchLookup lookup) { + return new CalciteFieldScriptLeafFactory(function, params, lookup); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java new file mode 100644 index 00000000000..727640e4ccf --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java @@ -0,0 +1,51 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.field; + +import static org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.SOURCES; + +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.script.FieldScript; +import org.opensearch.search.lookup.SearchLookup; + +/** Calcite script leaf factory that produces script executor for each leaf. */ +public class CalciteFieldScriptLeafFactory implements FieldScript.LeafFactory { + private final Function1 function; + + /** Parameters for the calcite script. */ + private final Map params; + + /** Document lookup that returns doc values. */ + private final SearchLookup lookup; + + /** + * Stores the parameter names to the actual indices in SOURCES. Generate it in advance in factory + * to save the process for each document* + */ + private final Map parametersToIndex; + + public CalciteFieldScriptLeafFactory( + Function1 function, Map params, SearchLookup lookup) { + this.function = function; + this.params = params; + this.lookup = lookup; + this.parametersToIndex = + IntStream.range(0, ((List) params.get(SOURCES)).size()) + .boxed() + .collect(Collectors.toMap(i -> "?" + i, i -> i)); + } + + @Override + public FieldScript newInstance(LeafReaderContext ctx) { + return new CalciteFieldScript(function, lookup, ctx, params, parametersToIndex); + } +} From b2f64f037e167599f3f310d3f7d404d8b686b54f Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Tue, 16 Dec 2025 10:27:01 +0800 Subject: [PATCH 2/9] Fix java spotless check Signed-off-by: Lantao Jin --- .../planner/rules/DedupPushdownRule.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index ea2d49db194..c9a646320eb 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -87,10 +87,16 @@ protected void apply( .map(r -> ((RexInputRef) r).getIndex()) .map(i -> projectWithWindow.getInput().getRowType().getFieldNames().get(i)) .toList(); - if (projectWithExpr != null && projectWithWindow.getProjects().stream() - .filter(rex -> !rex.isA(SqlKind.ROW_NUMBER)) - .filter(Predicate.not(dedupColumns::contains)) - .anyMatch(rex -> !projectWithExpr.getProjects().get(((RexInputRef) rex).getIndex()).isA(SqlKind.INPUT_REF))) { + if (projectWithExpr != null + && projectWithWindow.getProjects().stream() + .filter(rex -> !rex.isA(SqlKind.ROW_NUMBER)) + .filter(Predicate.not(dedupColumns::contains)) + .anyMatch( + rex -> + !projectWithExpr + .getProjects() + .get(((RexInputRef) rex).getIndex()) + .isA(SqlKind.INPUT_REF))) { // TODO fallback to the approach of Collapse search // | eval new_age = age + 1 | fields gender, new_age | dedup 1 gender if (LOG.isDebugEnabled()) { From 8b3e8d21afc8ba05f6a9e0336e5ad68a6f045fde Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Tue, 16 Dec 2025 18:01:33 +0800 Subject: [PATCH 3/9] Support dedup with expression Signed-off-by: Lantao Jin --- .../sql/calcite/remote/CalciteExplainIT.java | 51 +++- ...rt_expr_pushdown_for_smj_w_max_option.yaml | 2 +- .../calcite/explain_dedup_complex1.yaml | 2 +- .../calcite/explain_dedup_complex2.yaml | 2 +- .../calcite/explain_dedup_complex3.yaml | 2 +- .../calcite/explain_dedup_complex4.yaml | 2 +- .../calcite/explain_dedup_expr1.yaml | 2 +- .../calcite/explain_dedup_expr2.yaml | 2 +- .../explain_dedup_expr2_alternative.yaml | 2 +- .../calcite/explain_dedup_expr3.yaml | 2 +- .../explain_dedup_expr3_alternative.yaml | 2 +- .../calcite/explain_dedup_expr4.yaml | 4 +- .../explain_dedup_expr4_alternative.yaml | 4 +- .../calcite/explain_dedup_expr_complex.yaml | 13 - .../calcite/explain_dedup_expr_complex1.yaml | 13 + ...plain_dedup_expr_complex1_alternative.yaml | 13 + .../calcite/explain_dedup_expr_complex2.yaml | 12 + ...plain_dedup_expr_complex2_alternative.yaml | 12 + ...xplain_dedup_expr_complex_alternative.yaml | 13 - .../explain_dedup_keepempty_false_push.yaml | 2 +- .../calcite/explain_dedup_push.yaml | 2 +- .../calcite/explain_dedup_with_expr1.yaml | 11 + .../calcite/explain_dedup_with_expr2.yaml | 11 + .../explain_dedup_with_expr2_alternative.yaml | 11 + .../calcite/explain_dedup_with_expr3.yaml | 11 + .../explain_dedup_with_expr3_alternative.yaml | 11 + .../calcite/explain_dedup_with_expr4.yaml | 13 + .../explain_dedup_with_expr4_alternative.yaml | 13 + ...explain_join_with_criteria_max_option.yaml | 2 +- .../explain_join_with_fields_max_option.yaml | 2 +- .../planner/rules/DedupPushdownRule.java | 229 ++++++++---------- .../opensearch/request/AggregateAnalyzer.java | 96 ++++---- .../scan/AbstractCalciteIndexScan.java | 11 +- .../storage/scan/context/PushDownContext.java | 14 -- .../storage/scan/context/SortDigest.java | 16 -- 35 files changed, 356 insertions(+), 254 deletions(-) delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex2.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex2_alternative.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex_alternative.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr1.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr2.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr2_alternative.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr3.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr3_alternative.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml delete mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortDigest.java diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index d567b182a9c..22a46ff63b3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2101,8 +2101,8 @@ public void testDedupRename() throws IOException { @Test public void testRenameDedupThenSortExpr() throws IOException { enabledOnlyWhenPushdownIsEnabled(); - String expected = loadExpectedPlan("explain_dedup_expr_complex.yaml"); - String alternative = loadExpectedPlan("explain_dedup_expr_complex_alternative.yaml"); + String expected = loadExpectedPlan("explain_dedup_expr_complex1.yaml"); + String alternative = loadExpectedPlan("explain_dedup_expr_complex1_alternative.yaml"); assertYamlEqualsIgnoreId( expected, alternative, @@ -2111,6 +2111,53 @@ public void testRenameDedupThenSortExpr() throws IOException { + " eval tmp_gender = lower(gender) | eval tmp_state = lower(state) | rename" + " tmp_gender as new_gender | rename tmp_state as new_state | sort new_gender," + " -new_state | dedup 2 new_gender, new_state")); + expected = loadExpectedPlan("explain_dedup_expr_complex2.yaml"); + alternative = loadExpectedPlan("explain_dedup_expr_complex2_alternative.yaml"); + assertYamlEqualsIgnoreId( + expected, + alternative, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval tmp_gender = lower(gender) | eval tmp_state = lower(state) | rename" + + " tmp_gender as new_gender | rename tmp_state as new_state | dedup 2 new_gender," + + " new_state | sort new_gender, -new_state")); + } + + @Test + public void testDedupWithExpr() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_dedup_with_expr1.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | dedup 1" + + " age")); + expected = loadExpectedPlan("explain_dedup_with_expr2.yaml"); + String alternative = loadExpectedPlan("explain_dedup_with_expr2_alternative.yaml"); + assertYamlEqualsIgnoreId( + expected, + alternative, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval new_gender = lower(gender), new_state = lower(state) | dedup 1 age," + + " new_state")); + expected = loadExpectedPlan("explain_dedup_with_expr3.yaml"); + alternative = loadExpectedPlan("explain_dedup_with_expr3_alternative.yaml"); + assertYamlEqualsIgnoreId( + expected, + alternative, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | eval" + + " new_state = lower(state) | dedup 2 age, account_number")); + expected = loadExpectedPlan("explain_dedup_with_expr4.yaml"); + alternative = loadExpectedPlan("explain_dedup_with_expr4_alternative.yaml"); + assertYamlEqualsIgnoreId( + expected, + alternative, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval new_gender = lower(gender) | eval new_state = lower(state) | sort gender," + + " -state | dedup 2 gender, state")); } @Test diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml index b0707c70389..2e1eaf42f88 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml @@ -18,4 +18,4 @@ calcite: EnumerableMergeJoin(condition=[=($0, $7)], joinType=[left]) EnumerableCalc(expr#0=[{inputs}], expr#1=['(?^[A-Z])'], expr#2=['lastname'], expr#3=[REX_EXTRACT($t0, $t1, $t2)], $f0=[$t3]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[lastname], LIMIT->10000, SORT_EXPR->[REX_EXTRACT($0, '(?^[A-Z])', 'lastname') ASCENDING NULLS_LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["lastname"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC63sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRVhfRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogMjAwMAogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":true,"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["lastname","(?^[A-Z])","lastname"]}},"type":"string","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000, SORT->[6]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"lastname":{"terms":{"field":"lastname","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["lastname","account_number","firstname","address","birthdate","gender","city","balance","employer","state","age","email","male"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000, SORT->[6]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"lastname":{"terms":{"field":"lastname","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["lastname","account_number","firstname","address","birthdate","gender","city","balance","employer","state","age","email","male","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml index 7b317734f93..de7047f0161 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($4)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","firstname","address","balance","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","firstname","address","balance","city","employer","state","age","email","lastname","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml index 17af313bfb3..0d045fb7a5f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml index 56cdf375905..d5b816ff644 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml index 64a65b0277c..cbe56d53c56 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml index 5f13847540f..a84b664804e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{"LOWER($4)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml index aa8c90ec119..a892b5c4e26 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2_alternative.yaml index 73472aa7762..ffa6bc1aa52 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2_alternative.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2_alternative.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml index 1803e419652..ce4f25d3892 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{"LOWER($4)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"LOWER($7)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3_alternative.yaml index 46337bb34c1..a7009a3751b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3_alternative.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3_alternative.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{"LOWER($7)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"LOWER($4)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml index 840cb578f8e..e8dffe4a7a8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml @@ -9,4 +9,6 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml index 57b24111649..f441f5d8e12 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml @@ -9,4 +9,6 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex.yaml deleted file mode 100644 index 469c2517b2c..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex.yaml +++ /dev/null @@ -1,13 +0,0 @@ -calcite: - logical: | - LogicalSystemLimit(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) - LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) - LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) - LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - physical: | - EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[4 ASC FIRST, 5 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml new file mode 100644 index 00000000000..d7354c8b6d1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) + LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[4 ASC FIRST, 5 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml new file mode 100644 index 00000000000..fdd427f5f9b --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) + LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[4 ASC FIRST, 5 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex2.yaml new file mode 100644 index 00000000000..53462af4bce --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex2.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[4 ASC FIRST, 5 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex2_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex2_alternative.yaml new file mode 100644 index 00000000000..b9cb684053f --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex2_alternative.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[4 ASC FIRST, 5 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex_alternative.yaml deleted file mode 100644 index 7b88b70d348..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex_alternative.yaml +++ /dev/null @@ -1,13 +0,0 @@ -calcite: - logical: | - LogicalSystemLimit(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) - LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) - LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) - LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - physical: | - EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[4 ASC FIRST, 5 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"LOWER($3)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"LOWER($1)":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml index ead28edabe4..b35dfe6adb3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","firstname","address","balance","city","employer","state","age","email","lastname","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml index ead28edabe4..b35dfe6adb3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","firstname","address","balance","city","employer","state","age","email","lastname","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr1.yaml new file mode 100644 index 00000000000..45034087d0f --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr1.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new_gender=[$17]) + LogicalFilter(condition=[<=($18, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[$17], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $8)]) + LogicalFilter(condition=[IS NOT NULL($8)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age":{"terms":{"field":"age","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["age","account_number","firstname","address","balance","gender","city","employer","state","email","lastname"],"excludes":[]},"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr2.yaml new file mode 100644 index 00000000000..eadbe165e38 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr2.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($5))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age":{"terms":{"field":"age","missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["age","account_number","gender","state"],"excludes":[]},"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr2_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr2_alternative.yaml new file mode 100644 index 00000000000..4c7afb236a8 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr2_alternative.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2, $5)]) + LogicalFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($5))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age":{"terms":{"field":"age","missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["age","account_number","gender","state"],"excludes":[]},"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr3.yaml new file mode 100644 index 00000000000..95a1627f212 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr3.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new_gender=[$17], new_state=[$18]) + LogicalFilter(condition=[<=($19, 2)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[$17], new_state=[$18], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $8, $0)]) + LogicalFilter(condition=[AND(IS NOT NULL($8), IS NOT NULL($0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"account_number":{"terms":{"field":"account_number","missing_bucket":false,"order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","age","firstname","address","balance","gender","city","employer","state","email","lastname"],"excludes":[]},"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr3_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr3_alternative.yaml new file mode 100644 index 00000000000..be3cbf82623 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr3_alternative.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new_gender=[$17], new_state=[$18]) + LogicalFilter(condition=[<=($19, 2)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[$17], new_state=[$18], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $8, $0)]) + LogicalFilter(condition=[AND(IS NOT NULL($8), IS NOT NULL($0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"account_number":{"terms":{"field":"account_number","missing_bucket":false,"order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","age","firstname","address","balance","gender","city","employer","state","email","lastname"],"excludes":[]},"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml new file mode 100644 index 00000000000..1c583d8f79d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3)]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[1 ASC FIRST, 3 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml new file mode 100644 index 00000000000..c205d98dbad --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3)]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[1 ASC FIRST, 3 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml index 12bd2aefaa0..d86d374ec2e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml @@ -21,4 +21,4 @@ calcite: "missing" : "_last" } }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000, SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"account_number":{"terms":{"field":"account_number","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000, SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"account_number":{"terms":{"field":"account_number","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml index e795d514faf..715ba0c9882 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml @@ -16,5 +16,5 @@ calcite: EnumerableCalc(expr#0..13=[{inputs}], proj#0..12=[{exprs}]) EnumerableLimit(fetch=[10000]) EnumerableHashJoin(condition=[=($0, $13)], joinType=[inner]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"account_number":{"terms":{"field":"account_number","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"account_number":{"terms":{"field":"account_number","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index c9a646320eb..9a62517020b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -5,10 +5,11 @@ package org.opensearch.sql.opensearch.planner.rules; -import com.google.common.collect.Streams; import java.util.ArrayList; import java.util.List; +import java.util.Set; import java.util.function.Predicate; +import java.util.stream.IntStream; import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.logical.LogicalAggregate; @@ -18,12 +19,11 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexWindow; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.tools.RelBuilder; -import org.apache.calcite.util.mapping.Mapping; +import org.apache.calcite.util.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.immutables.value.Value; @@ -81,46 +81,15 @@ protected void apply( // TODO https://github.com/opensearch-project/sql/issues/4564 return; } - List dedupColumnNames = + List dedupColumnIndicesInWindowProject = dedupColumns.stream() .filter(rex -> rex.isA(SqlKind.INPUT_REF)) .map(r -> ((RexInputRef) r).getIndex()) - .map(i -> projectWithWindow.getInput().getRowType().getFieldNames().get(i)) .toList(); - if (projectWithExpr != null - && projectWithWindow.getProjects().stream() - .filter(rex -> !rex.isA(SqlKind.ROW_NUMBER)) - .filter(Predicate.not(dedupColumns::contains)) - .anyMatch( - rex -> - !projectWithExpr - .getProjects() - .get(((RexInputRef) rex).getIndex()) - .isA(SqlKind.INPUT_REF))) { - // TODO fallback to the approach of Collapse search - // | eval new_age = age + 1 | fields gender, new_age | dedup 1 gender - if (LOG.isDebugEnabled()) { - LOG.debug( - "Cannot pushdown the dedup since the final outputs contain a column which is not" - + " included in table schema"); - } - return; - } - - List rexCallsExceptWindow = - projectWithWindow.getProjects().stream() - .filter(rex -> !rex.isA(SqlKind.ROW_NUMBER)) - .filter(rex -> rex instanceof RexCall) + List dedupColumnNames = + dedupColumnIndicesInWindowProject.stream() + .map(i -> projectWithWindow.getInput().getRowType().getFieldNames().get(i)) .toList(); - if (!rexCallsExceptWindow.isEmpty() - && dedupColumnsContainRexCall(rexCallsExceptWindow, dedupColumns)) { - // TODO https://github.com/opensearch-project/sql/issues/4789 - // | eval new_gender = lower(gender) | fields new_gender, age | dedup 1 new_gender - if (LOG.isDebugEnabled()) { - LOG.debug("Cannot pushdown the dedup since the dedup columns contain RexCall"); - } - return; - } // must be row_number <= number assert numOfDedupFilter.getCondition().isA(SqlKind.LESS_THAN_OR_EQUAL); @@ -132,108 +101,114 @@ && dedupColumnsContainRexCall(rexCallsExceptWindow, dedupColumns)) { // Aggregate(literalAgg(dedupNumer), groups) // +- Project(groups, remaining) // +- Scan or Project with expression - // Step 1: Initial a RelBuilder to build aggregate by pushing child Project or Scan RelBuilder relBuilder = call.builder(); + // 1 Initial a RelBuilder by pushing Scan and Project if (projectWithExpr == null) { + // 1.1 if projectWithExpr not existed, push scan relBuilder.push(scan); - // To baseline the rowType, merge the fields() and projectWithWindow - List mergedRexList = new ArrayList<>(); - List mergedFieldNames = new ArrayList<>(); - List builderFields = relBuilder.fields(); - List projectFields = projectWithWindow.getProjects(); - List builderFieldNames = relBuilder.peek().getRowType().getFieldNames(); - List projectFieldNames = projectWithWindow.getRowType().getFieldNames(); + // 1.2 To baseline the rowType, merge the fields() and projectWithWindow + List columnsMerged = new ArrayList<>(); + List colNamesMerged = new ArrayList<>(); + List columnsFromScan = relBuilder.fields(); + List columnsFromWindowProj = projectWithWindow.getProjects(); + List colNamesFromScan = relBuilder.peek().getRowType().getFieldNames(); + List colNamesFromWindowProj = projectWithWindow.getRowType().getFieldNames(); - // Add existing fields with proper names + // 1.3 Add existing columns with proper names // For rename case: source = t | rename old as new | dedup new - for (RexNode field : builderFields) { - mergedRexList.add(field); - int projectIndex = projectFields.indexOf(field); + // Add the column `old` with name "new" + for (RexNode col : columnsFromScan) { + columnsMerged.add(col); + int projectIndex = columnsFromWindowProj.indexOf(col); if (projectIndex >= 0) { - mergedFieldNames.add(projectFieldNames.get(projectIndex)); + colNamesMerged.add(colNamesFromWindowProj.get(projectIndex)); } else { - mergedFieldNames.add(builderFieldNames.get(builderFields.indexOf(field))); + colNamesMerged.add(colNamesFromScan.get(columnsFromScan.indexOf(col))); } } - // Append new fields from project (excluding ROW_NUMBER and duplicates) - for (RexNode field : projectFields) { - if (!field.isA(SqlKind.ROW_NUMBER) && !builderFields.contains(field)) { - mergedRexList.add(field); - mergedFieldNames.add(field.toString()); + // 1.4 Append new columns from project (excluding ROW_NUMBER and duplicates) + for (RexNode col : columnsFromWindowProj) { + if (!col.isA(SqlKind.ROW_NUMBER) && !columnsFromScan.contains(col)) { + columnsMerged.add(col); + colNamesMerged.add(col.toString()); } } - // Force add the project - relBuilder.project(mergedRexList, mergedFieldNames, true); + // 1.5 if reorder required, build the reordered project + List> merged = + IntStream.range(0, columnsMerged.size()) + .mapToObj(i -> new Pair<>(columnsMerged.get(i), colNamesMerged.get(i))) + .toList(); + List> reordered = + moveForwardDedupColumns(merged, dedupColumnIndicesInWindowProject, dedupColumnNames); + // 1.6 force add this reordered project + relBuilder.project( + reordered.stream().map(Pair::getKey).toList(), + reordered.stream().map(Pair::getValue).toList(), + true); } else { - relBuilder.push(projectWithExpr); + // 1.7 if projectWithExpr existed, push a reordered projectWithExpr + List> reordered = + moveForwardDedupColumns( + projectWithExpr.getNamedProjects(), + dedupColumnIndicesInWindowProject, + dedupColumnNames); + LogicalProject reorderedProject = + LogicalProject.create( + projectWithExpr.getInput(), + List.of(), + reordered.stream().map(Pair::getKey).toList(), + reordered.stream().map(Pair::getValue).toList(), + Set.of()); + relBuilder.push(reorderedProject); } - LogicalProject projectBeforeReorder = (LogicalProject) relBuilder.peek(); - // Step 2: Push a Project which groups is first, then remaining finalOutput columns - List dedupColumnsAfterReorder = dedupColumns; - if (reorderRequired(dedupColumns)) { - Mapping mappingForDedupColumns = - PlanUtils.mapping(dedupColumns, relBuilder.peek().getRowType()); - List reordered = - new ArrayList<>(withAliases(relBuilder, dedupColumns, dedupColumnNames)); - List dedupColumnIndices = - dedupColumns.stream().map(r -> ((RexInputRef) r).getIndex()).toList(); - for (int i = 0; i < projectBeforeReorder.getProjects().size(); i++) { - if (!dedupColumnIndices.contains(i)) { - reordered.add(projectBeforeReorder.getProjects().get(i)); - } - } - relBuilder.project(reordered); - dedupColumnsAfterReorder = RexUtil.apply(mappingForDedupColumns, dedupColumns); - } - LogicalProject projectAfterReorder = (LogicalProject) relBuilder.peek(); + LogicalProject targetChildProject = (LogicalProject) relBuilder.peek(); - // Step 3: Push an Aggregate - // The group keys should contain aliases. - RelBuilder.GroupKey groupKeys = - relBuilder.groupKey(withAliases(relBuilder, dedupColumnsAfterReorder, dedupColumnNames)); + // 2 Push an Aggregate // We push down a LITERAL_AGG with dedupNumer for converting the dedup command to aggregate: // (1) Pass the dedupNumer to AggregateAnalyzer.processAggregateCalls() // (2) Distinguish it from an optimization operator and user defined aggregator. // (LITERAL_AGG is used in optimization normally, see {@link SqlKind#LITERAL_AGG}) - relBuilder.aggregate(groupKeys, relBuilder.literalAgg(dedupNumer)); + relBuilder.aggregate( + relBuilder.groupKey(relBuilder.fields(dedupColumnNames)), + relBuilder.literalAgg(dedupNumer)); // add bucket_nullable = false hint PlanUtils.addIgnoreNullBucketHintToAggregate(relBuilder); // peek the aggregate after hint being added LogicalAggregate aggregate = (LogicalAggregate) relBuilder.build(); CalciteLogicalIndexScan newScan = - (CalciteLogicalIndexScan) scan.pushDownAggregate(aggregate, projectAfterReorder); + (CalciteLogicalIndexScan) scan.pushDownAggregate(aggregate, targetChildProject); if (newScan != null) { // Back to original project order call.transformTo(newScan.copyWithNewSchema(finalProject.getRowType())); } } - // Return true if the indices of dedup columns are not start with 0, 1, 2... - private boolean reorderRequired(List dedupColumns) { - for (int i = 0; i < dedupColumns.size(); i++) { - if (((RexInputRef) dedupColumns.get(i)).getIndex() != i) { - return true; - } - } - return false; - } - - private List withAliases( - RelBuilder relBuilder, List rexNodes, List aliases) { - return Streams.zip( - rexNodes.stream(), aliases.stream(), (key, alias) -> relBuilder.alias(key, alias)) - .toList(); - } - - private static boolean dedupColumnsContainRexCall( - List calls, List dedupColumns) { - List dedupColumnIndicesFromCall = - PlanUtils.getSelectColumns(calls).stream().distinct().toList(); - List dedupColumnsIndicesFromPartitionKeys = - PlanUtils.getSelectColumns(dedupColumns).stream().distinct().toList(); - return dedupColumnsIndicesFromPartitionKeys.stream() - .anyMatch(dedupColumnIndicesFromCall::contains); + /** Move the dedup columns to the front of the original column list. */ + private static List> moveForwardDedupColumns( + List> originalColumnList, + List dedupColumnIndices, + List dedupColumnNames) { + List> reordered = + IntStream.range(0, originalColumnList.size()) + .boxed() + .sorted( + (i1, i2) -> { + boolean in1 = dedupColumnIndices.contains(i1); + boolean in2 = dedupColumnIndices.contains(i2); + return in1 == in2 ? i1 - i2 : in2 ? 1 : -1; + }) + .map( + i -> { + Pair original = originalColumnList.get(i); + if (dedupColumnIndices.contains(i)) { + int dedupIndex = dedupColumnIndices.indexOf(i); + return Pair.of(original.getKey(), dedupColumnNames.get(dedupIndex)); + } + return original; + }) + .toList(); + return reordered; } @Value.Immutable @@ -272,15 +247,8 @@ public interface Config extends OpenSearchRuleConfig { CalciteLogicalIndexScan .class) .predicate( - Predicate.not( - AbstractCalciteIndexScan - ::isLimitPushed) - .and( - AbstractCalciteIndexScan - ::noAggregatePushed) - .and( - AbstractCalciteIndexScan - ::isProjectPushed)) + Config + ::tableScanChecker) .noInputs()))))); // +- LogicalProject(no _row_number_dedup_) // +- LogicalFilter(condition contains _row_number_dedup_) @@ -322,18 +290,21 @@ public interface Config extends OpenSearchRuleConfig { CalciteLogicalIndexScan .class) .predicate( - Predicate - .not( - AbstractCalciteIndexScan - ::isLimitPushed) - .and( - AbstractCalciteIndexScan - ::noAggregatePushed) - .and( - AbstractCalciteIndexScan - ::isProjectPushed)) + Config + ::tableScanChecker) .noInputs())))))); + /** + * Project must be not pushed since the name of expression would lose after project pushed. E.g. + * in query "eval new_a = a + 1 | dedup b", the "new_a" will lose. + */ + private static boolean tableScanChecker(AbstractCalciteIndexScan scan) { + return Predicate.not(AbstractCalciteIndexScan::isLimitPushed) + .and(AbstractCalciteIndexScan::noAggregatePushed) + .and(Predicate.not(AbstractCalciteIndexScan::isProjectPushed)) + .test(scan); + } + @Override default DedupPushdownRule toRule() { return new DedupPushdownRule(this); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 620c9b97cf7..d23a5ca144b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -337,7 +337,8 @@ private static Pair> processAggregateCalls( for (int i = 0; i < aggCalls.size(); i++) { AggregateCall aggCall = aggCalls.get(i); - List args = convertAggArgThroughProject(aggCall, project); + List> args = + convertAggArgThroughProject(aggCall, project); String aggFieldName = aggFieldNames.get(i); Pair builderAndParser = @@ -356,19 +357,22 @@ private static Pair> processAggregateCalls( * * @param aggCall the aggregate call * @param project the project - * @return the converted RexNode list + * @return the converted Pair list */ - private static List convertAggArgThroughProject(AggregateCall aggCall, Project project) { + private static List> convertAggArgThroughProject( + AggregateCall aggCall, Project project) { return project == null ? List.of() : PlanUtils.getObjectFromLiteralAgg(aggCall) != null - ? project.getProjects().stream().filter(rex -> !rex.isA(SqlKind.ROW_NUMBER)).toList() - : aggCall.getArgList().stream().map(project.getProjects()::get).toList(); + ? project.getNamedProjects().stream() + .filter(rex -> !rex.getKey().isA(SqlKind.ROW_NUMBER)) + .toList() + : aggCall.getArgList().stream().map(project.getNamedProjects()::get).toList(); } private static Pair createAggregationBuilderAndParser( AggregateCall aggCall, - List args, + List> args, String aggFieldName, AggregateAnalyzer.AggregateBuilderHelper helper) { if (aggCall.isDistinct()) { @@ -380,7 +384,7 @@ private static Pair createAggregationBuilderAn private static Pair createDistinctAggregation( AggregateCall aggCall, - List args, + List> args, String aggFieldName, AggregateBuilderHelper helper) { @@ -388,7 +392,7 @@ private static Pair createDistinctAggregation( case COUNT -> Pair.of( helper.build( - !args.isEmpty() ? args.getFirst() : null, + !args.isEmpty() ? args.getFirst().getKey() : null, AggregationBuilders.cardinality(aggFieldName)), new SingleValueParser(aggFieldName)); default -> @@ -399,99 +403,107 @@ private static Pair createDistinctAggregation( private static Pair createRegularAggregation( AggregateCall aggCall, - List args, + List> args, String aggFieldName, AggregateBuilderHelper helper) { return switch (aggCall.getAggregation().kind) { case AVG -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.avg(aggFieldName)), + helper.build(args.getFirst().getKey(), AggregationBuilders.avg(aggFieldName)), new SingleValueParser(aggFieldName)); // 1. Only case SUM, skip SUM0 / COUNT since calling avg() in DSL should be faster. // 2. To align with databases, SUM0 is not preferred now. case SUM -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.sum(aggFieldName)), + helper.build(args.getFirst().getKey(), AggregationBuilders.sum(aggFieldName)), new SingleValueParser(aggFieldName)); case COUNT -> Pair.of( helper.build( - !args.isEmpty() ? args.getFirst() : null, + !args.isEmpty() ? args.getFirst().getKey() : null, AggregationBuilders.count(aggFieldName)), new SingleValueParser(aggFieldName)); case MIN -> { ExprType fieldType = - OpenSearchTypeFactory.convertRelDataTypeToExprType(args.getFirst().getType()); + OpenSearchTypeFactory.convertRelDataTypeToExprType(args.getFirst().getKey().getType()); if (supportsMaxMinAggregation(fieldType)) { yield Pair.of( - helper.build(args.getFirst(), AggregationBuilders.min(aggFieldName)), + helper.build(args.getFirst().getKey(), AggregationBuilders.min(aggFieldName)), new SingleValueParser(aggFieldName)); } else { yield Pair.of( AggregationBuilders.topHits(aggFieldName) - .fetchField(helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) + .fetchField( + helper.inferNamedField(args.getFirst().getKey()).getReferenceForTermQuery()) .size(1) .from(0) .sort( - helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), + helper.inferNamedField(args.getFirst().getKey()).getReferenceForTermQuery(), SortOrder.ASC), new TopHitsParser(aggFieldName, true, false)); } } case MAX -> { ExprType fieldType = - OpenSearchTypeFactory.convertRelDataTypeToExprType(args.getFirst().getType()); + OpenSearchTypeFactory.convertRelDataTypeToExprType(args.getFirst().getKey().getType()); if (supportsMaxMinAggregation(fieldType)) { yield Pair.of( - helper.build(args.getFirst(), AggregationBuilders.max(aggFieldName)), + helper.build(args.getFirst().getKey(), AggregationBuilders.max(aggFieldName)), new SingleValueParser(aggFieldName)); } else { yield Pair.of( AggregationBuilders.topHits(aggFieldName) - .fetchField(helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) + .fetchField( + helper.inferNamedField(args.getFirst().getKey()).getReferenceForTermQuery()) .size(1) .from(0) .sort( - helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), + helper.inferNamedField(args.getFirst().getKey()).getReferenceForTermQuery(), SortOrder.DESC), new TopHitsParser(aggFieldName, true, false)); } } case VAR_SAMP -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), + helper.build( + args.getFirst().getKey(), AggregationBuilders.extendedStats(aggFieldName)), new StatsParser(ExtendedStats::getVarianceSampling, aggFieldName)); case VAR_POP -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), + helper.build( + args.getFirst().getKey(), AggregationBuilders.extendedStats(aggFieldName)), new StatsParser(ExtendedStats::getVariancePopulation, aggFieldName)); case STDDEV_SAMP -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), + helper.build( + args.getFirst().getKey(), AggregationBuilders.extendedStats(aggFieldName)), new StatsParser(ExtendedStats::getStdDeviationSampling, aggFieldName)); case STDDEV_POP -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), + helper.build( + args.getFirst().getKey(), AggregationBuilders.extendedStats(aggFieldName)), new StatsParser(ExtendedStats::getStdDeviationPopulation, aggFieldName)); case ARG_MAX -> Pair.of( AggregationBuilders.topHits(aggFieldName) - .fetchField(helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) + .fetchField( + helper.inferNamedField(args.getFirst().getKey()).getReferenceForTermQuery()) .size(1) .from(0) .sort( - helper.inferNamedField(args.get(1)).getReferenceForTermQuery(), + helper.inferNamedField(args.get(1).getKey()).getReferenceForTermQuery(), org.opensearch.search.sort.SortOrder.DESC), new ArgMaxMinParser(aggFieldName)); case ARG_MIN -> Pair.of( AggregationBuilders.topHits(aggFieldName) - .fetchField(helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) + .fetchField( + helper.inferNamedField(args.getFirst().getKey()).getReferenceForTermQuery()) .size(1) .from(0) .sort( - helper.inferNamedField(args.get(1)).getReferenceForTermQuery(), + helper.inferNamedField(args.get(1).getKey()).getReferenceForTermQuery(), org.opensearch.search.sort.SortOrder.ASC), new ArgMaxMinParser(aggFieldName)); case OTHER_FUNCTION -> { @@ -502,8 +514,10 @@ yield switch (functionName) { Pair.of( AggregationBuilders.topHits(aggFieldName) .fetchField( - helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) - .size(helper.inferValue(args.getLast(), Integer.class)) + helper + .inferNamedField(args.getFirst().getKey()) + .getReferenceForTermQuery()) + .size(helper.inferValue(args.getLast().getKey(), Integer.class)) .from(0), new TopHitsParser(aggFieldName, false, true)); case FIRST -> { @@ -511,7 +525,7 @@ yield switch (functionName) { AggregationBuilders.topHits(aggFieldName).size(1).from(0); if (!args.isEmpty()) { firstBuilder.fetchField( - helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()); + helper.inferNamedField(args.getFirst().getKey()).getReferenceForTermQuery()); } yield Pair.of(firstBuilder, new TopHitsParser(aggFieldName, true, false)); } @@ -523,25 +537,25 @@ yield switch (functionName) { .sort("_doc", org.opensearch.search.sort.SortOrder.DESC); if (!args.isEmpty()) { lastBuilder.fetchField( - helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()); + helper.inferNamedField(args.getFirst().getKey()).getReferenceForTermQuery()); } yield Pair.of(lastBuilder, new TopHitsParser(aggFieldName, true, false)); } case PERCENTILE_APPROX -> { PercentilesAggregationBuilder aggBuilder = helper - .build(args.getFirst(), AggregationBuilders.percentiles(aggFieldName)) - .percentiles(helper.inferValue(args.get(1), Double.class)); + .build(args.getFirst().getKey(), AggregationBuilders.percentiles(aggFieldName)) + .percentiles(helper.inferValue(args.get(1).getKey(), Double.class)); /* See {@link PercentileApproxFunction}, PERCENTILE_APPROX accepts args of [FIELD, PERCENTILE, TYPE, COMPRESSION(optional)] */ if (args.size() > 3) { - aggBuilder.compression(helper.inferValue(args.getLast(), Double.class)); + aggBuilder.compression(helper.inferValue(args.getLast().getKey(), Double.class)); } yield Pair.of(aggBuilder, new SinglePercentileParser(aggFieldName)); } case DISTINCT_COUNT_APPROX -> Pair.of( helper.build( - !args.isEmpty() ? args.getFirst() : null, + !args.isEmpty() ? args.getFirst().getKey() : null, AggregationBuilders.cardinality(aggFieldName)), new SingleValueParser(aggFieldName)); default -> @@ -563,17 +577,17 @@ yield switch (functionName) { List scripts = new ArrayList<>(); args.forEach( rex -> { - if (rex instanceof RexInputRef) { - sources.add(helper.inferNamedField(rex).getReference()); - } else if (rex instanceof RexCall || rex instanceof RexLiteral) { + if (rex.getKey() instanceof RexInputRef) { + sources.add(helper.inferNamedField(rex.getKey()).getReference()); + } else if (rex.getKey() instanceof RexCall || rex.getKey() instanceof RexLiteral) { scripts.add( new SearchSourceBuilder.ScriptField( - rex.toString(), helper.inferScript(rex).getScript(), false)); + rex.getValue(), helper.inferScript(rex.getKey()).getScript(), false)); } else { throw new AggregateAnalyzer.AggregateAnalyzerException( String.format( "Unsupported push-down aggregator %s due to rex type is %s", - aggCall.getAggregation(), rex.getKind())); + aggCall.getAggregation(), rex.getKey().getKind())); } }); topHitsAggregationBuilder.fetchSource( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 86a8f1748c8..91e0b9be315 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -67,7 +67,6 @@ import org.opensearch.sql.opensearch.storage.scan.context.PushDownOperation; import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; import org.opensearch.sql.opensearch.storage.scan.context.RareTopDigest; -import org.opensearch.sql.opensearch.storage.scan.context.SortDigest; import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; /** An abstract relational operator representing a scan of an OpenSearchIndex type. */ @@ -380,7 +379,7 @@ && isAnyCollationNameInAggregators(collationNames)) { builders.add(sortBuilder.order(order)); } action = (OSRequestBuilderAction) requestBuilder -> requestBuilder.pushDownSort(builders); - digest = new SortDigest(builders); + digest = builders.toString(); newScan.pushDownContext.add(PushDownType.SORT, digest, action); return newScan; } @@ -498,14 +497,6 @@ public boolean isProjectPushed() { return this.getPushDownContext().isProjectPushed(); } - public boolean isSortPushed() { - return this.getPushDownContext().isSortPushed(); - } - - public boolean isSortExprPushed() { - return this.getPushDownContext().isSortExprPushed(); - } - /** * Determine the appropriate ScriptSortType based on the expression's return type. * diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 27d39331705..4f84746ade4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -8,7 +8,6 @@ import java.util.AbstractCollection; import java.util.ArrayDeque; import java.util.Iterator; -import java.util.List; import lombok.Getter; import org.jetbrains.annotations.NotNull; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; @@ -154,19 +153,6 @@ public Object getDigestByType(PushDownType type) { .orElse(null); } - /** - * Get all digest of a specific type. - * - * @param type The PushDownType to get the digest for - * @return The digest list - */ - public List getAllDigestByType(PushDownType type) { - return this.stream() - .filter(operation -> operation.type() == type) - .map(PushDownOperation::digest) - .toList(); - } - public OpenSearchRequestBuilder createRequestBuilder() { OpenSearchRequestBuilder newRequestBuilder = osIndex.createRequestBuilder(); if (operationsForRequestBuilder != null) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortDigest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortDigest.java deleted file mode 100644 index dc8f3ca6d9f..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortDigest.java +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.storage.scan.context; - -import java.util.List; -import org.opensearch.search.sort.SortBuilder; - -public record SortDigest(List> builders) { - @Override - public String toString() { - return builders.toString(); - } -} From 1bbac1408d178bc5543e563d06dba7b736bd38a0 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Tue, 16 Dec 2025 21:23:31 +0800 Subject: [PATCH 4/9] fix yaml test Signed-off-by: Lantao Jin --- .../sql/opensearch/planner/rules/DedupPushdownRule.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index 9a62517020b..2e763c5fdb5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -88,7 +88,14 @@ protected void apply( .toList(); List dedupColumnNames = dedupColumnIndicesInWindowProject.stream() - .map(i -> projectWithWindow.getInput().getRowType().getFieldNames().get(i)) + .map( + i -> + projectWithWindow.getNamedProjects().stream() + .filter(pair -> pair.getKey().isA(SqlKind.INPUT_REF)) + .filter(pair -> ((RexInputRef) pair.getKey()).getIndex() == i) + .map(Pair::getValue) + .findFirst() + .get()) .toList(); // must be row_number <= number From 8942e105e9ea13c6da325d5e3e3f450468a98ad6 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Tue, 16 Dec 2025 21:48:19 +0800 Subject: [PATCH 5/9] simplify Signed-off-by: Lantao Jin --- .../planner/rules/DedupPushdownRule.java | 74 +++++++------------ 1 file changed, 26 insertions(+), 48 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index 2e763c5fdb5..8b0c50b1ef9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -5,7 +5,6 @@ package org.opensearch.sql.opensearch.planner.rules; -import java.util.ArrayList; import java.util.List; import java.util.Set; import java.util.function.Predicate; @@ -27,6 +26,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.immutables.value.Value; +import org.opensearch.ml.repackage.com.google.common.collect.Streams; import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; @@ -81,13 +81,9 @@ protected void apply( // TODO https://github.com/opensearch-project/sql/issues/4564 return; } - List dedupColumnIndicesInWindowProject = - dedupColumns.stream() - .filter(rex -> rex.isA(SqlKind.INPUT_REF)) - .map(r -> ((RexInputRef) r).getIndex()) - .toList(); + List dedupColumnIndices = getInputRefIndices(dedupColumns); List dedupColumnNames = - dedupColumnIndicesInWindowProject.stream() + dedupColumnIndices.stream() .map( i -> projectWithWindow.getNamedProjects().stream() @@ -107,58 +103,26 @@ protected void apply( // We convert the dedup pushdown to composite aggregate + top_hits: // Aggregate(literalAgg(dedupNumer), groups) // +- Project(groups, remaining) - // +- Scan or Project with expression RelBuilder relBuilder = call.builder(); // 1 Initial a RelBuilder by pushing Scan and Project if (projectWithExpr == null) { - // 1.1 if projectWithExpr not existed, push scan + // 1.1 if projectWithExpr not existed, push a scan then create a new project relBuilder.push(scan); - // 1.2 To baseline the rowType, merge the fields() and projectWithWindow - List columnsMerged = new ArrayList<>(); - List colNamesMerged = new ArrayList<>(); List columnsFromScan = relBuilder.fields(); - List columnsFromWindowProj = projectWithWindow.getProjects(); List colNamesFromScan = relBuilder.peek().getRowType().getFieldNames(); - List colNamesFromWindowProj = projectWithWindow.getRowType().getFieldNames(); - - // 1.3 Add existing columns with proper names - // For rename case: source = t | rename old as new | dedup new - // Add the column `old` with name "new" - for (RexNode col : columnsFromScan) { - columnsMerged.add(col); - int projectIndex = columnsFromWindowProj.indexOf(col); - if (projectIndex >= 0) { - colNamesMerged.add(colNamesFromWindowProj.get(projectIndex)); - } else { - colNamesMerged.add(colNamesFromScan.get(columnsFromScan.indexOf(col))); - } - } - // 1.4 Append new columns from project (excluding ROW_NUMBER and duplicates) - for (RexNode col : columnsFromWindowProj) { - if (!col.isA(SqlKind.ROW_NUMBER) && !columnsFromScan.contains(col)) { - columnsMerged.add(col); - colNamesMerged.add(col.toString()); - } - } - // 1.5 if reorder required, build the reordered project - List> merged = - IntStream.range(0, columnsMerged.size()) - .mapToObj(i -> new Pair<>(columnsMerged.get(i), colNamesMerged.get(i))) - .toList(); + List> namedPairFromScan = + Streams.zip(columnsFromScan.stream(), colNamesFromScan.stream(), Pair::new).toList(); List> reordered = - moveForwardDedupColumns(merged, dedupColumnIndicesInWindowProject, dedupColumnNames); - // 1.6 force add this reordered project + advanceDedupColumns(namedPairFromScan, dedupColumnIndices, dedupColumnNames); relBuilder.project( reordered.stream().map(Pair::getKey).toList(), reordered.stream().map(Pair::getValue).toList(), true); } else { - // 1.7 if projectWithExpr existed, push a reordered projectWithExpr + // 1.2 if projectWithExpr existed, push a reordered projectWithExpr List> reordered = - moveForwardDedupColumns( - projectWithExpr.getNamedProjects(), - dedupColumnIndicesInWindowProject, - dedupColumnNames); + advanceDedupColumns( + projectWithExpr.getNamedProjects(), dedupColumnIndices, dedupColumnNames); LogicalProject reorderedProject = LogicalProject.create( projectWithExpr.getInput(), @@ -191,8 +155,22 @@ protected void apply( } } - /** Move the dedup columns to the front of the original column list. */ - private static List> moveForwardDedupColumns( + private static List getInputRefIndices(List columns) { + return columns.stream() + .filter(rex -> rex.isA(SqlKind.INPUT_REF)) + .map(r -> ((RexInputRef) r).getIndex()) + .toList(); + } + + /** + * Move the dedup columns to the front of the original column list. + * + * @param originalColumnList The original column pair list + * @param dedupColumnIndices The indices of dedup columns + * @param dedupColumnNames The names of dedup columns + * @return The reordered column pair list + */ + private static List> advanceDedupColumns( List> originalColumnList, List dedupColumnIndices, List dedupColumnNames) { From abd6f5a56c559fb9276f6b334371fdea2c5e56e0 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Tue, 16 Dec 2025 22:46:04 +0800 Subject: [PATCH 6/9] address comments Signed-off-by: Lantao Jin --- .../org/opensearch/sql/util/MatcherUtils.java | 11 ++++++++++- .../planner/rules/DedupPushdownRule.java | 6 ++++-- .../CalciteAggregationScriptLeafFactory.java | 2 +- .../storage/script/field/CalciteFieldScript.java | 16 +++++++++++++++- .../field/CalciteFieldScriptLeafFactory.java | 2 +- .../filter/CalciteFilterScriptLeafFactory.java | 9 ++++++++- 6 files changed, 39 insertions(+), 7 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index 9419b76678d..a70e27c1f54 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -447,7 +447,16 @@ public static void assertYamlEqualsIgnoreId(String expectedYaml, String actualYa assertYamlEquals(cleanUpYaml(expectedYaml), cleanUpYaml(actualYaml)); } - /** Due to the implementation of DSL, may both expected YAML files are correct. */ + /** + * Compare actual YAML with two expected YAML strings, using the second as a fallback. This is + * useful when the DSL implementation can produce multiple valid plan variants. If the first + * comparison fails, attempts the second comparison instead. + * + * @param expectedYaml1 the primary expected YAML string + * @param expectedYaml2 the fallback expected YAML string + * @param actualYaml the actual YAML string to compare + * @throws AssertionError if both comparisons fail (reports only the second failure) + */ public static void assertYamlEqualsIgnoreId( String expectedYaml1, String expectedYaml2, String actualYaml) { try { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index 8b0c50b1ef9..5d5fb2932c1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -5,6 +5,7 @@ package org.opensearch.sql.opensearch.planner.rules; +import com.google.common.collect.Streams; import java.util.List; import java.util.Set; import java.util.function.Predicate; @@ -26,7 +27,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.immutables.value.Value; -import org.opensearch.ml.repackage.com.google.common.collect.Streams; import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; @@ -93,7 +93,9 @@ protected void apply( .findFirst() .get()) .toList(); - + if (dedupColumnIndices.size() != dedupColumnNames.size()) { + return; + } // must be row_number <= number assert numOfDedupFilter.getCondition().isA(SqlKind.LESS_THAN_OR_EQUAL); RexLiteral literal = diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScriptLeafFactory.java index b0b0914e072..87227ca789d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScriptLeafFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScriptLeafFactory.java @@ -32,7 +32,7 @@ class CalciteAggregationScriptLeafFactory implements AggregationScript.LeafFacto /** * Stores the parameter names to the actual indices in SOURCES. Generate it in advance in factory - * to save the process for each document* + * to save the process for each document. */ private final Map parametersToIndex; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScript.java index b709089e84f..6a859a4e0a2 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScript.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScript.java @@ -26,6 +26,15 @@ public class CalciteFieldScript extends FieldScript { private final Map parametersToIndex; + /** + * Creates a new CalciteFieldScript. + * + * @param function the Calcite function to execute + * @param lookup the search lookup for document access + * @param context the leaf reader context + * @param params the script parameters + * @param parametersToIndex mapping from parameter names to indices + */ public CalciteFieldScript( Function1 function, SearchLookup lookup, @@ -40,6 +49,11 @@ public CalciteFieldScript( @Override public Object execute() { - return calciteScript.execute(this.getDoc(), this.sourceLookup, this.parametersToIndex)[0]; + Object[] results = + calciteScript.execute(this.getDoc(), this.sourceLookup, this.parametersToIndex); + if (results == null || results.length == 0) { + return null; + } + return results[0]; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java index 727640e4ccf..94ec5e62e28 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java @@ -29,7 +29,7 @@ public class CalciteFieldScriptLeafFactory implements FieldScript.LeafFactory { /** * Stores the parameter names to the actual indices in SOURCES. Generate it in advance in factory - * to save the process for each document* + * to save the process for each document. */ private final Map parametersToIndex; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java index 0158e593902..1d4a66b5875 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java @@ -30,10 +30,17 @@ class CalciteFilterScriptLeafFactory implements FilterScript.LeafFactory { /** * Stores the parameter names to the actual indices in SOURCES. Generate it in advance in factory - * to save the process for each document* + * to save the process for each document. */ private final Map parametersToIndex; + /** + * Creates a new CalciteFieldScriptLeafFactory. + * + * @param function the Calcite function to execute + * @param params the script parameters including SOURCES + * @param lookup the search lookup for document access + */ public CalciteFilterScriptLeafFactory( Function1 function, Map params, SearchLookup lookup) { this.function = function; From 7f91e5e1e6457373e43e1bb65584ce2132a5520a Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Wed, 17 Dec 2025 14:03:39 +0800 Subject: [PATCH 7/9] fix javadoc Signed-off-by: Lantao Jin --- .../script/field/CalciteFieldScriptLeafFactory.java | 7 +++++++ .../script/filter/CalciteFilterScriptLeafFactory.java | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java index 94ec5e62e28..61e13a61916 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/field/CalciteFieldScriptLeafFactory.java @@ -33,6 +33,13 @@ public class CalciteFieldScriptLeafFactory implements FieldScript.LeafFactory { */ private final Map parametersToIndex; + /** + * Creates a new CalciteFieldScriptLeafFactory. + * + * @param function the Calcite function to execute + * @param params the script parameters including SOURCES + * @param lookup the search lookup for document access + */ public CalciteFieldScriptLeafFactory( Function1 function, Map params, SearchLookup lookup) { this.function = function; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java index 1d4a66b5875..1628db599b9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java @@ -35,7 +35,7 @@ class CalciteFilterScriptLeafFactory implements FilterScript.LeafFactory { private final Map parametersToIndex; /** - * Creates a new CalciteFieldScriptLeafFactory. + * Creates a new CalciteFilterScriptLeafFactory. * * @param function the Calcite function to execute * @param params the script parameters including SOURCES From 3d2786dafa2958324a87754bce36e65dc5944bf8 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Fri, 19 Dec 2025 10:29:55 +0800 Subject: [PATCH 8/9] address comments Signed-off-by: Lantao Jin --- .../java/org/opensearch/sql/utils/Utils.java | 23 +++++ .../calcite/explain_nested_agg_push.yaml | 9 ++ .../physical/EnumerableNestedAggregate.java | 85 +++++++++++++++++++ .../rules/EnumerableNestedAggregateRule.java | 67 +++++++++++++++ .../opensearch/request/AggregateAnalyzer.java | 17 ++-- .../response/agg/TopHitsParser.java | 4 +- 6 files changed, 196 insertions(+), 9 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/utils/Utils.java create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_push.yaml create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/EnumerableNestedAggregate.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableNestedAggregateRule.java diff --git a/core/src/main/java/org/opensearch/sql/utils/Utils.java b/core/src/main/java/org/opensearch/sql/utils/Utils.java new file mode 100644 index 00000000000..ddbe7afe03d --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/utils/Utils.java @@ -0,0 +1,23 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.utils; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import org.apache.commons.lang3.tuple.Pair; + +public interface Utils { + static List> zipWithIndex(List input) { + LinkedList> result = new LinkedList<>(); + Iterator iter = input.iterator(); + int index = 0; + while (iter.hasNext()) { + result.add(Pair.of(iter.next(), index++)); + } + return result; + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_push.yaml new file mode 100644 index 00000000000..2808b4e0135 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count_area=[$1], min_area=[$2], max_area=[$3], avg_area=[$4], avg_age=[$5], name=[$0]) + LogicalAggregate(group=[{0}], count_area=[COUNT($1)], min_area=[MIN($1)], max_area=[MAX($1)], avg_area=[AVG($1)], avg_age=[AVG($2)]) + LogicalProject(name=[$1], address.area=[$3], age=[$9]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count_area=COUNT($1),min_area=MIN($1),max_area=MAX($1),avg_area=AVG($1),avg_age=AVG($2)), PROJECT->[count_area, min_area, max_area, avg_area, avg_age, name], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"name":{"terms":{"field":"name.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"nested_count_area":{"nested":{"path":"address"},"aggregations":{"count_area":{"value_count":{"field":"address.area"}}}},"nested_min_area":{"nested":{"path":"address"},"aggregations":{"min_area":{"min":{"field":"address.area"}}}},"nested_max_area":{"nested":{"path":"address"},"aggregations":{"max_area":{"max":{"field":"address.area"}}}},"nested_avg_area":{"nested":{"path":"address"},"aggregations":{"avg_area":{"avg":{"field":"address.area"}}}},"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/EnumerableNestedAggregate.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/EnumerableNestedAggregate.java new file mode 100644 index 00000000000..89965d25d7a --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/EnumerableNestedAggregate.java @@ -0,0 +1,85 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.planner.physical; + +import java.util.List; +import org.apache.calcite.adapter.enumerable.AggImplementor; +import org.apache.calcite.adapter.enumerable.EnumerableAggregateBase; +import org.apache.calcite.adapter.enumerable.EnumerableConvention; +import org.apache.calcite.adapter.enumerable.EnumerableRel; +import org.apache.calcite.adapter.enumerable.EnumerableRelImplementor; +import org.apache.calcite.adapter.enumerable.RexImpTable; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.InvalidRelException; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.hint.RelHint; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.ImmutableBitSet; +import org.checkerframework.checker.nullness.qual.Nullable; + +/** + * The enumerable aggregate physical implementation for OpenSearch nested aggregation. + * https://docs.opensearch.org/latest/aggregations/bucket/nested/ + */ +public class EnumerableNestedAggregate extends EnumerableAggregateBase implements EnumerableRel { + + public EnumerableNestedAggregate( + RelOptCluster cluster, + RelTraitSet traitSet, + List hints, + RelNode input, + ImmutableBitSet groupSet, + @Nullable List groupSets, + List aggCalls) + throws InvalidRelException { + super(cluster, traitSet, hints, input, groupSet, groupSets, aggCalls); + assert getConvention() instanceof EnumerableConvention; + + for (AggregateCall aggCall : aggCalls) { + if (aggCall.isDistinct()) { + throw new InvalidRelException("distinct aggregation not supported"); + } + if (aggCall.distinctKeys != null) { + throw new InvalidRelException("within-distinct aggregation not supported"); + } + AggImplementor implementor2 = RexImpTable.INSTANCE.get(aggCall.getAggregation(), false); + if (implementor2 == null) { + throw new InvalidRelException("aggregation " + aggCall.getAggregation() + " not supported"); + } + } + } + + @Override + public EnumerableNestedAggregate copy( + RelTraitSet traitSet, + RelNode input, + ImmutableBitSet groupSet, + @Nullable List groupSets, + List aggCalls) { + try { + return new EnumerableNestedAggregate( + getCluster(), traitSet, getHints(), input, groupSet, groupSets, aggCalls); + } catch (InvalidRelException e) { + // Semantic error not possible. Must be a bug. Convert to + // internal error. + throw new AssertionError(e); + } + } + + @Override + public Result implement(EnumerableRelImplementor implementor, Prefer pref) { + throw new RuntimeException(); + } + + @Override + public @Nullable RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { + return super.computeSelfCost(planner, mq).multiplyBy(.9); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableNestedAggregateRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableNestedAggregateRule.java new file mode 100644 index 00000000000..01fe7b3eaa3 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableNestedAggregateRule.java @@ -0,0 +1,67 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.planner.rules; + +import org.apache.calcite.adapter.enumerable.EnumerableConvention; +import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.InvalidRelException; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.convert.ConverterRule; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.sql.opensearch.planner.physical.EnumerableNestedAggregate; + +/** Rule to convert {@link LogicalAggregate} to {@link EnumerableNestedAggregate}. */ +public class EnumerableNestedAggregateRule extends ConverterRule { + private static final Logger LOG = LogManager.getLogger(); + + /** Default configuration. */ + public static final Config DEFAULT_CONFIG = + Config.INSTANCE + .withConversion( + LogicalAggregate.class, + Convention.NONE, + EnumerableConvention.INSTANCE, + "EnumerableNestedAggregateRule") + .withRuleFactory(EnumerableNestedAggregateRule::new); + + /** Called from the Config. */ + protected EnumerableNestedAggregateRule(Config config) { + super(config); + } + + @Override + public @Nullable RelNode convert(RelNode rel) { + final Aggregate agg = (Aggregate) rel; + if (agg.getHints().stream() + .noneMatch( + hint -> + hint.hintName.equals("stats_args") + && hint.kvOptions.values().stream().anyMatch(v -> v.equals("true")))) { + return null; + } + final RelTraitSet traitSet = rel.getCluster().traitSet().replace(EnumerableConvention.INSTANCE); + try { + return new EnumerableNestedAggregate( + rel.getCluster(), + traitSet, + agg.getHints(), + convert(agg.getInput(), traitSet), + agg.getGroupSet(), + agg.getGroupSets(), + agg.getAggCallList()); + } catch (InvalidRelException e) { + if (LOG.isDebugEnabled()) { + LOG.debug(e.toString()); + } + return null; + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index d23a5ca144b..6766c9f5f51 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -337,8 +337,7 @@ private static Pair> processAggregateCalls( for (int i = 0; i < aggCalls.size(); i++) { AggregateCall aggCall = aggCalls.get(i); - List> args = - convertAggArgThroughProject(aggCall, project); + List> args = convertAggArgThroughProject(aggCall, project); String aggFieldName = aggFieldNames.get(i); Pair builderAndParser = @@ -359,20 +358,24 @@ private static Pair> processAggregateCalls( * @param project the project * @return the converted Pair list */ - private static List> convertAggArgThroughProject( + private static List> convertAggArgThroughProject( AggregateCall aggCall, Project project) { return project == null ? List.of() : PlanUtils.getObjectFromLiteralAgg(aggCall) != null ? project.getNamedProjects().stream() .filter(rex -> !rex.getKey().isA(SqlKind.ROW_NUMBER)) + .map(p -> Pair.of(p.getKey(), p.getValue())) .toList() - : aggCall.getArgList().stream().map(project.getNamedProjects()::get).toList(); + : aggCall.getArgList().stream() + .map(project.getNamedProjects()::get) + .map(p -> Pair.of(p.getKey(), p.getValue())) + .toList(); } private static Pair createAggregationBuilderAndParser( AggregateCall aggCall, - List> args, + List> args, String aggFieldName, AggregateAnalyzer.AggregateBuilderHelper helper) { if (aggCall.isDistinct()) { @@ -384,7 +387,7 @@ private static Pair createAggregationBuilderAn private static Pair createDistinctAggregation( AggregateCall aggCall, - List> args, + List> args, String aggFieldName, AggregateBuilderHelper helper) { @@ -403,7 +406,7 @@ private static Pair createDistinctAggregation( private static Pair createRegularAggregation( AggregateCall aggCall, - List> args, + List> args, String aggFieldName, AggregateBuilderHelper helper) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java index ef3c37af1b0..0e178220600 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java @@ -90,9 +90,9 @@ public List> parse(Aggregation agg) { // LinkedHashMap["name" -> "A", "category" -> "Y"] // ] return Arrays.stream(hits) - .>map( + .map( hit -> { - Map map = new LinkedHashMap<>(hit.getSourceAsMap()); + Map map = new LinkedHashMap<>(hit.getSourceAsMap()); hit.getFields().values().forEach(f -> map.put(f.getName(), f.getValue())); return map; }) From 8323df6d1fa5f6db4ca7c2bc3b85771efe87a1e7 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Fri, 19 Dec 2025 10:39:31 +0800 Subject: [PATCH 9/9] delete unexpected files Signed-off-by: Lantao Jin --- .../java/org/opensearch/sql/utils/Utils.java | 23 ----- .../calcite/explain_nested_agg_push.yaml | 9 -- .../physical/EnumerableNestedAggregate.java | 85 ------------------- .../rules/EnumerableNestedAggregateRule.java | 67 --------------- 4 files changed, 184 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/utils/Utils.java delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_push.yaml delete mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/EnumerableNestedAggregate.java delete mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableNestedAggregateRule.java diff --git a/core/src/main/java/org/opensearch/sql/utils/Utils.java b/core/src/main/java/org/opensearch/sql/utils/Utils.java deleted file mode 100644 index ddbe7afe03d..00000000000 --- a/core/src/main/java/org/opensearch/sql/utils/Utils.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.utils; - -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import org.apache.commons.lang3.tuple.Pair; - -public interface Utils { - static List> zipWithIndex(List input) { - LinkedList> result = new LinkedList<>(); - Iterator iter = input.iterator(); - int index = 0; - while (iter.hasNext()) { - result.add(Pair.of(iter.next(), index++)); - } - return result; - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_push.yaml deleted file mode 100644 index 2808b4e0135..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_push.yaml +++ /dev/null @@ -1,9 +0,0 @@ -calcite: - logical: | - LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(count_area=[$1], min_area=[$2], max_area=[$3], avg_area=[$4], avg_age=[$5], name=[$0]) - LogicalAggregate(group=[{0}], count_area=[COUNT($1)], min_area=[MIN($1)], max_area=[MAX($1)], avg_area=[AVG($1)], avg_age=[AVG($2)]) - LogicalProject(name=[$1], address.area=[$3], age=[$9]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) - physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count_area=COUNT($1),min_area=MIN($1),max_area=MAX($1),avg_area=AVG($1),avg_age=AVG($2)), PROJECT->[count_area, min_area, max_area, avg_area, avg_age, name], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"name":{"terms":{"field":"name.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"nested_count_area":{"nested":{"path":"address"},"aggregations":{"count_area":{"value_count":{"field":"address.area"}}}},"nested_min_area":{"nested":{"path":"address"},"aggregations":{"min_area":{"min":{"field":"address.area"}}}},"nested_max_area":{"nested":{"path":"address"},"aggregations":{"max_area":{"max":{"field":"address.area"}}}},"nested_avg_area":{"nested":{"path":"address"},"aggregations":{"avg_area":{"avg":{"field":"address.area"}}}},"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/EnumerableNestedAggregate.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/EnumerableNestedAggregate.java deleted file mode 100644 index 89965d25d7a..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/EnumerableNestedAggregate.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.planner.physical; - -import java.util.List; -import org.apache.calcite.adapter.enumerable.AggImplementor; -import org.apache.calcite.adapter.enumerable.EnumerableAggregateBase; -import org.apache.calcite.adapter.enumerable.EnumerableConvention; -import org.apache.calcite.adapter.enumerable.EnumerableRel; -import org.apache.calcite.adapter.enumerable.EnumerableRelImplementor; -import org.apache.calcite.adapter.enumerable.RexImpTable; -import org.apache.calcite.plan.RelOptCluster; -import org.apache.calcite.plan.RelOptCost; -import org.apache.calcite.plan.RelOptPlanner; -import org.apache.calcite.plan.RelTraitSet; -import org.apache.calcite.rel.InvalidRelException; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.AggregateCall; -import org.apache.calcite.rel.hint.RelHint; -import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.util.ImmutableBitSet; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * The enumerable aggregate physical implementation for OpenSearch nested aggregation. - * https://docs.opensearch.org/latest/aggregations/bucket/nested/ - */ -public class EnumerableNestedAggregate extends EnumerableAggregateBase implements EnumerableRel { - - public EnumerableNestedAggregate( - RelOptCluster cluster, - RelTraitSet traitSet, - List hints, - RelNode input, - ImmutableBitSet groupSet, - @Nullable List groupSets, - List aggCalls) - throws InvalidRelException { - super(cluster, traitSet, hints, input, groupSet, groupSets, aggCalls); - assert getConvention() instanceof EnumerableConvention; - - for (AggregateCall aggCall : aggCalls) { - if (aggCall.isDistinct()) { - throw new InvalidRelException("distinct aggregation not supported"); - } - if (aggCall.distinctKeys != null) { - throw new InvalidRelException("within-distinct aggregation not supported"); - } - AggImplementor implementor2 = RexImpTable.INSTANCE.get(aggCall.getAggregation(), false); - if (implementor2 == null) { - throw new InvalidRelException("aggregation " + aggCall.getAggregation() + " not supported"); - } - } - } - - @Override - public EnumerableNestedAggregate copy( - RelTraitSet traitSet, - RelNode input, - ImmutableBitSet groupSet, - @Nullable List groupSets, - List aggCalls) { - try { - return new EnumerableNestedAggregate( - getCluster(), traitSet, getHints(), input, groupSet, groupSets, aggCalls); - } catch (InvalidRelException e) { - // Semantic error not possible. Must be a bug. Convert to - // internal error. - throw new AssertionError(e); - } - } - - @Override - public Result implement(EnumerableRelImplementor implementor, Prefer pref) { - throw new RuntimeException(); - } - - @Override - public @Nullable RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { - return super.computeSelfCost(planner, mq).multiplyBy(.9); - } -} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableNestedAggregateRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableNestedAggregateRule.java deleted file mode 100644 index 01fe7b3eaa3..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableNestedAggregateRule.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.planner.rules; - -import org.apache.calcite.adapter.enumerable.EnumerableConvention; -import org.apache.calcite.plan.Convention; -import org.apache.calcite.plan.RelTraitSet; -import org.apache.calcite.rel.InvalidRelException; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.convert.ConverterRule; -import org.apache.calcite.rel.core.Aggregate; -import org.apache.calcite.rel.logical.LogicalAggregate; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.opensearch.sql.opensearch.planner.physical.EnumerableNestedAggregate; - -/** Rule to convert {@link LogicalAggregate} to {@link EnumerableNestedAggregate}. */ -public class EnumerableNestedAggregateRule extends ConverterRule { - private static final Logger LOG = LogManager.getLogger(); - - /** Default configuration. */ - public static final Config DEFAULT_CONFIG = - Config.INSTANCE - .withConversion( - LogicalAggregate.class, - Convention.NONE, - EnumerableConvention.INSTANCE, - "EnumerableNestedAggregateRule") - .withRuleFactory(EnumerableNestedAggregateRule::new); - - /** Called from the Config. */ - protected EnumerableNestedAggregateRule(Config config) { - super(config); - } - - @Override - public @Nullable RelNode convert(RelNode rel) { - final Aggregate agg = (Aggregate) rel; - if (agg.getHints().stream() - .noneMatch( - hint -> - hint.hintName.equals("stats_args") - && hint.kvOptions.values().stream().anyMatch(v -> v.equals("true")))) { - return null; - } - final RelTraitSet traitSet = rel.getCluster().traitSet().replace(EnumerableConvention.INSTANCE); - try { - return new EnumerableNestedAggregate( - rel.getCluster(), - traitSet, - agg.getHints(), - convert(agg.getInput(), traitSet), - agg.getGroupSet(), - agg.getGroupSets(), - agg.getAggCallList()); - } catch (InvalidRelException e) { - if (LOG.isDebugEnabled()) { - LOG.debug(e.toString()); - } - return null; - } - } -}