diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index bbd4dd3c6d6..aaef0a00be2 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1262,15 +1262,6 @@ private Optional extractAliasLiteral(RexNode node) { public RelNode visitJoin(Join node, CalcitePlanContext context) { List children = node.getChildren(); children.forEach(c -> analyze(c, context)); - // add join.subsearch_maxout limit to subsearch side, 0 and negative means unlimited. - if (context.sysLimit.joinSubsearchLimit() > 0) { - PlanUtils.replaceTop( - context.relBuilder, - LogicalSystemLimit.create( - SystemLimitType.JOIN_SUBSEARCH_MAXOUT, - context.relBuilder.peek(), - context.relBuilder.literal(context.sysLimit.joinSubsearchLimit()))); - } if (node.getJoinCondition().isEmpty()) { // join-with-field-list grammar List leftColumns = context.relBuilder.peek(1).getRowType().getFieldNames(); @@ -1327,23 +1318,25 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { .collect(Collectors.toList()); buildDedupNotNull(context, dedupeFields, allowedDuplication, true); } + // add LogicalSystemLimit after dedup + addSysLimitForJoinSubsearch(context); context.relBuilder.join( JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition); if (!toBeRemovedFields.isEmpty()) { context.relBuilder.projectExcept(toBeRemovedFields); } - return context.relBuilder.peek(); - } - // The join-with-criteria grammar doesn't allow empty join condition - RexNode joinCondition = - node.getJoinCondition() - .map(c -> rexVisitor.analyzeJoinCondition(c, context)) - .orElse(context.relBuilder.literal(true)); - if (node.getJoinType() == SEMI || node.getJoinType() == ANTI) { - // semi and anti join only return left table outputs - context.relBuilder.join( - JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition); } else { + // The join-with-criteria grammar doesn't allow empty join condition + RexNode joinCondition = + node.getJoinCondition() + .map(c -> rexVisitor.analyzeJoinCondition(c, context)) + .orElse(context.relBuilder.literal(true)); + if (node.getJoinType() == SEMI || node.getJoinType() == ANTI) { + // semi and anti join only return left table outputs + context.relBuilder.join( + JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition); + return context.relBuilder.peek(); + } // Join condition could contain duplicated column name, Calcite will rename the duplicated // column name with numeric suffix, e.g. ON t1.id = t2.id, the output contains `id` and `id0` // when a new project add to stack. To avoid `id0`, we will rename the `id0` to `alias.id` @@ -1383,6 +1376,8 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { buildDedupNotNull(context, dedupeFields, allowedDuplication, true); } + // add LogicalSystemLimit after dedup + addSysLimitForJoinSubsearch(context); context.relBuilder.join( JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition); JoinAndLookupUtils.renameToExpectedFields( @@ -1391,6 +1386,18 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { return context.relBuilder.peek(); } + private static void addSysLimitForJoinSubsearch(CalcitePlanContext context) { + // add join.subsearch_maxout limit to subsearch side, 0 and negative means unlimited. + if (context.sysLimit.joinSubsearchLimit() > 0) { + PlanUtils.replaceTop( + context.relBuilder, + LogicalSystemLimit.create( + SystemLimitType.JOIN_SUBSEARCH_MAXOUT, + context.relBuilder.peek(), + context.relBuilder.literal(context.sysLimit.joinSubsearchLimit()))); + } + } + private List getRightColumnsInJoinCriteria( RelBuilder relBuilder, RexNode joinCondition) { int stackSize = relBuilder.size(); @@ -1566,7 +1573,6 @@ private static void buildDedupOrNull( .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() .partitionBy(dedupeFields) - .orderBy(dedupeFields) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(ROW_NUMBER_COLUMN_FOR_DEDUP); context.relBuilder.projectPlus(rowNumber); @@ -1608,7 +1614,6 @@ private static void buildDedupNotNull( .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() .partitionBy(dedupeFields) - .orderBy(dedupeFields) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(rowNumberAlias); context.relBuilder.projectPlus(rowNumber); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 62cb3bff69e..2f5cf45983e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -488,7 +488,11 @@ static RexNode derefMapCall(RexNode rexNode) { /** Check if contains dedup */ static boolean containsRowNumberDedup(RelNode node) { - return node.getRowType().getFieldNames().stream().anyMatch(ROW_NUMBER_COLUMN_FOR_DEDUP::equals); + return node.getRowType().getFieldNames().stream() + .anyMatch( + name -> + name.equals(ROW_NUMBER_COLUMN_FOR_DEDUP) + || name.equals(ROW_NUMBER_COLUMN_FOR_JOIN_MAX_DEDUP)); } /** Check if contains dedup for top/rare */ diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 265cf6835d5..5db4d7f9e50 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -86,8 +86,6 @@ public void supportSearchSargPushDown_timeRange() throws IOException { // Only for Calcite @Test public void testJoinWithCriteriaAndMaxOption() throws IOException { - // TODO could be optimized with https://github.com/opensearch-project/OpenSearch/issues/3725 - enabledOnlyWhenPushdownIsEnabled(); String query = "source=opensearch-sql_test_index_bank | join max=1 left=l right=r on" + " l.account_number=r.account_number opensearch-sql_test_index_bank"; @@ -99,8 +97,6 @@ public void testJoinWithCriteriaAndMaxOption() throws IOException { // Only for Calcite @Test public void testJoinWithFieldListAndMaxOption() throws IOException { - // TODO could be optimized with https://github.com/opensearch-project/OpenSearch/issues/3725 - enabledOnlyWhenPushdownIsEnabled(); String query = "source=opensearch-sql_test_index_bank | join type=inner max=1 account_number" + " opensearch-sql_test_index_bank"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/NfwPplDashboardIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/NfwPplDashboardIT.java index d28e6d4d8e6..21e4238d636 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/NfwPplDashboardIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/NfwPplDashboardIT.java @@ -436,7 +436,8 @@ public void testTopTCPFlowsByBytes() throws IOException { public void testTopTCPFlags() throws IOException { String query = String.format( - "source=%s | STATS count() as Count by `event.tcp.tcp_flags` | SORT - Count, `event.tcp.tcp_flags` | HEAD 10", + "source=%s | STATS count() as Count by `event.tcp.tcp_flags` | SORT - Count," + + " `event.tcp.tcp_flags` | HEAD 10", NFW_LOGS_INDEX); JSONObject response = executeQuery(query); verifySchema(response, schema("Count", "bigint"), schema("event.tcp.tcp_flags", "string")); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/VpcFlowLogsPplDashboardIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/VpcFlowLogsPplDashboardIT.java index 249f5daf7d1..80e7f925c25 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/VpcFlowLogsPplDashboardIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/VpcFlowLogsPplDashboardIT.java @@ -5,7 +5,6 @@ package org.opensearch.sql.ppl.dashboard; -import static org.junit.jupiter.api.Assertions.assertEquals; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -167,7 +166,8 @@ public void testTopTalkersByBytes() throws IOException { public void testTopTalkersByPackets() throws IOException { String query = String.format( - "source=%s | stats sum(packets) as Packets by srcaddr | sort - Packets, srcaddr | head 10", + "source=%s | stats sum(packets) as Packets by srcaddr | sort - Packets, srcaddr | head" + + " 10", VPC_FLOW_LOGS_INDEX); JSONObject response = executeQuery(query); verifySchema(response, schema("Packets", null, "bigint"), schema("srcaddr", null, "string")); @@ -189,7 +189,8 @@ public void testTopTalkersByPackets() throws IOException { public void testTopDestinationsByPackets() throws IOException { String query = String.format( - "source=%s | stats sum(packets) as Packets by dstaddr | sort - Packets, dstaddr | head 10", + "source=%s | stats sum(packets) as Packets by dstaddr | sort - Packets, dstaddr | head" + + " 10", VPC_FLOW_LOGS_INDEX); JSONObject response = executeQuery(query); verifySchema(response, schema("Packets", null, "bigint"), schema("dstaddr", null, "string")); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/WafPplDashboardIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/WafPplDashboardIT.java index cc29d265626..51620facc01 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/WafPplDashboardIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/WafPplDashboardIT.java @@ -144,7 +144,8 @@ public void testTopCountries() throws IOException { public void testTopTerminatingRules() throws IOException { String query = String.format( - "source=%s | stats count() as Count by `terminatingRuleId` | sort - Count, `terminatingRuleId` | head 10", + "source=%s | stats count() as Count by `terminatingRuleId` | sort - Count," + + " `terminatingRuleId` | head 10", WAF_LOGS_INDEX); JSONObject response = executeQuery(query); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml index eb53c2aeb66..b0707c70389 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml @@ -5,11 +5,11 @@ calcite: LogicalJoin(condition=[=($12, $19)], joinType=[left]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], lastname=[REX_EXTRACT($6, '(?^[A-Z])', 'lastname')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) - LogicalFilter(condition=[<=($13, 1)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $6 ORDER BY $6)]) - LogicalFilter(condition=[IS NOT NULL($6)]) - LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + LogicalFilter(condition=[<=($13, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $6)]) + LogicalFilter(condition=[IS NOT NULL($6)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | @@ -18,8 +18,4 @@ calcite: EnumerableMergeJoin(condition=[=($0, $7)], joinType=[left]) EnumerableCalc(expr#0=[{inputs}], expr#1=['(?^[A-Z])'], expr#2=['lastname'], expr#3=[REX_EXTRACT($t0, $t1, $t2)], $f0=[$t3]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[lastname], LIMIT->10000, SORT_EXPR->[REX_EXTRACT($0, '(?^[A-Z])', 'lastname') ASCENDING NULLS_LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["lastname"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC63sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRVhfRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogMjAwMAogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":true,"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["lastname","(?^[A-Z])","lastname"]}},"type":"string","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) - EnumerableSort(sort0=[$6], dir0=[ASC]) - EnumerableCalc(expr#0..13=[{inputs}], expr#14=[1], expr#15=[<=($t13, $t14)], proj#0..12=[{exprs}], $condition=[$t15]) - EnumerableWindow(window#0=[window(partition {6} order by [6] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0..12=[{inputs}], expr#13=[IS NOT NULL($t6)], proj#0..12=[{exprs}], $condition=[$t13]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000, SORT->[6]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"lastname":{"terms":{"field":"lastname","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["lastname","account_number","firstname","address","birthdate","gender","city","balance","employer","state","age","email","male"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml index 61017e4f76b..7b317734f93 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml @@ -3,7 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) LogicalFilter(condition=[<=($17, 1)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4)]) LogicalFilter(condition=[IS NOT NULL($4)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml index daca12143df..17af313bfb3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml @@ -3,7 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3]) LogicalFilter(condition=[<=($4, 1)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3 ORDER BY $1, $3)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3)]) LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml index b5d9a2194dc..56cdf375905 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml @@ -3,7 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) LogicalFilter(condition=[<=($17, 2)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $7 ORDER BY $4, $7)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $7)]) LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml index 3dc325d896b..64a65b0277c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml @@ -3,7 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3]) LogicalFilter(condition=[<=($4, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3 ORDER BY $1, $3)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3)]) LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml index 4084a34e2d4..ead28edabe4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml @@ -3,7 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], gender=[$1], age=[$2]) LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1)]) LogicalFilter(condition=[IS NOT NULL($1)]) LogicalProject(account_number=[$0], gender=[$4], age=[$8]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml index 978cedd813f..9f15c0d120b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml @@ -3,10 +3,10 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], gender=[$1], age=[$2]) LogicalFilter(condition=[OR(IS NULL($1), <=($3, 1))]) - LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[IS NULL($t1)], expr#5=[1], expr#6=[<=($t3, $t5)], expr#7=[OR($t4, $t6)], proj#0..2=[{exprs}], $condition=[$t7]) - EnumerableWindow(window#0=[window(partition {1} order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableWindow(window#0=[window(partition {1} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml index 4084a34e2d4..ead28edabe4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml @@ -3,7 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], gender=[$1], age=[$2]) LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1)]) LogicalFilter(condition=[IS NOT NULL($1)]) LogicalProject(account_number=[$0], gender=[$4], age=[$8]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml index 451fc8ff5d7..80599e8a338 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml @@ -3,11 +3,11 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) LogicalFilter(condition=[<=($19, 1)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $11 ORDER BY $11)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $11)]) LogicalFilter(condition=[IS NOT NULL($11)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..13=[{inputs}], expr#14=[1], expr#15=[<=($t13, $t14)], proj#0..12=[{exprs}], $condition=[$t15]) - EnumerableWindow(window#0=[window(partition {11} order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableWindow(window#0=[window(partition {11} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->IS NOT NULL($11)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"email","boost":1.0}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml index 7b4e5516c04..12bd2aefaa0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml @@ -5,11 +5,11 @@ calcite: LogicalJoin(condition=[=($0, $13)], joinType=[inner]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) - LogicalFilter(condition=[<=($13, 1)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)]) - LogicalFilter(condition=[IS NOT NULL($0)]) - LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + LogicalFilter(condition=[<=($13, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)]) + LogicalFilter(condition=[IS NOT NULL($0)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | @@ -21,8 +21,4 @@ calcite: "missing" : "_last" } }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableSort(sort0=[$0], dir0=[ASC]) - EnumerableCalc(expr#0..13=[{inputs}], expr#14=[1], expr#15=[<=($t13, $t14)], proj#0..12=[{exprs}], $condition=[$t15]) - EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0..12=[{inputs}], expr#13=[IS NOT NULL($t0)], proj#0..12=[{exprs}], $condition=[$t13]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000, SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"account_number":{"terms":{"field":"account_number","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml index 71ab58f77d3..e795d514faf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml @@ -5,25 +5,16 @@ calcite: LogicalJoin(condition=[=($0, $13)], joinType=[inner]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) - LogicalFilter(condition=[<=($13, 1)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)]) - LogicalFilter(condition=[IS NOT NULL($0)]) - LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + LogicalFilter(condition=[<=($13, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)]) + LogicalFilter(condition=[IS NOT NULL($0)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - EnumerableCalc(expr#0..13=[{inputs}], account_number=[$t1], firstname=[$t2], address=[$t3], birthdate=[$t4], gender=[$t5], city=[$t6], lastname=[$t7], balance=[$t8], employer=[$t9], state=[$t10], age=[$t11], email=[$t12], male=[$t13]) + EnumerableCalc(expr#0..13=[{inputs}], proj#0..12=[{exprs}]) EnumerableLimit(fetch=[10000]) - EnumerableMergeJoin(condition=[=($0, $1)], joinType=[inner]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number], SORT->[{ - "account_number" : { - "order" : "asc", - "missing" : "_last" - } - }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableSort(sort0=[$0], dir0=[ASC]) - EnumerableCalc(expr#0..13=[{inputs}], expr#14=[1], expr#15=[<=($t13, $t14)], proj#0..12=[{exprs}], $condition=[$t15]) - EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0..12=[{inputs}], expr#13=[IS NOT NULL($t0)], proj#0..12=[{exprs}], $condition=[$t13]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableHashJoin(condition=[=($0, $13)], joinType=[inner]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"account_number":{"terms":{"field":"account_number","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml index 30ad4020d5d..b9c090616e5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml @@ -3,7 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age2=[$2]) LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $2)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]) LogicalFilter(condition=[IS NOT NULL($2)]) LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) @@ -16,6 +16,6 @@ calcite: EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0]) EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]) - EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], age2=[$t2], $condition=[$t3]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), SORT->[0 ASC FIRST], PROJECT->[avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["city","state","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml index 4b41b2d6c13..07684889396 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml @@ -5,11 +5,11 @@ calcite: LogicalJoin(condition=[=($12, $19)], joinType=[left]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], lastname=[REX_EXTRACT($6, '(?^[A-Z])', 'lastname')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) - LogicalFilter(condition=[<=($13, 1)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $6 ORDER BY $6)]) - LogicalFilter(condition=[IS NOT NULL($6)]) - LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + LogicalFilter(condition=[<=($13, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $6)]) + LogicalFilter(condition=[IS NOT NULL($6)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | @@ -20,8 +20,8 @@ calcite: EnumerableCalc(expr#0..18=[{inputs}], expr#19=['(?^[A-Z])'], expr#20=['lastname'], expr#21=[REX_EXTRACT($t6, $t19, $t20)], lastname=[$t21]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) EnumerableSort(sort0=[$6], dir0=[ASC]) - EnumerableCalc(expr#0..19=[{inputs}], expr#20=[1], expr#21=[<=($t19, $t20)], proj#0..12=[{exprs}], $condition=[$t21]) - EnumerableWindow(window#0=[window(partition {6} order by [6] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t6)], proj#0..18=[{exprs}], $condition=[$t19]) - EnumerableLimit(fetch=[50000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + EnumerableLimit(fetch=[50000]) + EnumerableCalc(expr#0..19=[{inputs}], expr#20=[1], expr#21=[<=($t19, $t20)], proj#0..12=[{exprs}], $condition=[$t21]) + EnumerableWindow(window#0=[window(partition {6} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t6)], proj#0..18=[{exprs}], $condition=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.yaml index 2706c009421..6bb96baa243 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.yaml @@ -3,13 +3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], gender=[$1], age=[$2]) LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1)]) LogicalFilter(condition=[IS NOT NULL($1)]) LogicalProject(account_number=[$0], gender=[$4], age=[$8]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[<=($t17, $t18)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t19]) - EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t4)], proj#0..16=[{exprs}], $condition=[$t17]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_not_pushed.yaml index ee74cf48e4a..52d30ddf078 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_not_pushed.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_not_pushed.yaml @@ -3,10 +3,10 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], gender=[$1], age=[$2]) LogicalFilter(condition=[OR(IS NULL($1), <=($3, 1))]) - LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[IS NULL($t4)], expr#19=[1], expr#20=[<=($t17, $t19)], expr#21=[OR($t18, $t20)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t21]) - EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.yaml index 2706c009421..6bb96baa243 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.yaml @@ -3,13 +3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], gender=[$1], age=[$2]) LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1)]) LogicalFilter(condition=[IS NOT NULL($1)]) LogicalProject(account_number=[$0], gender=[$4], age=[$8]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[<=($t17, $t18)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t19]) - EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t4)], proj#0..16=[{exprs}], $condition=[$t17]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_criteria_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_criteria_max_option.yaml new file mode 100644 index 00000000000..be2c210caf2 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_criteria_max_option.yaml @@ -0,0 +1,26 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], r.account_number=[$13], r.firstname=[$14], r.address=[$15], r.birthdate=[$16], r.gender=[$17], r.city=[$18], r.lastname=[$19], r.balance=[$20], r.employer=[$21], r.state=[$22], r.age=[$23], r.email=[$24], r.male=[$25]) + LogicalJoin(condition=[=($0, $13)], joinType=[inner]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + LogicalFilter(condition=[<=($13, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableMergeJoin(condition=[=($0, $13)], joinType=[inner]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..18=[{inputs}], proj#0..12=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableLimit(fetch=[50000]) + EnumerableCalc(expr#0..19=[{inputs}], expr#20=[1], expr#21=[<=($t19, $t20)], proj#0..12=[{exprs}], $condition=[$t21]) + EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t0)], proj#0..18=[{exprs}], $condition=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_fields_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_fields_max_option.yaml new file mode 100644 index 00000000000..84ce94c02e8 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_fields_max_option.yaml @@ -0,0 +1,27 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$13], firstname=[$14], address=[$15], birthdate=[$16], gender=[$17], city=[$18], lastname=[$19], balance=[$20], employer=[$21], state=[$22], age=[$23], email=[$24], male=[$25]) + LogicalJoin(condition=[=($0, $13)], joinType=[inner]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + LogicalFilter(condition=[<=($13, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..13=[{inputs}], account_number=[$t1], firstname=[$t2], address=[$t3], birthdate=[$t4], gender=[$t5], city=[$t6], lastname=[$t7], balance=[$t8], employer=[$t9], state=[$t10], age=[$t11], email=[$t12], male=[$t13]) + EnumerableLimit(fetch=[10000]) + EnumerableMergeJoin(condition=[=($0, $1)], joinType=[inner]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..18=[{inputs}], account_number=[$t0]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableLimit(fetch=[50000]) + EnumerableCalc(expr#0..19=[{inputs}], expr#20=[1], expr#21=[<=($t19, $t20)], proj#0..12=[{exprs}], $condition=[$t21]) + EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t0)], proj#0..18=[{exprs}], $condition=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml index 8c0283f0e9c..2a1449e4b1f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml @@ -3,7 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age2=[$2]) LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $2)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]) LogicalFilter(condition=[IS NOT NULL($2)]) LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) @@ -16,9 +16,9 @@ calcite: EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]) EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]) - EnumerableWindow(window#0=[window(partition {1} order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableWindow(window#0=[window(partition {1} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[2], expr#11=[+($t9, $t10)], expr#12=[IS NOT NULL($t8)], state=[$t1], age2=[$t11], $condition=[$t12]) EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) EnumerableAggregate(group=[{5, 7}], agg#0=[$SUM0($8)], agg#1=[COUNT($8)]) EnumerableCalc(expr#0..16=[{inputs}], expr#17=[30], expr#18=[>($t8, $t17)], proj#0..16=[{exprs}], $condition=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java index 0e14e5e6cbc..13a116a1a00 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java @@ -25,7 +25,7 @@ public void testDedup1() { + " LogicalFilter(condition=[<=($8, 1)])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION" - + " BY $7 ORDER BY $7)])\n" + + " BY $7)])\n" + " LogicalFilter(condition=[IS NOT NULL($7)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -39,11 +39,9 @@ public void testDedup1() { verifyResult(root, expectedResult); String expectedSparkSql = - "" - + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER (PARTITION BY `DEPTNO` ORDER BY `DEPTNO` NULLS LAST)" - + " `_row_number_dedup_`\n" + + " ROW_NUMBER() OVER (PARTITION BY `DEPTNO`) `_row_number_dedup_`\n" + "FROM `scott`.`EMP`\n" + "WHERE `DEPTNO` IS NOT NULL) `t0`\n" + "WHERE `_row_number_dedup_` <= 1"; @@ -60,7 +58,7 @@ public void testDedup2() { + " LogicalFilter(condition=[<=($8, 2)])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION" - + " BY $7 ORDER BY $7)])\n" + + " BY $7)])\n" + " LogicalFilter(condition=[IS NOT NULL($7)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -80,11 +78,9 @@ public void testDedup2() { verifyResult(root, expectedResult); String expectedSparkSql = - "" - + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER (PARTITION BY `DEPTNO` ORDER BY `DEPTNO` NULLS LAST)" - + " `_row_number_dedup_`\n" + + " ROW_NUMBER() OVER (PARTITION BY `DEPTNO`) `_row_number_dedup_`\n" + "FROM `scott`.`EMP`\n" + "WHERE `DEPTNO` IS NOT NULL) `t0`\n" + "WHERE `_row_number_dedup_` <= 2"; @@ -101,7 +97,7 @@ public void testDedupKeepEmpty1() { + " LogicalFilter(condition=[OR(IS NULL($7), IS NULL($2), <=($8, 1))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION" - + " BY $7, $2 ORDER BY $7, $2)])\n" + + " BY $7, $2)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = @@ -128,8 +124,7 @@ public void testDedupKeepEmpty1() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER (PARTITION BY `DEPTNO`, `JOB` ORDER BY `DEPTNO` NULLS LAST, `JOB`" - + " NULLS LAST) `_row_number_dedup_`\n" + + " ROW_NUMBER() OVER (PARTITION BY `DEPTNO`, `JOB`) `_row_number_dedup_`\n" + "FROM `scott`.`EMP`) `t`\n" + "WHERE `DEPTNO` IS NULL OR `JOB` IS NULL OR `_row_number_dedup_` <= 1"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -145,7 +140,7 @@ public void testDedupKeepEmpty2() { + " LogicalFilter(condition=[OR(IS NULL($7), IS NULL($2), <=($8, 2))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION" - + " BY $7, $2 ORDER BY $7, $2)])\n" + + " BY $7, $2)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = @@ -178,8 +173,7 @@ public void testDedupKeepEmpty2() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER (PARTITION BY `DEPTNO`, `JOB` ORDER BY `DEPTNO` NULLS LAST, `JOB`" - + " NULLS LAST) `_row_number_dedup_`\n" + + " ROW_NUMBER() OVER (PARTITION BY `DEPTNO`, `JOB`) `_row_number_dedup_`\n" + "FROM `scott`.`EMP`) `t`\n" + "WHERE `DEPTNO` IS NULL OR `JOB` IS NULL OR `_row_number_dedup_` <= 2"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -195,7 +189,7 @@ public void testDedupExpr() { "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], DEPTNO=[$3], NEW_DEPTNO=[$4])\n" + " LogicalFilter(condition=[<=($5, 1)])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], DEPTNO=[$3], NEW_DEPTNO=[$4]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)])\n" + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4)])\n" + " LogicalFilter(condition=[IS NOT NULL($4)])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], DEPTNO=[$7]," + " NEW_DEPTNO=[+($7, 1)])\n" @@ -214,7 +208,7 @@ public void testDedupExpr() { "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + " LogicalFilter(condition=[<=($4, 1)])\n" + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $3 ORDER BY $3)])\n" + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $3)])\n" + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -227,7 +221,7 @@ public void testDedupExpr() { "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + " LogicalFilter(condition=[<=($4, 1)])\n" + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n" + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" @@ -245,7 +239,7 @@ public void testRenameDedup() { "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + " LogicalFilter(condition=[<=($4, 1)])\n" + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n" + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -258,7 +252,7 @@ public void testRenameDedup() { "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + " LogicalFilter(condition=[<=($4, 1)])\n" + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $3 ORDER BY $3)])\n" + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $3)])\n" + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -271,7 +265,7 @@ public void testRenameDedup() { "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + " LogicalFilter(condition=[<=($4, 1)])\n" + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n" + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java index c203867932d..8c84aece625 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java @@ -994,7 +994,7 @@ public void testJoinWithFieldListMaxGreaterThanZero() { + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n" + " LogicalFilter(condition=[<=($3, 1)])\n" + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2]," - + " _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n" + " LogicalTableScan(table=[[scott, DEPT]])\n"; verifyLogical(root, expectedLogical); verifyResultCount(root, 14); @@ -1004,8 +1004,8 @@ public void testJoinWithFieldListMaxGreaterThanZero() { + " `EMP`.`SAL`, `EMP`.`COMM`, `t1`.`DEPTNO`, `t1`.`DNAME`, `t1`.`LOC`\n" + "FROM `scott`.`EMP`\n" + "LEFT JOIN (SELECT `DEPTNO`, `DNAME`, `LOC`\n" - + "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`" - + " ORDER BY `DEPTNO` NULLS LAST) `_row_number_join_max_dedup_`\n" + + "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`)" + + " `_row_number_join_max_dedup_`\n" + "FROM `scott`.`DEPT`) `t`\n" + "WHERE `_row_number_join_max_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -1023,7 +1023,7 @@ public void testJoinWithCriteriaMaxGreaterThanZero() { + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n" + " LogicalFilter(condition=[<=($3, 1)])\n" + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2]," - + " _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n" + " LogicalTableScan(table=[[scott, DEPT]])\n"; verifyLogical(root, expectedLogical); verifyResultCount(root, 14); @@ -1034,8 +1034,8 @@ public void testJoinWithCriteriaMaxGreaterThanZero() { + " `t1`.`LOC`\n" + "FROM `scott`.`EMP`\n" + "LEFT JOIN (SELECT `DEPTNO`, `DNAME`, `LOC`\n" - + "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`" - + " ORDER BY `DEPTNO` NULLS LAST) `_row_number_join_max_dedup_`\n" + + "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`)" + + " `_row_number_join_max_dedup_`\n" + "FROM `scott`.`DEPT`) `t`\n" + "WHERE `_row_number_join_max_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -1069,7 +1069,7 @@ public void testJoinSubsearchMaxOut() { verifyResultCount(root1, 14); // no limit String ppl2 = "source=EMP | inner join left=l right=r on l.DEPTNO=r.DEPTNO DEPT"; RelNode root2 = getRelNode(ppl2); - verifyResultCount(root1, 14); // no limit for sql-like syntax + verifyResultCount(root2, 14); // no limit for sql-like syntax doReturn(1).when(settings).getSettingValue(Settings.Key.PPL_JOIN_SUBSEARCH_MAXOUT); root1 = getRelNode(ppl1);