diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 82651861735..d0820e9cceb 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -15,6 +15,7 @@ import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC; import static org.opensearch.sql.ast.tree.Sort.SortOrder.DESC; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP; +import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_JOIN_MAX_DEDUP; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_MAIN; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_RARE_TOP; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_STREAMSTATS; @@ -48,9 +49,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.hint.HintStrategyTable; -import org.apache.calcite.rel.hint.RelHint; -import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalValues; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFamily; @@ -1054,7 +1052,7 @@ private Pair, List> aggregateWithTrimming( List intendedGroupKeyAliases = getGroupKeyNamesAfterAggregation(reResolved.getLeft()); context.relBuilder.aggregate( context.relBuilder.groupKey(reResolved.getLeft()), reResolved.getRight()); - if (hintBucketNonNull) addIgnoreNullBucketHintToAggregate(context); + if (hintBucketNonNull) PlanUtils.addIgnoreNullBucketHintToAggregate(context.relBuilder); // During aggregation, Calcite projects both input dependencies and output group-by fields. // When names conflict, Calcite adds numeric suffixes (e.g., "value0"). 
// Apply explicit renaming to restore the intended aliases. @@ -1317,7 +1315,7 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { : duplicatedFieldNames.stream() .map(a -> (RexNode) context.relBuilder.field(a)) .toList(); - buildDedupNotNull(context, dedupeFields, allowedDuplication); + buildDedupNotNull(context, dedupeFields, allowedDuplication, true); } context.relBuilder.join( JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition); @@ -1373,7 +1371,7 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { List dedupeFields = getRightColumnsInJoinCriteria(context.relBuilder, joinCondition); - buildDedupNotNull(context, dedupeFields, allowedDuplication); + buildDedupNotNull(context, dedupeFields, allowedDuplication, true); } context.relBuilder.join( JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition); @@ -1538,7 +1536,7 @@ public RelNode visitDedupe(Dedupe node, CalcitePlanContext context) { if (keepEmpty) { buildDedupOrNull(context, dedupeFields, allowedDuplication); } else { - buildDedupNotNull(context, dedupeFields, allowedDuplication); + buildDedupNotNull(context, dedupeFields, allowedDuplication, false); } return context.relBuilder.peek(); } @@ -1546,16 +1544,12 @@ public RelNode visitDedupe(Dedupe node, CalcitePlanContext context) { private static void buildDedupOrNull( CalcitePlanContext context, List dedupeFields, Integer allowedDuplication) { /* - * | dedup 2 a, b keepempty=false - * DropColumns('_row_number_dedup_) - * +- Filter ('_row_number_dedup_ <= n OR isnull('a) OR isnull('b)) - * +- Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, specifiedwindowoundedpreceding$(), currentrow$())) AS _row_number_dedup_], ['a, 'b], ['a ASC NULLS FIRST, 'b ASC NULLS FIRST] + * | dedup 2 a, b keepempty=true + * LogicalProject(...) 
+ * +- LogicalFilter(condition=[OR(IS NULL(a), IS NULL(b), <=(_row_number_dedup_, n))]) + * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b ORDER BY a, b)]) * +- ... */ - // Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, - // specifiedwindowoundedpreceding$(), currentrow$())) AS _row_number_dedup_], ['a, 'b], ['a - // ASC - // NULLS FIRST, 'b ASC NULLS FIRST] RexNode rowNumber = context .relBuilder @@ -1578,16 +1572,21 @@ private static void buildDedupOrNull( } private static void buildDedupNotNull( - CalcitePlanContext context, List dedupeFields, Integer allowedDuplication) { + CalcitePlanContext context, + List dedupeFields, + Integer allowedDuplication, + boolean fromJoinMaxOption) { /* * | dedup 2 a, b keepempty=false - * DropColumns('_row_number_dedup_) - * +- Filter ('_row_number_dedup_ <= n) - * +- Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, specifiedwindowoundedpreceding$(), currentrow$())) AS _row_number_dedup_], ['a, 'b], ['a ASC NULLS FIRST, 'b ASC NULLS FIRST] - * +- Filter (isnotnull('a) AND isnotnull('b)) - * +- ... + * LogicalProject(...) + * +- LogicalFilter(condition=[<=(_row_number_dedup_, n)]) + * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b ORDER BY a, b)]) + * +- LogicalFilter(condition=[AND(IS NOT NULL(a), IS NOT NULL(b))]) + * +- ... */ // Filter (isnotnull('a) AND isnotnull('b)) + String rowNumberAlias = + fromJoinMaxOption ?
ROW_NUMBER_COLUMN_FOR_JOIN_MAX_DEDUP : ROW_NUMBER_COLUMN_FOR_DEDUP; context.relBuilder.filter( context.relBuilder.and(dedupeFields.stream().map(context.relBuilder::isNotNull).toList())); // Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, @@ -1601,15 +1600,15 @@ private static void buildDedupNotNull( .partitionBy(dedupeFields) .orderBy(dedupeFields) .rowsTo(RexWindowBounds.CURRENT_ROW) - .as(ROW_NUMBER_COLUMN_FOR_DEDUP); + .as(rowNumberAlias); context.relBuilder.projectPlus(rowNumber); - RexNode _row_number_dedup_ = context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_DEDUP); + RexNode rowNumberField = context.relBuilder.field(rowNumberAlias); // Filter ('_row_number_dedup_ <= n) context.relBuilder.filter( context.relBuilder.lessThanOrEqual( - _row_number_dedup_, context.relBuilder.literal(allowedDuplication))); + rowNumberField, context.relBuilder.literal(allowedDuplication))); // DropColumns('_row_number_dedup_) - context.relBuilder.projectExcept(_row_number_dedup_); + context.relBuilder.projectExcept(rowNumberField); } @Override @@ -2378,25 +2377,6 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) { return context.relBuilder.peek(); } - private static void addIgnoreNullBucketHintToAggregate(CalcitePlanContext context) { - final RelHint statHits = - RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build(); - assert context.relBuilder.peek() instanceof LogicalAggregate - : "Stats hits should be added to LogicalAggregate"; - context.relBuilder.hints(statHits); - context - .relBuilder - .getCluster() - .setHintStrategies( - HintStrategyTable.builder() - .hintStrategy( - "stats_args", - (hint, rel) -> { - return rel instanceof LogicalAggregate; - }) - .build()); - } - @Override public RelNode visitTableFunction(TableFunction node, CalcitePlanContext context) { throw new CalciteUnsupportedException("Table function is unsupported in Calcite"); diff --git 
a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index cd9abcf32ba..50e03fc608f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -25,15 +25,21 @@ import org.apache.calcite.rel.RelHomogeneousShuttle; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelShuttle; +import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.hint.HintStrategyTable; +import org.apache.calcite.rel.hint.RelHint; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.logical.LogicalSort; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCorrelVariable; import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexOver; import org.apache.calcite.rex.RexVisitorImpl; @@ -45,8 +51,11 @@ import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.Pair; import org.apache.calcite.util.Util; +import org.apache.calcite.util.mapping.Mapping; +import org.apache.calcite.util.mapping.Mappings; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.IntervalUnit; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.expression.WindowBound; @@ -62,6 +71,7 @@ public interface PlanUtils { /** this is only for dedup command, do not reuse it in other command */ String 
ROW_NUMBER_COLUMN_FOR_DEDUP = "_row_number_dedup_"; + String ROW_NUMBER_COLUMN_FOR_JOIN_MAX_DEDUP = "_row_number_join_max_dedup_"; String ROW_NUMBER_COLUMN_FOR_RARE_TOP = "_row_number_rare_top_"; String ROW_NUMBER_COLUMN_FOR_MAIN = "_row_number_main_"; String ROW_NUMBER_COLUMN_FOR_SUBSEARCH = "_row_number_subsearch_"; @@ -449,18 +459,15 @@ static RexNode derefMapCall(RexNode rexNode) { return rexNode; } - /** Check if contains RexOver introduced by dedup */ - static boolean containsRowNumberDedup(LogicalProject project) { - return project.getProjects().stream() - .anyMatch(p -> p instanceof RexOver && p.getKind() == SqlKind.ROW_NUMBER) - && project.getRowType().getFieldNames().contains(ROW_NUMBER_COLUMN_FOR_DEDUP); + /** Check if contains dedup */ + static boolean containsRowNumberDedup(RelNode node) { + return node.getRowType().getFieldNames().stream().anyMatch(ROW_NUMBER_COLUMN_FOR_DEDUP::equals); } - /** Check if contains RexOver introduced by dedup top/rare */ - static boolean containsRowNumberRareTop(LogicalProject project) { - return project.getProjects().stream() - .anyMatch(p -> p instanceof RexOver && p.getKind() == SqlKind.ROW_NUMBER) - && project.getRowType().getFieldNames().contains(ROW_NUMBER_COLUMN_FOR_RARE_TOP); + /** Check if contains dedup for top/rare */ + static boolean containsRowNumberRareTop(RelNode node) { + return node.getRowType().getFieldNames().stream() + .anyMatch(ROW_NUMBER_COLUMN_FOR_RARE_TOP::equals); } /** Get all RexWindow list from LogicalProject */ @@ -508,10 +515,6 @@ static boolean distinctProjectList(LogicalProject project) { return project.getNamedProjects().stream().allMatch(rexSet::add); } - static boolean containsRexOver(LogicalProject project) { - return project.getProjects().stream().anyMatch(RexOver::containsOver); - } - /** * The LogicalSort is a LIMIT that should be pushed down when its fetch field is not null and its * collation is empty. 
For example: sort name | head 5 should not be pushed down @@ -524,7 +527,7 @@ static boolean isLogicalSortLimit(LogicalSort sort) { return sort.fetch != null; } - static boolean projectContainsExpr(Project project) { + static boolean containsRexCall(Project project) { return project.getProjects().stream().anyMatch(p -> p instanceof RexCall); } @@ -595,4 +598,58 @@ static void replaceTop(RelBuilder relBuilder, RelNode relNode) { throw new IllegalStateException("Unable to invoke RelBuilder.replaceTop", e); } } + + static void addIgnoreNullBucketHintToAggregate(RelBuilder relBuilder) { + final RelHint statHits = + RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build(); + assert relBuilder.peek() instanceof LogicalAggregate + : "Stats hits should be added to LogicalAggregate"; + relBuilder.hints(statHits); + relBuilder + .getCluster() + .setHintStrategies( + HintStrategyTable.builder() + .hintStrategy( + "stats_args", + (hint, rel) -> { + return rel instanceof LogicalAggregate; + }) + .build()); + } + + /** Extract the RexLiteral from the aggregate call if the aggregate call is a LITERAL_AGG. 
*/ + static @Nullable RexLiteral getObjectFromLiteralAgg(AggregateCall aggCall) { + if (aggCall.getAggregation().kind == SqlKind.LITERAL_AGG) { + return (RexLiteral) + aggCall.rexList.stream().filter(rex -> rex instanceof RexLiteral).findAny().orElse(null); + } else { + return null; + } + } + + /** + * Helper method that creates a target mapping, as a shorthand for calling {@link + * Mappings#target(List, int)} directly. + * + * @param rexNodes the rex list in schema + * @param schema the schema which contains the rex list + * @return the target mapping + */ + static Mapping mapping(List rexNodes, RelDataType schema) { + return Mappings.target(getSelectColumns(rexNodes), schema.getFieldCount()); + } + + static boolean mayBeFilterFromBucketNonNull(LogicalFilter filter) { + RexNode condition = filter.getCondition(); + return isNotNullOnRef(condition) + || (condition instanceof RexCall rexCall + && rexCall.getOperator().equals(SqlStdOperatorTable.AND) + && rexCall.getOperands().stream().allMatch(PlanUtils::isNotNullOnRef)); + } + + private static boolean isNotNullOnRef(RexNode rex) { + return rex instanceof RexCall rexCall + && rexCall.isA(SqlKind.IS_NOT_NULL) + && rexCall.getOperands().get(0) instanceof RexInputRef; + } } diff --git a/core/src/main/java/org/opensearch/sql/data/type/ExprType.java b/core/src/main/java/org/opensearch/sql/data/type/ExprType.java index 9234c0a2352..3ec5f954984 100644 --- a/core/src/main/java/org/opensearch/sql/data/type/ExprType.java +++ b/core/src/main/java/org/opensearch/sql/data/type/ExprType.java @@ -62,10 +62,18 @@ default Optional getOriginalPath() { } /** - * Get the original path. Types like alias type should be derived from the type of the original - * field. + * Get the original expr type. Types like alias type should be derived from the type of the + * original field. */ default ExprType getOriginalExprType() { return this; } + + /** + * Get the original data type.
Types like alias type should be derived from the type of the + * original field. + */ + default ExprType getOriginalType() { + return this; + } } diff --git a/docs/user/ppl/interfaces/endpoint.rst b/docs/user/ppl/interfaces/endpoint.rst index b4acc21d8f4..08032ad6cda 100644 --- a/docs/user/ppl/interfaces/endpoint.rst +++ b/docs/user/ppl/interfaces/endpoint.rst @@ -117,12 +117,12 @@ Explain query:: sh$ curl -sS -H 'Content-Type: application/json' \ ... -X POST localhost:9200/_plugins/_ppl/_explain?format=extended \ - ... -d '{"query" : "source=state_country | where age>30 | dedup age"}' + ... -d '{"query" : "source=state_country | where age>30"}' { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5])\n LogicalFilter(condition=[<=($12, 1)])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5], _id=[$6], _index=[$7], _score=[$8], _maxscore=[$9], _sort=[$10], _routing=[$11], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $5 ORDER BY $5)])\n LogicalFilter(condition=[IS NOT NULL($5)])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..6=[{inputs}], expr#7=[1], expr#8=[<=($t6, $t7)], proj#0..5=[{exprs}], $condition=[$t8])\n EnumerableWindow(window#0=[window(partition {5} order by [5] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30)], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"country\",\"state\",\"month\",\"year\",\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n", - "extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n int prevStart;\n int prevEnd;\n final java.util.Comparator comparator = new java.util.Comparator(){\n public int compare(Object[] v0, Object[] v1) {\n final int c;\n c = org.apache.calcite.runtime.Utilities.compareNullsLast((Long) v0[5], (Long) v1[5]);\n if (c != 0) {\n return c;\n }\n return 0;\n }\n\n public int compare(Object o0, Object o1) {\n return this.compare((Object[]) o0, (Object[]) o1);\n }\n\n };\n final org.apache.calcite.runtime.SortedMultiMap multiMap = new org.apache.calcite.runtime.SortedMultiMap();\n v1stashed.scan().foreach(new org.apache.calcite.linq4j.function.Function1() {\n public Object apply(Object[] v) {\n Long key = (Long) v[5];\n multiMap.putMulti(key, v);\n return null;\n }\n public Object apply(Object v) {\n return apply(\n (Object[]) v);\n }\n }\n );\n final java.util.Iterator iterator = multiMap.arrays(comparator);\n final java.util.ArrayList _list = new java.util.ArrayList(\n multiMap.size());\n Long a0w0 = (Long) null;\n while (iterator.hasNext()) {\n final Object[] _rows = (Object[]) iterator.next();\n prevStart = -1;\n prevEnd = 2147483647;\n for (int i = 0; i < _rows.length; (++i)) {\n final Object[] row = (Object[]) _rows[i];\n if (i != prevEnd) {\n int actualStart = i < prevEnd ? 
0 : prevEnd + 1;\n prevEnd = i;\n a0w0 = Long.valueOf(((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown((i - 0 + 1))).longValue());\n }\n _list.add(new Object[] {\n row[0],\n row[1],\n row[2],\n row[3],\n row[4],\n row[5],\n a0w0});\n }\n }\n multiMap.clear();\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable = org.apache.calcite.linq4j.Linq4j.asEnumerable(_list);\n final org.apache.calcite.linq4j.AbstractEnumerable child = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while (inputEnumerator.moveNext()) {\n if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[6]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n final Object[] current = (Object[]) inputEnumerator.current();\n final Object input_value = current[0];\n final Object input_value0 = current[1];\n final Object input_value1 = current[2];\n final Object input_value2 = current[3];\n final Object input_value3 = current[4];\n final Object input_value4 = current[5];\n return new Object[] {\n input_value,\n input_value0,\n input_value1,\n input_value2,\n input_value3,\n input_value4};\n }\n\n static final long $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b = ((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown(1)).longValue();\n };\n }\n\n };\n return child.take(10000);\n}\n\n\npublic Class getElementType() {\n return java.lang.Object[].class;\n}\n\n\n" + "logical": 
"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"country\",\"state\",\"month\",\"year\",\"age\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n", + "extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n return v1stashed.scan();\n}\n\n\npublic Class getElementType() {\n return java.lang.Object[].class;\n}\n\n\n" } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index c2dce34fc38..6c90f8aa72b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -82,25 +82,29 @@ public void supportSearchSargPushDown_timeRange() throws IOException { } // Only for Calcite - @Ignore("https://github.com/opensearch-project/OpenSearch/issues/3725") + @Test public void testJoinWithCriteriaAndMaxOption() throws IOException { + // TODO could be optimized with https://github.com/opensearch-project/OpenSearch/issues/3725 + 
enabledOnlyWhenPushdownIsEnabled(); String query = "source=opensearch-sql_test_index_bank | join max=1 left=l right=r on" + " l.account_number=r.account_number opensearch-sql_test_index_bank"; - var result = explainQueryToString(query); - String expected = loadExpectedPlan("explain_join_with_criteria_max_option.json"); - assertJsonEqualsIgnoreId(expected, result); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_join_with_criteria_max_option.yaml"); + assertYamlEqualsIgnoreId(expected, result); } // Only for Calcite - @Ignore("https://github.com/opensearch-project/OpenSearch/issues/3725") + @Test public void testJoinWithFieldListAndMaxOption() throws IOException { + // TODO could be optimized with https://github.com/opensearch-project/OpenSearch/issues/3725 + enabledOnlyWhenPushdownIsEnabled(); String query = "source=opensearch-sql_test_index_bank | join type=inner max=1 account_number" + " opensearch-sql_test_index_bank"; - var result = explainQueryToString(query); - String expected = loadExpectedPlan("explain_join_with_fields_max_option.json"); - assertJsonEqualsIgnoreId(expected, result); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_join_with_fields_max_option.yaml"); + assertYamlEqualsIgnoreId(expected, result); } // Only for Calcite @@ -1788,4 +1792,115 @@ public void testInternalItemAccessOnStructs() throws IOException { + " info.dummy_sub_field", TEST_INDEX_WEBLOGS))); } + + @Test + public void testComplexDedup() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_dedup_complex1.yaml"); + assertYamlEqualsIgnoreId( + expected, explainQueryYaml("source=opensearch-sql_test_index_account | dedup 1 gender")); + expected = loadExpectedPlan("explain_dedup_complex2.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " dedup 1 
gender, state")); + expected = loadExpectedPlan("explain_dedup_complex3.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml("source=opensearch-sql_test_index_account | dedup 2 gender, state")); + expected = loadExpectedPlan("explain_dedup_complex4.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " dedup 2 gender, state")); + } + + @Ignore("https://github.com/opensearch-project/sql/issues/4789") + public void testDedupExpr() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_dedup_expr1.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | dedup 1" + + " new_gender")); + expected = loadExpectedPlan("explain_dedup_expr2.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval new_gender = lower(gender), new_state = lower(state) | dedup 1 new_gender," + + " new_state")); + expected = loadExpectedPlan("explain_dedup_expr3.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | eval" + + " new_state = lower(state) | dedup 2 new_gender, new_state")); + expected = loadExpectedPlan("explain_dedup_expr4.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval new_gender = lower(gender) | eval new_state = lower(state) | sort gender," + + " -state | dedup 2 new_gender, new_state")); + } + + @Ignore("https://github.com/opensearch-project/sql/issues/4789") + public void testDedupRename() throws IOException { + // rename changes nothing, reuse the same yaml files of testDedupExpr() + 
enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_dedup_expr1.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval tmp_gender = lower(gender) | rename" + + " tmp_gender as new_gender | dedup 1 new_gender")); + expected = loadExpectedPlan("explain_dedup_expr2.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval tmp_gender = lower(gender), tmp_state = lower(state) | rename tmp_gender" + + " as new_gender | rename tmp_state as new_state | dedup 1 new_gender," + + " new_state")); + expected = loadExpectedPlan("explain_dedup_expr3.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval tmp_gender = lower(gender) | eval" + + " tmp_state = lower(state) | rename tmp_gender as new_gender | rename tmp_state" + + " as new_state | dedup 2 new_gender, new_state")); + expected = loadExpectedPlan("explain_dedup_expr4.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval tmp_gender = lower(gender) | eval tmp_state = lower(state) | rename" + + " tmp_gender as new_gender | rename tmp_state as new_state | sort gender," + + " -state | dedup 2 new_gender, new_state")); + } + + @Ignore("SortExprIndexScanRule not work?") + public void testDedupRename2() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_dedup_expr4.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval tmp_gender = lower(gender) | eval tmp_state = lower(state) | rename" + + " tmp_gender as new_gender | rename tmp_state as new_state | sort new_gender," + + " -new_state 
| dedup 2 new_gender, new_state")); + } + + @Test + public void testDedupTextTypeNotPushdown() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_dedup_text_type_no_push.yaml"); + assertYamlEqualsIgnoreId( + expected, explainQueryYaml(String.format("source=%s | dedup email", TEST_INDEX_BANK))); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java index fc9604d62fd..3a2e6d69f3b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java @@ -248,4 +248,119 @@ public void testReorderDedupFieldsShouldNotAffectResult() throws IOException { schema("category", null, "string"), schema("id", null, "int")); } + + @Test + public void testDedupComplex() throws IOException { + JSONObject actual = + executeQuery(String.format("source=%s | dedup 1 name", TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", "A", 1), + rows("Z", "B", 1), + rows("X", "C", 1), + rows("Z", "D", 1), + rows(null, "E", 1)); + actual = + executeQuery( + String.format( + "source=%s | fields category, name | dedup 1 name", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, rows("X", "A"), rows("Z", "B"), rows("X", "C"), rows("Z", "D"), rows(null, "E")); + actual = + executeQuery( + String.format("source=%s | dedup 1 name, category", TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", "A", 1), + rows("Y", "A", 1), + rows("Y", "B", 1), + rows("Z", "B", 1), + rows("X", "C", 1), + rows("Z", "D", 1)); + actual = + executeQuery( + String.format( + "source=%s | fields category, id, name | dedup 2 name, category", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", 1, "A"), + rows("X", 1, "A"), + rows("Y", 1, "A"), + rows("Y", 1, 
"A"), + rows("Y", 1, "B"), + rows("Z", 1, "B"), + rows("Z", 1, "B"), + rows("X", 1, "C"), + rows("X", 1, "C"), + rows("Z", 1, "D")); + } + + @Test + public void testDedupExpr() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval new_name = lower(name) | dedup 1 new_name", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", "A", 1, "a"), + rows("Z", "B", 1, "b"), + rows("X", "C", 1, "c"), + rows("Z", "D", 1, "d"), + rows(null, "E", 1, "e")); + actual = + executeQuery( + String.format( + "source=%s | fields category, name, id | eval new_name = lower(name), new_category" + + " = lower(category) | dedup 1 new_name, new_category", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", "C", 1, "c", "x"), + rows("Z", "D", 1, "d", "z"), + rows("X", "A", 1, "a", "x"), + rows("Y", "B", 1, "b", "y"), + rows("Y", "A", 1, "a", "y"), + rows("Z", "B", 1, "b", "z")); + actual = + executeQuery( + String.format( + "source=%s | eval new_name = lower(name), new_category = lower(category) | dedup 2" + + " name, category", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("Y", "A", 1, "a", "y"), + rows("Y", "A", 1, "a", "y"), + rows("Z", "B", 1, "b", "z"), + rows("Z", "B", 1, "b", "z"), + rows("X", "A", 1, "a", "x"), + rows("X", "A", 1, "a", "x"), + rows("Y", "B", 1, "b", "y"), + rows("Z", "D", 1, "d", "z"), + rows("X", "C", 1, "c", "x"), + rows("X", "C", 1, "c", "x")); + actual = + executeQuery( + String.format( + "source=%s | fields category, id, name | eval new_name = lower(name) | eval" + + " new_category = lower(category) | sort name, -category | dedup 2 new_name," + + " new_category", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", 1, "C", "c", "x"), + rows("X", 1, "C", "c", "x"), + rows("Z", 1, "D", "d", "z"), + rows("X", 1, "A", "a", "x"), + rows("X", 1, "A", "a", "x"), + rows("Y", 1, "B", "b", "y"), + rows("Y", 1, "A", "a", "y"), + 
rows("Y", 1, "A", "a", "y"), + rows("Z", 1, "B", "b", "z"), + rows("Z", 1, "B", "b", "z")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 2319cf703a4..decde50aed3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -484,32 +484,32 @@ public void testStatsByTimeSpan() throws IOException { TEST_INDEX_BANK))); } - @Ignore("https://github.com/opensearch-project/OpenSearch/issues/3725") + @Test public void testDedupPushdown() throws IOException { - String expected = loadExpectedPlan("explain_dedup_push.json"); - assertJsonEqualsIgnoreId( + String expected = loadExpectedPlan("explain_dedup_push.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( "source=opensearch-sql_test_index_account | fields account_number, gender, age" + " | dedup 1 gender")); } @Test - public void testDedupKeepEmptyTruePushdown() throws IOException { - String expected = loadExpectedPlan("explain_dedup_keepempty_true_push.json"); - assertJsonEqualsIgnoreId( + public void testDedupKeepEmptyTrueNotPushed() throws IOException { + String expected = loadExpectedPlan("explain_dedup_keepempty_true_not_pushed.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( "source=opensearch-sql_test_index_account | fields account_number, gender, age" + " | dedup gender KEEPEMPTY=true")); } - @Ignore("https://github.com/opensearch-project/OpenSearch/issues/3725") + @Test public void testDedupKeepEmptyFalsePushdown() throws IOException { - String expected = loadExpectedPlan("explain_dedup_keepempty_false_push.json"); - assertJsonEqualsIgnoreId( + String expected = loadExpectedPlan("explain_dedup_keepempty_false_push.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( "source=opensearch-sql_test_index_account | 
fields account_number, gender, age" + " | dedup gender KEEPEMPTY=false")); } diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index fb5c0be026b..b7e030e1295 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -425,7 +425,9 @@ private static String eliminateTimeStamp(String s) { } private static String eliminateRelId(String s) { - return s.replaceAll("rel#\\d+", "rel#").replaceAll("RelSubset#\\d+", "RelSubset#"); + return s.replaceAll("rel#\\d+", "rel#") + .replaceAll("RelSubset#\\d+", "RelSubset#") + .replaceAll("LogicalProject#\\d+", "LogicalProject#"); } private static String eliminatePid(String s) { @@ -452,6 +454,7 @@ private static String cleanUpYaml(String s) { return s.replaceAll("\"utcTimestamp\":\\d+", "\"utcTimestamp\": 0") .replaceAll("rel#\\d+", "rel#") .replaceAll("RelSubset#\\d+", "RelSubset#") + .replaceAll("LogicalProject#\\d+", "LogicalProject#") .replaceAll("pitId=[^,]+,", "pitId=*,"); } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml new file mode 100644 index 00000000000..1b4c379f9b6 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[<=($17, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], 
_sort=[$15], _routing=[$16], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)]) + LogicalFilter(condition=[IS NOT NULL($4)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","firstname","address","balance","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml new file mode 100644 index 00000000000..6d5c76b1443 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3]) + LogicalFilter(condition=[<=($4, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3 
ORDER BY $1, $3)]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml new file mode 100644 index 00000000000..af72f304286 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[<=($17, 2)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], 
age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $7 ORDER BY $4, $7)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml new file mode 100644 index 00000000000..a2385ccdbb2 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + 
LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3]) + LogicalFilter(condition=[<=($4, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3 ORDER BY $1, $3)]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.json deleted file mode 100644 index d662486df85..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)])\n LogicalFilter(condition=[IS NOT NULL($1)])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], FILTER->IS NOT NULL($1), COLLAPSE->gender, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"gender\",\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]},\"collapse\":{\"field\":\"gender.keyword\"}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml new file mode 100644 index 00000000000..c16dde54e22 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalFilter(condition=[IS NOT NULL($1)]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml new file mode 100644 index 00000000000..978cedd813f --- /dev/null +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[OR(IS NULL($1), <=($3, 1))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[IS NULL($t1)], expr#5=[1], expr#6=[<=($t3, $t5)], expr#7=[OR($t4, $t6)], proj#0..2=[{exprs}], $condition=[$t7]) + EnumerableWindow(window#0=[window(partition {1} order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_push.json deleted file mode 100644 index 92b6103864f..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[OR(IS NULL($1), <=($3, 1))])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)])\n 
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..3=[{inputs}], expr#4=[IS NULL($t1)], expr#5=[1], expr#6=[<=($t3, $t5)], expr#7=[OR($t4, $t6)], proj#0..2=[{exprs}], $condition=[$t7])\n EnumerableWindow(window#0=[window(partition {1} order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.json deleted file mode 100644 index d662486df85..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)])\n LogicalFilter(condition=[IS NOT NULL($1)])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], FILTER->IS NOT NULL($1), COLLAPSE->gender, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"gender\",\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]},\"collapse\":{\"field\":\"gender.keyword\"}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml new file mode 100644 index 00000000000..c16dde54e22 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalFilter(condition=[IS NOT NULL($1)]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, 
pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml new file mode 100644 index 00000000000..451fc8ff5d7 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + LogicalFilter(condition=[<=($19, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $11 ORDER BY $11)]) + LogicalFilter(condition=[IS NOT NULL($11)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[1], expr#15=[<=($t13, $t14)], proj#0..12=[{exprs}], $condition=[$t15]) + EnumerableWindow(window#0=[window(partition {11} order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->IS NOT NULL($11)], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"email","boost":1.0}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.json deleted file mode 100644 index 08db116a7c9..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], r.account_number=[$13], r.firstname=[$14], r.address=[$15], r.birthdate=[$16], r.gender=[$17], r.city=[$18], r.lastname=[$19], r.balance=[$20], r.employer=[$21], r.state=[$22], r.age=[$23], r.email=[$24], r.male=[$25])\n LogicalJoin(condition=[=($0, $13)], joinType=[inner])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[<=($13, 1)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], 
employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableMergeJoin(condition=[=($0, $13)], joinType=[inner])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT->[{\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]},\"sort\":[{\"account_number\":{\"order\":\"asc\",\"missing\":\"_last\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->IS NOT NULL($0), COLLAPSE->account_number, SORT->[{\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"account_number\",\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]},\"sort\":[{\"account_number\":{\"order\":\"asc\",\"missing\":\"_last\"}}],\"collapse\":{\"field\":\"account_number\"}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml new file mode 100644 index 00000000000..7b4e5516c04 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml @@ -0,0 +1,28 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], r.account_number=[$13], r.firstname=[$14], r.address=[$15], r.birthdate=[$16], r.gender=[$17], r.city=[$18], r.lastname=[$19], r.balance=[$20], r.employer=[$21], r.state=[$22], r.age=[$23], r.email=[$24], r.male=[$25]) + LogicalJoin(condition=[=($0, $13)], joinType=[inner]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + 
LogicalFilter(condition=[<=($13, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableMergeJoin(condition=[=($0, $13)], joinType=[inner]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT->[{ + "account_number" : { + "order" : "asc", + "missing" : "_last" + } + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[1], expr#15=[<=($t13, $t14)], proj#0..12=[{exprs}], $condition=[$t15]) + EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[IS NOT NULL($t0)], proj#0..12=[{exprs}], $condition=[$t13]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], 
PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.json deleted file mode 100644 index c1ee2aa0b30..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$13], firstname=[$14], address=[$15], birthdate=[$16], gender=[$17], city=[$18], lastname=[$19], balance=[$20], employer=[$21], state=[$22], age=[$23], email=[$24], male=[$25])\n LogicalJoin(condition=[=($0, $13)], joinType=[inner])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[<=($13, 1)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_dedup_=[ROW_NUMBER() OVER 
(PARTITION BY $0 ORDER BY $0)])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableCalc(expr#0..13=[{inputs}], proj#0..12=[{exprs}])\n EnumerableLimit(fetch=[10000])\n EnumerableMergeJoin(condition=[=($0, $13)], joinType=[inner])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->IS NOT NULL($0), COLLAPSE->account_number, SORT->[{\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"account_number\",\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]},\"sort\":[{\"account_number\":{\"order\":\"asc\",\"missing\":\"_last\"}}],\"collapse\":{\"field\":\"account_number\"}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number], SORT->[{\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\"],\"excludes\":[]},\"sort\":[{\"account_number\":{\"order\":\"asc\",\"missing\":\"_last\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml new file mode 100644 index 00000000000..71ab58f77d3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml @@ -0,0 +1,29 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$13], firstname=[$14], address=[$15], birthdate=[$16], gender=[$17], city=[$18], lastname=[$19], balance=[$20], employer=[$21], state=[$22], age=[$23], email=[$24], male=[$25]) + LogicalJoin(condition=[=($0, $13)], joinType=[inner]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + LogicalFilter(condition=[<=($13, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..13=[{inputs}], account_number=[$t1], 
firstname=[$t2], address=[$t3], birthdate=[$t4], gender=[$t5], city=[$t6], lastname=[$t7], balance=[$t8], employer=[$t9], state=[$t10], age=[$t11], email=[$t12], male=[$t13]) + EnumerableLimit(fetch=[10000]) + EnumerableMergeJoin(condition=[=($0, $1)], joinType=[inner]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number], SORT->[{ + "account_number" : { + "order" : "asc", + "missing" : "_last" + } + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[1], expr#15=[<=($t13, $t14)], proj#0..12=[{exprs}], $condition=[$t15]) + EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[IS NOT NULL($t0)], proj#0..12=[{exprs}], $condition=[$t13]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.json deleted file mode 100644 index 625dc968ab4..00000000000 --- 
a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)])\n LogicalFilter(condition=[IS NOT NULL($1)])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[<=($t17, $t18)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t19])\n EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t4)], proj#0..16=[{exprs}], $condition=[$t17])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.yaml new file mode 100644 index 00000000000..2706c009421 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalFilter(condition=[IS NOT NULL($1)]) + LogicalProject(account_number=[$0], 
gender=[$4], age=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[<=($t17, $t18)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t19]) + EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t4)], proj#0..16=[{exprs}], $condition=[$t17]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_not_pushed.yaml new file mode 100644 index 00000000000..ee74cf48e4a --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_not_pushed.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[OR(IS NULL($1), <=($3, 1))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[IS NULL($t4)], expr#19=[1], expr#20=[<=($t17, $t19)], expr#21=[OR($t18, $t20)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t21]) + EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file 
diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_push.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_push.json deleted file mode 100644 index d1592e9fa89..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[OR(IS NULL($1), <=($3, 1))])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], expr#18=[IS NULL($t4)], expr#19=[1], expr#20=[<=($t17, $t19)], expr#21=[OR($t18, $t20)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t21])\n EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.json deleted file mode 100644 index 625dc968ab4..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 
ORDER BY $1)])\n LogicalFilter(condition=[IS NOT NULL($1)])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[<=($t17, $t18)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t19])\n EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t4)], proj#0..16=[{exprs}], $condition=[$t17])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.yaml new file mode 100644 index 00000000000..2706c009421 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalFilter(condition=[IS NOT NULL($1)]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[<=($t17, $t18)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t19]) + EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT 
NULL($t4)], proj#0..16=[{exprs}], $condition=[$t17]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_criteria_max_option.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_criteria_max_option.json deleted file mode 100644 index 11ca44cdea2..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_criteria_max_option.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], r.account_number=[$13], r.firstname=[$14], r.address=[$15], r.birthdate=[$16], r.gender=[$17], r.city=[$18], r.lastname=[$19], r.balance=[$20], r.employer=[$21], r.state=[$22], r.age=[$23], r.email=[$24], r.male=[$25])\n LogicalJoin(condition=[=($0, $13)], joinType=[inner])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[<=($13, 1)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n LogicalFilter(condition=[IS NOT 
NULL($0)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableMergeJoin(condition=[=($0, $13)], joinType=[inner])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableCalc(expr#0..18=[{inputs}], proj#0..12=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableCalc(expr#0..19=[{inputs}], expr#20=[1], expr#21=[<=($t19, $t20)], proj#0..12=[{exprs}], $condition=[$t21])\n EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t0)], proj#0..18=[{exprs}], $condition=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_fields_max_option.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_fields_max_option.json deleted file mode 100644 index a2b931bba32..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_fields_max_option.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$13], firstname=[$14], address=[$15], birthdate=[$16], gender=[$17], city=[$18], lastname=[$19], balance=[$20], employer=[$21], state=[$22], age=[$23], email=[$24], male=[$25])\n LogicalJoin(condition=[=($0, $13)], joinType=[inner])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], 
city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[<=($13, 1)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableCalc(expr#0..13=[{inputs}], account_number=[$t1], firstname=[$t2], address=[$t3], birthdate=[$t4], gender=[$t5], city=[$t6], lastname=[$t7], balance=[$t8], employer=[$t9], state=[$t10], age=[$t11], email=[$t12], male=[$t13])\n EnumerableLimit(fetch=[10000])\n EnumerableMergeJoin(condition=[=($0, $1)], joinType=[inner])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableCalc(expr#0..18=[{inputs}], account_number=[$t0])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableCalc(expr#0..19=[{inputs}], expr#20=[1], expr#21=[<=($t19, $t20)], proj#0..12=[{exprs}], $condition=[$t21])\n EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t0)], proj#0..18=[{exprs}], $condition=[$t19])\n 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.json deleted file mode 100644 index 4f85572e388..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "root": { - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "DedupeOperator", - "description": { - "dedupeList": "[gender]", - "allowedDuplication": 1, - "keepEmpty": false, - "consecutive": false - }, - "children": [{ - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" - }, - "children": [] - }] - }] - }] - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.yaml new file mode 100644 index 00000000000..7ff290348e9 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.yaml @@ -0,0 +1,24 @@ +root: + name: ProjectOperator + description: + fields: "[account_number, gender, age]" + children: + - name: DedupeOperator + description: + dedupeList: "[gender]" + allowedDuplication: 1 + keepEmpty: false + consecutive: false + children: + - name: ProjectOperator + 
description: + fields: "[account_number, gender, age]" + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ + \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ + _source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"\ + excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ + \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_not_pushed.yaml new file mode 100644 index 00000000000..3a711878435 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_not_pushed.yaml @@ -0,0 +1,24 @@ +root: + name: ProjectOperator + description: + fields: "[account_number, gender, age]" + children: + - name: DedupeOperator + description: + dedupeList: "[gender]" + allowedDuplication: 1 + keepEmpty: true + consecutive: false + children: + - name: ProjectOperator + description: + fields: "[account_number, gender, age]" + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ + \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ + _source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"\ + excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ + \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_push.json deleted file mode 100644 index 46fa0793af9..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_push.json 
+++ /dev/null @@ -1,30 +0,0 @@ -{ - "root": { - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "DedupeOperator", - "description": { - "dedupeList": "[gender]", - "allowedDuplication": 1, - "keepEmpty": true, - "consecutive": false - }, - "children": [{ - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" - }, - "children": [] - }] - }] - }] - } -} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.json deleted file mode 100644 index e7728735ee0..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "root": { - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "DedupeOperator", - "description": { - "dedupeList": "[gender]", - "allowedDuplication": 1, - "keepEmpty": false, - "consecutive": false - }, - "children": [{ - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" - }, - 
"children": [] - }] - }] - }] - } -} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.yaml new file mode 100644 index 00000000000..7ff290348e9 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.yaml @@ -0,0 +1,24 @@ +root: + name: ProjectOperator + description: + fields: "[account_number, gender, age]" + children: + - name: DedupeOperator + description: + dedupeList: "[gender]" + allowedDuplication: 1 + keepEmpty: false + consecutive: false + children: + - name: ProjectOperator + description: + fields: "[account_number, gender, age]" + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ + \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ + _source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"\ + excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ + \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4563_4664.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4563_4664.yml index b5aa11876bb..f5ecf4eb1cf 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4563_4664.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4563_4664.yml @@ -43,11 +43,11 @@ teardown: Content-Type: 'application/json' ppl: body: - query: source=test | rename status as http_status | dedup http_status | fields http_status + query: source=test | rename status as http_status | dedup http_status | fields http_status | sort http_status - match: { total: 3 } - match: { schema: [{"name": "http_status", "type": "string"}] } - - match: { datarows: [["200"], ["500"], ["404"]] } + - match: { datarows: [["200"], ["404"], ["500"]] } --- "4664: Test 
rename then filter": diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchAliasType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchAliasType.java index eedd2b1eef0..1954134c11b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchAliasType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchAliasType.java @@ -8,6 +8,7 @@ import java.util.List; import java.util.Optional; import java.util.Set; +import lombok.Getter; import org.opensearch.sql.data.type.ExprType; /** @@ -21,7 +22,7 @@ public class OpenSearchAliasType extends OpenSearchDataType { public static final Set objectFieldTypes = Set.of(MappingType.Object, MappingType.Nested); private final String path; - private final OpenSearchDataType originalType; + @Getter private final OpenSearchDataType originalType; public OpenSearchAliasType(String path, OpenSearchDataType type) { super(type.getExprCoreType()); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index 2aca2ad334e..fc5610d73f0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -259,7 +259,7 @@ private ExprValue parseContent(Content content) { * value. For example, {"empty_field": []}. 
*/ private Optional type(String field) { - return Optional.ofNullable(typeMapping.get(field)); + return Optional.ofNullable(typeMapping.get(field)).map(ExprType::getOriginalType); } /** @@ -316,6 +316,11 @@ private static ExprValue parseDateTimeString(String value, OpenSearchDateType da } private static ExprValue createOpenSearchDateType(Content value, ExprType type) { + return createOpenSearchDateType(value, type, false); + } + + private static ExprValue createOpenSearchDateType( + Content value, ExprType type, Boolean supportArrays) { OpenSearchDateType dt = (OpenSearchDateType) type; ExprCoreType returnFormat = dt.getExprCoreType(); if (value.isNumber()) { // isNumber diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java index d87611f213b..0f6c654df79 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java @@ -164,7 +164,7 @@ public interface Config extends OpenSearchRuleConfig { // 1. No RexOver and no duplicate projection // 2. Contains width_bucket function on date field referring // to bin command with parameter bins - Predicate.not(PlanUtils::containsRexOver) + Predicate.not(LogicalProject::containsOver) .and(PlanUtils::distinctProjectList) .or(Config::containsWidthBucketFuncOnDate)) .oneInput( @@ -225,7 +225,7 @@ public interface Config extends OpenSearchRuleConfig { .oneInput( b1 -> b1.operand(LogicalFilter.class) - .predicate(Config::mayBeFilterFromBucketNonNull) + .predicate(PlanUtils::mayBeFilterFromBucketNonNull) .oneInput( b2 -> b2.operand(LogicalProject.class) @@ -236,7 +236,7 @@ public interface Config extends OpenSearchRuleConfig { // 2. 
Contains width_bucket function on date // field referring // to bin command with parameter bins - Predicate.not(PlanUtils::containsRexOver) + Predicate.not(LogicalProject::containsOver) .and(PlanUtils::distinctProjectList) .or(Config::containsWidthBucketFuncOnDate)) .oneInput( @@ -263,18 +263,19 @@ public interface Config extends OpenSearchRuleConfig { b1 -> b1.operand(LogicalProject.class) .predicate( - Predicate.not(PlanUtils::containsRexOver) + Predicate.not(LogicalProject::containsOver) .and(PlanUtils::distinctProjectList)) .oneInput( b2 -> b2.operand(LogicalFilter.class) - .predicate(Config::mayBeFilterFromBucketNonNull) + .predicate(PlanUtils::mayBeFilterFromBucketNonNull) .oneInput( b3 -> b3.operand(LogicalProject.class) .predicate( Predicate.not( - PlanUtils::containsRexOver) + LogicalProject + ::containsOver) .and( PlanUtils ::distinctProjectList) @@ -300,21 +301,6 @@ default AggregateIndexScanRule toRule() { return new AggregateIndexScanRule(this); } - static boolean mayBeFilterFromBucketNonNull(LogicalFilter filter) { - RexNode condition = filter.getCondition(); - return isNotNullOnRef(condition) - || (condition instanceof RexCall rexCall - && rexCall.getOperator().equals(SqlStdOperatorTable.AND) - && rexCall.getOperands().stream() - .allMatch(AggregateIndexScanRule.Config::isNotNullOnRef)); - } - - private static boolean isNotNullOnRef(RexNode rex) { - return rex instanceof RexCall rexCall - && rexCall.isA(SqlKind.IS_NOT_NULL) - && rexCall.getOperands().get(0) instanceof RexInputRef; - } - static boolean containsWidthBucketFuncOnDate(LogicalProject project) { return project.getProjects().stream() .anyMatch( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index bdace1bd0a3..068900d3f18 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -5,20 +5,23 @@ package org.opensearch.sql.opensearch.planner.rules; -import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP; - +import java.util.ArrayList; import java.util.List; import java.util.function.Predicate; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelRule; +import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexWindow; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.mapping.Mapping; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.immutables.value.Value; @@ -37,106 +40,260 @@ protected DedupPushdownRule(Config config) { @Override public void onMatch(RelOptRuleCall call) { - final LogicalProject finalOutput = call.rel(0); - // TODO Used when number of duplication is more than 1 + final LogicalProject finalProject = call.rel(0); final LogicalFilter numOfDedupFilter = call.rel(1); final LogicalProject projectWithWindow = call.rel(2); - final CalciteLogicalIndexScan scan = call.rel(3); - List windows = PlanUtils.getRexWindowFromProject(projectWithWindow); - if (windows.isEmpty() || windows.stream().anyMatch(w -> w.partitionKeys.size() > 1)) { - // TODO leverage inner_hits for multiple partition keys - if (LOG.isDebugEnabled()) { - LOG.debug("Cannot pushdown the dedup with multiple fields"); - } - return; + if (call.rels.length == 5) { + final CalciteLogicalIndexScan scan = call.rel(4); + apply(call, 
finalProject, numOfDedupFilter, projectWithWindow, scan); + } else { + throw new AssertionError( + String.format( + "The length of rels should be %s but got %s", + this.operands.size(), call.rels.length)); } - final List fieldNameList = projectWithWindow.getInput().getRowType().getFieldNames(); - List selectColumns = PlanUtils.getSelectColumns(windows.getFirst().partitionKeys); - String fieldName = fieldNameList.get(selectColumns.getFirst()); + } - CalciteLogicalIndexScan newScan = scan.pushDownCollapse(finalOutput, fieldName); - if (newScan != null) { - call.transformTo(newScan); + protected void apply( + RelOptRuleCall call, + LogicalProject finalProject, + LogicalFilter numOfDedupFilter, + LogicalProject projectWithWindow, + CalciteLogicalIndexScan scan) { + List windows = PlanUtils.getRexWindowFromProject(projectWithWindow); + if (windows.size() != 1) { + return; } - } - private static boolean validFilter(LogicalFilter filter) { - if (filter.getCondition().getKind() != SqlKind.LESS_THAN_OR_EQUAL) { - return false; + List dedupColumns = windows.get(0).partitionKeys; + if (dedupColumns.stream() + .filter(rex -> rex.isA(SqlKind.INPUT_REF)) + .anyMatch(rex -> rex.getType().getSqlTypeName() == SqlTypeName.MAP)) { + LOG.debug("Cannot pushdown the dedup since the dedup fields contains MAP type"); + // TODO https://github.com/opensearch-project/sql/issues/4564 + return; } - List operandsOfCondition = ((RexCall) filter.getCondition()).getOperands(); - RexNode leftOperand = operandsOfCondition.getFirst(); - if (!(leftOperand instanceof RexInputRef ref)) { + if (projectWithWindow.getProjects().stream() + .filter(rex -> !rex.isA(SqlKind.ROW_NUMBER)) + .filter(Predicate.not(dedupColumns::contains)) + .anyMatch(rex -> !rex.isA(SqlKind.INPUT_REF))) { + // TODO fallback to the approach of Collapse search + // | eval new_age = age + 1 | fields gender, new_age | dedup 1 gender if (LOG.isDebugEnabled()) { - LOG.debug("Cannot pushdown the dedup since the left operand is not 
RexInputRef"); + LOG.debug( + "Cannot pushdown the dedup since the final outputs contain a column which is not" + + " included in table schema"); } - return false; + return; } - String referenceName = filter.getRowType().getFieldNames().get(ref.getIndex()); - if (!referenceName.equals(ROW_NUMBER_COLUMN_FOR_DEDUP)) { + + List rexCallsExceptWindow = + projectWithWindow.getProjects().stream() + .filter(rex -> !rex.isA(SqlKind.ROW_NUMBER)) + .filter(rex -> rex instanceof RexCall) + .toList(); + if (!rexCallsExceptWindow.isEmpty() + && dedupColumnsContainRexCall(rexCallsExceptWindow, dedupColumns)) { + // TODO https://github.com/opensearch-project/sql/issues/4789 + // | eval new_gender = lower(gender) | fields new_gender, age | dedup 1 new_gender if (LOG.isDebugEnabled()) { - LOG.debug( - "Cannot pushdown the dedup since the left operand is not {}", - ROW_NUMBER_COLUMN_FOR_DEDUP); + LOG.debug("Cannot pushdown the dedup since the dedup columns contain RexCall"); } - return false; + return; } - RexNode rightOperand = operandsOfCondition.getLast(); - if (!(rightOperand instanceof RexLiteral numLiteral)) { - if (LOG.isDebugEnabled()) { - LOG.debug("Cannot pushdown the dedup since the right operand is not RexLiteral"); + + // must be row_number <= number + assert numOfDedupFilter.getCondition().isA(SqlKind.LESS_THAN_OR_EQUAL); + RexLiteral literal = + (RexLiteral) ((RexCall) numOfDedupFilter.getCondition()).getOperands().getLast(); + Integer dedupNumer = literal.getValueAs(Integer.class); + + // We convert the dedup pushdown to composite aggregate + top_hits: + // Aggregate(literalAgg(dedupNumer), groups) + // +- Project(groups, remaining) + // +- Scan + // Step 1: Initialize a RelBuilder to build aggregate by pushing Scan and Project + RelBuilder relBuilder = call.builder(); + relBuilder.push(scan); + // To baseline the rowType, merge the fields() and projectWithWindow + List mergedRexList = new ArrayList<>(); + List mergedFieldNames = new ArrayList<>(); + List builderFields = 
relBuilder.fields(); + List projectFields = projectWithWindow.getProjects(); + List builderFieldNames = relBuilder.peek().getRowType().getFieldNames(); + List projectFieldNames = projectWithWindow.getRowType().getFieldNames(); + + // Add existing fields with proper names + // For rename case: source = t | rename old as new | dedup new + for (RexNode field : builderFields) { + mergedRexList.add(field); + int projectIndex = projectFields.indexOf(field); + if (projectIndex >= 0) { + mergedFieldNames.add(projectFieldNames.get(projectIndex)); + } else { + mergedFieldNames.add(builderFieldNames.get(builderFields.indexOf(field))); } - return false; } - Integer num = numLiteral.getValueAs(Integer.class); - if (num == null || num > 1) { - // TODO leverage inner_hits for num > 1 - if (LOG.isDebugEnabled()) { - LOG.debug("Cannot pushdown the dedup since number of duplicate events is larger than 1"); + // Append new fields from project (excluding ROW_NUMBER and duplicates) + for (RexNode field : projectFields) { + if (!field.isA(SqlKind.ROW_NUMBER) && !builderFields.contains(field)) { + mergedRexList.add(field); + mergedFieldNames.add(field.toString()); } - return false; } - return true; + // Force add the project + relBuilder.project(mergedRexList, mergedFieldNames, true); + LogicalProject baseline = (LogicalProject) relBuilder.peek(); + Mapping mappingForDedupColumns = + PlanUtils.mapping(dedupColumns, relBuilder.peek().getRowType()); + + // Step 2: Push a Project with the group columns first, then the remaining finalOutput columns + List reordered = new ArrayList<>(PlanUtils.getInputRefs(dedupColumns)); + baseline.getProjects().stream() + .filter(Predicate.not(dedupColumns::contains)) + .forEach(reordered::add); + relBuilder.project(reordered); + // childProject includes the full list of finalOutput columns + LogicalProject childProject = (LogicalProject) relBuilder.peek(); + + // Step 3: Push an Aggregate + // We push down a LITERAL_AGG with dedupNumer for converting the dedup command to 
aggregate: + // (1) Pass the dedupNumer to AggregateAnalyzer.processAggregateCalls() + // (2) Distinguish it from an optimization operator and user defined aggregator. + // (LITERAL_AGG is used in optimization normally, see {@link SqlKind#LITERAL_AGG}) + final List newDedupColumns = RexUtil.apply(mappingForDedupColumns, dedupColumns); + relBuilder.aggregate(relBuilder.groupKey(newDedupColumns), relBuilder.literalAgg(dedupNumer)); + // add bucket_nullable = false hint + PlanUtils.addIgnoreNullBucketHintToAggregate(relBuilder); + // peek the aggregate after hint being added + LogicalAggregate aggregate = (LogicalAggregate) relBuilder.build(); + + CalciteLogicalIndexScan newScan = + (CalciteLogicalIndexScan) scan.pushDownAggregate(aggregate, childProject); + if (newScan != null) { + // Reorder back to original order + call.transformTo(newScan.copyWithNewSchema(finalProject.getRowType())); + } + } + + private static boolean dedupColumnsContainRexCall( + List calls, List dedupColumns) { + List dedupColumnIndicesFromCall = + PlanUtils.getSelectColumns(calls).stream().distinct().toList(); + List dedupColumnsIndicesFromPartitionKeys = + PlanUtils.getSelectColumns(dedupColumns).stream().distinct().toList(); + return dedupColumnsIndicesFromPartitionKeys.stream() + .anyMatch(dedupColumnIndicesFromCall::contains); } - /** - * Match fixed pattern:
- * LogicalProject(remove _row_number_dedup_)
- * LogicalFilter(condition=[<=($1, numOfDedup)])
- * LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])
- * LogicalFilter(condition=[IS NOT NULL($0)])
- */ @Value.Immutable public interface Config extends OpenSearchRuleConfig { + // Can only push the case with KEEPEMPTY=false: + // +- LogicalProject(no _row_number_dedup_) + // +- LogicalFilter(condition contains _row_number_dedup_) + // +- LogicalProject(contains _row_number_dedup_) + // +- LogicalFilter(condition=IS NOT NULL(dedupColumn)) + // +- CalciteLogicalIndexScan Config DEFAULT = ImmutableDedupPushdownRule.Config.builder() .build() + .withDescription("Dedup-to-Aggregate") .withOperandSupplier( b0 -> b0.operand(LogicalProject.class) + .predicate(Predicate.not(PlanUtils::containsRowNumberDedup)) .oneInput( b1 -> b1.operand(LogicalFilter.class) - .predicate(DedupPushdownRule::validFilter) + .predicate(Config::validDedupNumberChecker) .oneInput( b2 -> b2.operand(LogicalProject.class) .predicate(PlanUtils::containsRowNumberDedup) .oneInput( b3 -> - b3.operand(CalciteLogicalIndexScan.class) + b3.operand(LogicalFilter.class) .predicate( - Predicate.not( - AbstractCalciteIndexScan - ::isLimitPushed) - .and( - AbstractCalciteIndexScan - ::noAggregatePushed)) - .noInputs())))); + PlanUtils + ::mayBeFilterFromBucketNonNull) + .oneInput( + b4 -> + b4.operand( + CalciteLogicalIndexScan + .class) + .predicate( + Predicate.not( + AbstractCalciteIndexScan + ::isLimitPushed) + .and( + AbstractCalciteIndexScan + ::noAggregatePushed) + .and( + AbstractCalciteIndexScan + ::isProjectPushed)) + .noInputs()))))); + // +- LogicalProject(no _row_number_dedup_) + // +- LogicalFilter(condition contains _row_number_dedup_) + // +- LogicalProject(contains _row_number_dedup_) + // +- LogicalFilter(condition IS NOT NULL(dedupColumn)) + // +- LogicalProject(dedupColumn is call) + // +- CalciteLogicalIndexScan + Config DEDUP_EXPR = + ImmutableDedupPushdownRule.Config.builder() + .build() + .withDescription("DedupWithExpression-to-Aggregate") + .withOperandSupplier( + b0 -> + b0.operand(LogicalProject.class) + .predicate(Predicate.not(PlanUtils::containsRowNumberDedup)) + .oneInput( + b1 
-> + b1.operand(LogicalFilter.class) + .predicate(Config::validDedupNumberChecker) + .oneInput( + b2 -> + b2.operand(LogicalProject.class) + .predicate(PlanUtils::containsRowNumberDedup) + .oneInput( + b3 -> + b3.operand(LogicalFilter.class) + .predicate(Config::isNotNull) + .oneInput( + b4 -> + b4.operand(LogicalProject.class) + .predicate( + PlanUtils + ::containsRexCall) + .oneInput( + b5 -> + b5.operand( + CalciteLogicalIndexScan + .class) + .predicate( + Predicate + .not( + AbstractCalciteIndexScan + ::isLimitPushed) + .and( + AbstractCalciteIndexScan + ::noAggregatePushed) + .and( + AbstractCalciteIndexScan + ::isProjectPushed)) + .noInputs())))))); @Override default DedupPushdownRule toRule() { return new DedupPushdownRule(this); } + + private static boolean validDedupNumberChecker(LogicalFilter filter) { + return filter.getCondition().isA(SqlKind.LESS_THAN_OR_EQUAL) + && PlanUtils.containsRowNumberDedup(filter); + } + + private static boolean isNotNull(LogicalFilter filter) { + return filter.getCondition().isA(SqlKind.IS_NOT_NULL); + } } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java index a09be09a34b..2034eb1c6d8 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java @@ -204,7 +204,7 @@ public interface Config extends OpenSearchRuleConfig { b1.operand(EnumerableProject.class) .predicate( Predicate.not(Project::containsOver) - .and(PlanUtils::projectContainsExpr)) + .and(PlanUtils::containsRexCall)) .anyInputs())); @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java index c991b1964e9..ef55868b202 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java @@ -29,6 +29,8 @@ public class OpenSearchIndexRules { SortIndexScanRule.Config.DEFAULT.toRule(); private static final DedupPushdownRule DEDUP_PUSH_DOWN = DedupPushdownRule.Config.DEFAULT.toRule(); + private static final DedupPushdownRule DEDUP_EXPR_PUSH_DOWN = + DedupPushdownRule.Config.DEDUP_EXPR.toRule(); private static final SortProjectExprTransposeRule SORT_PROJECT_EXPR_TRANSPOSE = SortProjectExprTransposeRule.Config.DEFAULT.toRule(); private static final ExpandCollationOnProjectExprRule EXPAND_COLLATION_ON_PROJECT_EXPR = @@ -54,8 +56,9 @@ public class OpenSearchIndexRules { BUCKET_NON_NULL_AGG_WITH_UDF_INDEX_SCAN, LIMIT_INDEX_SCAN, SORT_INDEX_SCAN, - // TODO enable if https://github.com/opensearch-project/OpenSearch/issues/3725 resolved - // DEDUP_PUSH_DOWN, + DEDUP_PUSH_DOWN, + // TODO https://github.com/opensearch-project/sql/issues/4789 + // DEDUP_EXPR_PUSH_DOWN, SORT_PROJECT_EXPR_TRANSPOSE, SORT_AGGREGATION_METRICS_RULE, RARE_TOP_PUSH_DOWN, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java index 9f27bfd3954..fe0fae8e64a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java @@ -134,7 +134,7 @@ public interface Config extends OpenSearchRuleConfig { b1.operand(LogicalProject.class) .predicate( Predicate.not(LogicalProject::containsOver) - .and(PlanUtils::projectContainsExpr)) + 
.and(PlanUtils::containsRexCall)) .anyInputs())); @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 2abfb5a401b..b52d4a06820 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -73,14 +73,17 @@ import org.opensearch.search.aggregations.metrics.ValueCountAggregationBuilder; import org.opensearch.search.aggregations.support.ValueType; import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; +import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; +import org.opensearch.sql.opensearch.request.PredicateAnalyzer.ScriptQueryExpression; import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser; import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.CountAsTotalHitsParser; @@ -146,14 +149,9 @@ > T build(RexNode node, T sourceBuilde T build(RexNode node, Function fieldBuilder, Function scriptBuilder) { if (node == null) return fieldBuilder.apply(METADATA_FIELD); else if (node instanceof RexInputRef ref) { - return fieldBuilder.apply( - new NamedFieldExpression(ref.getIndex(), rowType.getFieldNames(), fieldTypes) - .getReferenceForTermQuery()); + return 
fieldBuilder.apply(inferNamedField(node).getReferenceForTermQuery()); } else if (node instanceof RexCall || node instanceof RexLiteral) { - return scriptBuilder.apply( - (new PredicateAnalyzer.ScriptQueryExpression( - node, rowType, fieldTypes, cluster, Collections.emptyMap())) - .getScript()); + return scriptBuilder.apply(inferScript(node).getScript()); } throw new IllegalStateException( String.format("Metric aggregation doesn't support RexNode %s", node)); @@ -167,6 +165,15 @@ NamedFieldExpression inferNamedField(RexNode node) { String.format("Cannot infer field name from RexNode %s", node)); } + ScriptQueryExpression inferScript(RexNode node) { + if (node instanceof RexCall || node instanceof RexLiteral) { + return new ScriptQueryExpression( + node, rowType, fieldTypes, cluster, Collections.emptyMap()); + } + throw new IllegalStateException( + String.format("Metric aggregation doesn't support RexNode %s", node)); + } + T inferValue(RexNode node, Class clazz) { if (node instanceof RexLiteral literal) { return literal.getValueAs(clazz); @@ -341,10 +348,21 @@ private static Pair> processAggregateCalls( return Pair.of(metricBuilder, metricParserList); } + /** + * Convert aggregate arguments through child project. Normally, just return the rex nodes of + * Project which are included in aggCall expression. If the aggCall is a LITERAL_AGG, it returns + * all rex nodes of Project except WindowFunction. + * + * @param aggCall the aggregate call + * @param project the project + * @return the converted RexNode list + */ private static List convertAggArgThroughProject(AggregateCall aggCall, Project project) { return project == null ? List.of() - : aggCall.getArgList().stream().map(project.getProjects()::get).toList(); + : PlanUtils.getObjectFromLiteralAgg(aggCall) != null + ? 
project.getProjects().stream().filter(rex -> !rex.isA(SqlKind.ROW_NUMBER)).toList() + : aggCall.getArgList().stream().map(project.getProjects()::get).toList(); } private static Pair createAggregationBuilderAndParser( @@ -417,7 +435,7 @@ private static Pair createRegularAggregation( .sort( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), SortOrder.ASC), - new TopHitsParser(aggFieldName, true)); + new TopHitsParser(aggFieldName, true, false)); } } case MAX -> { @@ -436,7 +454,7 @@ private static Pair createRegularAggregation( .sort( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), SortOrder.DESC), - new TopHitsParser(aggFieldName, true)); + new TopHitsParser(aggFieldName, true, false)); } } case VAR_SAMP -> @@ -486,7 +504,7 @@ yield switch (functionName) { helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) .size(helper.inferValue(args.getLast(), Integer.class)) .from(0), - new TopHitsParser(aggFieldName)); + new TopHitsParser(aggFieldName, false, true)); case FIRST -> { TopHitsAggregationBuilder firstBuilder = AggregationBuilders.topHits(aggFieldName).size(1).from(0); @@ -494,7 +512,7 @@ yield switch (functionName) { firstBuilder.fetchField( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()); } - yield Pair.of(firstBuilder, new TopHitsParser(aggFieldName, true)); + yield Pair.of(firstBuilder, new TopHitsParser(aggFieldName, true, false)); } case LAST -> { TopHitsAggregationBuilder lastBuilder = @@ -506,7 +524,7 @@ yield switch (functionName) { lastBuilder.fetchField( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()); } - yield Pair.of(lastBuilder, new TopHitsParser(aggFieldName, true)); + yield Pair.of(lastBuilder, new TopHitsParser(aggFieldName, true, false)); } case PERCENTILE_APPROX -> { PercentilesAggregationBuilder aggBuilder = @@ -530,6 +548,38 @@ yield switch (functionName) { String.format("Unsupported push-down aggregator %s", aggCall.getAggregation())); }; } + case 
LITERAL_AGG -> { + RexLiteral literal = PlanUtils.getObjectFromLiteralAgg(aggCall); + if (literal == null || !(literal.getValue() instanceof Number)) { + throw new AggregateAnalyzer.AggregateAnalyzerException( + String.format("Unsupported push-down aggregator %s", aggCall.getAggregation())); + } + Integer dedupNumber = literal.getValueAs(Integer.class); + // Disable fetchSource since TopHitsParser only parses fetchField currently. + TopHitsAggregationBuilder topHitsAggregationBuilder = + AggregationBuilders.topHits(aggFieldName).from(0).size(dedupNumber); + List sources = new ArrayList<>(); + List scripts = new ArrayList<>(); + args.forEach( + rex -> { + if (rex instanceof RexInputRef) { + sources.add(helper.inferNamedField(rex).getReference()); + } else if (rex instanceof RexCall || rex instanceof RexLiteral) { + scripts.add( + new SearchSourceBuilder.ScriptField( + rex.toString(), helper.inferScript(rex).getScript(), false)); + } else { + throw new AggregateAnalyzer.AggregateAnalyzerException( + String.format( + "Unsupported push-down aggregator %s due to rex type is %s", + aggCall.getAggregation(), rex.getKind())); + } + }); + topHitsAggregationBuilder.fetchSource( + sources.stream().distinct().toArray(String[]::new), new String[0]); + topHitsAggregationBuilder.scriptFields(scripts); + yield Pair.of(topHitsAggregationBuilder, new TopHitsParser(aggFieldName, false, false)); + } default -> throw new AggregateAnalyzer.AggregateAnalyzerException( String.format("unsupported aggregator %s", aggCall.getAggregation())); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 7a0a18c79ac..5c3ecd11e49 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -73,6 +73,8 @@ 
public class OpenSearchRequestBuilder { @ToString.Exclude private final Settings settings; + @ToString.Exclude private boolean topHitsAgg = false; + public static class PushDownUnSupportedException extends RuntimeException { public PushDownUnSupportedException(String message) { super(message); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index 4f3e37ac4e6..e35c7efcf7b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -229,7 +229,7 @@ private Iterator handleAggregationResponse() { for (Map.Entry value : entry.entrySet()) { builder.put( value.getKey(), - exprValueFactory.construct(value.getKey(), value.getValue(), false)); + exprValueFactory.construct(value.getKey(), value.getValue(), true)); } return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build()); }) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/ArgMaxMinParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/ArgMaxMinParser.java index 55dacd7081c..597da4e4967 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/ArgMaxMinParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/ArgMaxMinParser.java @@ -6,6 +6,8 @@ package org.opensearch.sql.opensearch.response.agg; import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; import lombok.Value; import org.opensearch.search.SearchHit; @@ -19,20 +21,22 @@ public class ArgMaxMinParser implements MetricParser { String name; @Override - public Map parse(Aggregation agg) { + public List> parse(Aggregation agg) { TopHits topHits = (TopHits) agg; SearchHit[] hits = topHits.getHits().getHits(); if (hits.length == 0) 
{ - return Collections.singletonMap(agg.getName(), null); + return Collections.singletonList( + new HashMap<>(Collections.singletonMap(agg.getName(), null))); } // Get value from fields (fetchField) if (hits[0].getFields() != null && !hits[0].getFields().isEmpty()) { Object value = hits[0].getFields().values().iterator().next().getValue(); - return Collections.singletonMap(agg.getName(), value); + return Collections.singletonList(Collections.singletonMap(agg.getName(), value)); + } else { + return Collections.singletonList( + new HashMap<>(Collections.singletonMap(agg.getName(), null))); } - - return Collections.singletonMap(agg.getName(), null); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java index db6e4eef248..761aef4fd98 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -6,6 +6,7 @@ package org.opensearch.sql.opensearch.response.agg; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -84,9 +85,11 @@ private boolean isLeafAgg(Aggregations aggregations) { } private List> parseLeafAgg(Aggregations aggregations, long docCount) { - Map resultMap = metricsParser.parse(aggregations); - countAggNameList.forEach(countAggName -> resultMap.put(countAggName, docCount)); - return List.of(resultMap); + List> resultMapList = metricsParser.parse(aggregations); + List> maps = + resultMapList.isEmpty() ? 
List.of(new HashMap<>()) : resultMapList; + countAggNameList.forEach(countAggName -> maps.forEach(map -> map.put(countAggName, docCount))); + return maps; } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java index 57941311d44..4646ffa9d3e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java @@ -5,11 +5,12 @@ package org.opensearch.sql.opensearch.response.agg; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; import org.opensearch.search.SearchHits; @@ -26,8 +27,6 @@ public class CompositeAggregationParser implements OpenSearchAggregationResponseParser { private final MetricParserHelper metricsParser; - // countAggNameList dedicated the list of count aggregations which are filled by doc_count - private List countAggNameList = List.of(); public CompositeAggregationParser(MetricParser... 
metricParserList) { metricsParser = new MetricParserHelper(Arrays.asList(metricParserList)); @@ -37,25 +36,17 @@ public CompositeAggregationParser(List metricParserList) { metricsParser = new MetricParserHelper(metricParserList); } - /** CompositeAggregationParser with count aggregation name list, used in v3 */ - public CompositeAggregationParser( - List metricParserList, List countAggNameList) { - metricsParser = new MetricParserHelper(metricParserList); - this.countAggNameList = countAggNameList; - } - @Override public List> parse(Aggregations aggregations) { return ((CompositeAggregation) aggregations.asList().get(0)) - .getBuckets().stream().map(this::parse).collect(Collectors.toList()); + .getBuckets().stream().map(this::parse).flatMap(Collection::stream).toList(); } - private Map parse(CompositeAggregation.Bucket bucket) { - Map resultMap = new HashMap<>(); - resultMap.putAll(bucket.getKey()); - resultMap.putAll(metricsParser.parse(bucket.getAggregations())); - countAggNameList.forEach(name -> resultMap.put(name, bucket.getDocCount())); - return resultMap; + private List> parse(CompositeAggregation.Bucket bucket) { + List> resultMapList = new ArrayList<>(); + resultMapList.add(new HashMap<>(bucket.getKey())); + resultMapList.addAll(metricsParser.parse(bucket.getAggregations())); + return resultMapList; } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FilterParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FilterParser.java index 95571df30a7..de9a4a2fbd0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FilterParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FilterParser.java @@ -5,6 +5,7 @@ package org.opensearch.sql.opensearch.response.agg; +import java.util.List; import java.util.Map; import lombok.Builder; import lombok.EqualsAndHashCode; @@ -25,7 +26,7 @@ public class FilterParser implements MetricParser { @Getter 
private final String name; @Override - public Map parse(Aggregation aggregations) { + public List> parse(Aggregation aggregations) { return metricsParser.parse(((Filter) aggregations).getAggregations().asList().get(0)); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParser.java index 04781d2a94a..db163bcd97a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParser.java @@ -5,6 +5,7 @@ package org.opensearch.sql.opensearch.response.agg; +import java.util.List; import java.util.Map; import org.opensearch.search.aggregations.Aggregation; @@ -20,5 +21,5 @@ public interface MetricParser { * @param aggregation {@link Aggregation} * @return the map between metric name and metric value. */ - Map parse(Aggregation aggregation); + List> parse(Aggregation aggregation); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParserHelper.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParserHelper.java index 8886668abb0..cc85df5bf7b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParserHelper.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParserHelper.java @@ -5,7 +5,8 @@ package org.opensearch.sql.opensearch.response.agg; -import java.util.HashMap; +import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -45,18 +46,26 @@ public MetricParserHelper(List metricParserList, List coun * @param aggregations {@link Aggregations} * @return the map between metric name and metric value. 
*/ - public Map parse(Aggregations aggregations) { - Map resultMap = new HashMap<>(); + public List> parse(Aggregations aggregations) { + List> resultMapList = new ArrayList<>(); + Map mergeMap = new LinkedHashMap<>(); for (Aggregation aggregation : aggregations) { - if (metricParserMap.containsKey(aggregation.getName())) { - resultMap.putAll(metricParserMap.get(aggregation.getName()).parse(aggregation)); - } else { + MetricParser parser = metricParserMap.get(aggregation.getName()); + if (parser == null) { throw new RuntimeException( StringUtils.format( "couldn't parse field %s in aggregation response", aggregation.getName())); } + List> resList = parser.parse(aggregation); + if (resList.size() == 1) { // single value parser + mergeMap.putAll(resList.get(0)); + } else if (resList.size() > 1) { // top_hits parser + resultMapList.addAll(resList); + } + } + if (!mergeMap.isEmpty()) { + resultMapList.add(mergeMap); } - // countAggNameList.forEach(name -> resultMap.put(name, bucket.getDocCount())); - return resultMap; + return resultMapList; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/NoBucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/NoBucketAggregationParser.java index df8dcdd4ce9..a43a0251a48 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/NoBucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/NoBucketAggregationParser.java @@ -6,7 +6,6 @@ package org.opensearch.sql.opensearch.response.agg; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.Map; import lombok.Getter; @@ -29,7 +28,7 @@ public NoBucketAggregationParser(List metricParserList) { @Override public List> parse(Aggregations aggregations) { - return Collections.singletonList(metricsParser.parse(aggregations)); + return metricsParser.parse(aggregations); } @Override diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/PercentilesParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/PercentilesParser.java index c9d78a94418..ce538f1405c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/PercentilesParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/PercentilesParser.java @@ -7,6 +7,8 @@ import com.google.common.collect.Streams; import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.stream.Collectors; import lombok.EqualsAndHashCode; @@ -23,14 +25,16 @@ public class PercentilesParser implements MetricParser { @Getter private final String name; @Override - public Map parse(Aggregation agg) { - return Collections.singletonMap( - agg.getName(), - // TODO a better implementation here is providing a class `MultiValueParser` - // similar to `SingleValueParser`. However, there is no method `values()` available - // in `org.opensearch.search.aggregations.metrics.MultiValue`. - Streams.stream(((Percentiles) agg).iterator()) - .map(Percentile::getValue) - .collect(Collectors.toList())); + public List> parse(Aggregation agg) { + return Collections.singletonList( + new HashMap<>( + Collections.singletonMap( + agg.getName(), + // TODO a better implementation here is providing a class `MultiValueParser` + // similar to `SingleValueParser`. However, there is no method `values()` available + // in `org.opensearch.search.aggregations.metrics.MultiValue`. 
+ Streams.stream(((Percentiles) agg).iterator()) + .map(Percentile::getValue) + .collect(Collectors.toList())))); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SinglePercentileParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SinglePercentileParser.java index 9665d863fc7..ef47d9d629a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SinglePercentileParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SinglePercentileParser.java @@ -7,6 +7,8 @@ import com.google.common.collect.Streams; import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -21,12 +23,14 @@ public class SinglePercentileParser implements MetricParser { @Getter private final String name; @Override - public Map parse(Aggregation agg) { - return Collections.singletonMap( - agg.getName(), - // TODO `Percentiles` implements interface - // `org.opensearch.search.aggregations.metrics.MultiValue`, but there is not - // method `values()` available in this interface. So we - Streams.stream(((Percentiles) agg).iterator()).findFirst().get().getValue()); + public List> parse(Aggregation agg) { + return Collections.singletonList( + new HashMap<>( + Collections.singletonMap( + agg.getName(), + // TODO `Percentiles` implements interface + // `org.opensearch.search.aggregations.metrics.MultiValue`, but there is not + // method `values()` available in this interface. 
So we + Streams.stream(((Percentiles) agg).iterator()).findFirst().get().getValue()))); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java index 5487f9ca605..f2a3ab785d4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java @@ -8,6 +8,8 @@ import static org.opensearch.sql.opensearch.response.agg.Utils.handleNanInfValue; import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -23,8 +25,11 @@ public class SingleValueParser implements MetricParser { @Getter private final String name; @Override - public Map parse(Aggregation agg) { - return Collections.singletonMap( - agg.getName(), handleNanInfValue(((NumericMetricsAggregation.SingleValue) agg).value())); + public List> parse(Aggregation agg) { + return Collections.singletonList( + new HashMap<>( + Collections.singletonMap( + agg.getName(), + handleNanInfValue(((NumericMetricsAggregation.SingleValue) agg).value())))); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java index ccda391c5eb..d1e044fdd87 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java @@ -8,6 +8,8 @@ import static org.opensearch.sql.opensearch.response.agg.Utils.handleNanInfValue; import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.function.Function; import lombok.EqualsAndHashCode; @@ -26,8 +28,10 @@ public class 
StatsParser implements MetricParser { @Getter private final String name; @Override - public Map parse(Aggregation agg) { - return Collections.singletonMap( - agg.getName(), handleNanInfValue(valueExtractor.apply((ExtendedStats) agg))); + public List> parse(Aggregation agg) { + return Collections.singletonList( + new HashMap<>( + Collections.singletonMap( + agg.getName(), handleNanInfValue(valueExtractor.apply((ExtendedStats) agg))))); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java index c9d9cf61d9e..0d662fcc385 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java @@ -7,10 +7,15 @@ import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; +import org.opensearch.common.document.DocumentField; import org.opensearch.search.SearchHit; import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.metrics.TopHits; @@ -21,45 +26,72 @@ public class TopHitsParser implements MetricParser { @Getter private final String name; private final boolean returnSingleValue; + private final boolean returnMergeValue; - public TopHitsParser(String name) { - this.name = name; - this.returnSingleValue = false; - } - - public TopHitsParser(String name, boolean returnSingleValue) { + public TopHitsParser(String name, boolean returnSingleValue, boolean returnMergeValue) { this.name = name; this.returnSingleValue = returnSingleValue; + this.returnMergeValue = returnMergeValue; } @Override - public Map parse(Aggregation agg) { + public List> parse(Aggregation agg) 
{ TopHits topHits = (TopHits) agg; SearchHit[] hits = topHits.getHits().getHits(); if (hits.length == 0) { - return Collections.singletonMap(agg.getName(), null); + return Collections.singletonList( + new HashMap<>(Collections.singletonMap(agg.getName(), null))); } if (returnSingleValue) { + if (hits[0].getFields() == null || hits[0].getFields().isEmpty()) { + return Collections.singletonList( + new HashMap<>(Collections.singletonMap(agg.getName(), null))); + } // Extract the single value from the first (and only) hit from fields (fetchField) - if (hits[0].getFields() != null && !hits[0].getFields().isEmpty()) { - Object value = hits[0].getFields().values().iterator().next().getValue(); - return Collections.singletonMap(agg.getName(), value); + Object value = hits[0].getFields().values().iterator().next().getValue(); + return Collections.singletonList( + new HashMap<>(Collections.singletonMap(agg.getName(), value))); + } else if (returnMergeValue) { + if (hits[0].getFields() == null || hits[0].getFields().isEmpty()) { + return Collections.singletonList( + new HashMap<>(Collections.singletonMap(agg.getName(), Collections.emptyList()))); } - return Collections.singletonMap(agg.getName(), null); - } else { // Return all values as a list from fields (fetchField) - if (hits[0].getFields() != null && !hits[0].getFields().isEmpty()) { - return Collections.singletonMap( - agg.getName(), - Arrays.stream(hits) - .flatMap(h -> h.getFields().values().stream()) - .map(f -> f.getValue()) - .filter(v -> v != null) // Filter out null values - .collect(Collectors.toList())); - } - return Collections.singletonMap(agg.getName(), Collections.emptyList()); + return Collections.singletonList( + Collections.singletonMap( + agg.getName(), + Arrays.stream(hits) + .flatMap(h -> h.getFields().values().stream()) + .map(DocumentField::getValue) + .filter(Objects::nonNull) // Filter out null values + .collect(Collectors.toList()))); + } else { + // "hits": { + // "hits": [ + // { + // 
"_source": { + // "name": "A", + // "category": "X" + // } + // }, + // { + // "_source": { + // "name": "A", + // "category": "Y" + // } + // } + // ] + // } + // will converts to: + // List[ + // LinkedHashMap["name" -> "A", "category" -> "X"], + // LinkedHashMap["name" -> "A", "category" -> "Y"] + // ] + return Arrays.stream(hits) + .>map(hit -> new LinkedHashMap<>(hit.getSourceAsMap())) + .toList(); } } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 11421fca0a1..29b240613b5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -125,7 +125,6 @@ public double estimateRowCount(RelMetadataQuery mq) { case SORT_AGG_METRICS -> NumberUtil.min(rowCount, osIndex.getBucketSize().doubleValue()); // Refer the org.apache.calcite.rel.metadata.RelMdRowCount - case COLLAPSE -> rowCount / 10; case FILTER, SCRIPT -> NumberUtil.multiply( rowCount, @@ -182,11 +181,6 @@ public double estimateRowCount(RelMetadataQuery mq) { sortKeys.stream().filter(digest -> digest.getExpression() != null).count(); dCpu += NumberUtil.multiply(dRows, 1.1 * complexExprCount); } - // Refer the org.apache.calcite.rel.metadata.RelMdRowCount.getRowCount(Aggregate rel,...) - case COLLAPSE -> { - dRows = dRows / 10; - dCpu += dRows; - } // Ignore cost the primitive filter but it will affect the rows count. 
case FILTER -> dRows = @@ -430,4 +424,12 @@ public boolean isMetricsOrderPushed() { public boolean isTopKPushed() { return this.getPushDownContext().isTopKPushed(); } + + public boolean isScriptPushed() { + return this.getPushDownContext().isScriptPushed(); + } + + public boolean isProjectPushed() { + return this.getPushDownContext().isProjectPushed(); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index bf79f803544..2821aa037da 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -227,7 +227,7 @@ public CalciteLogicalIndexScan pushDownCollapse(Project finalOutput, String fiel } CalciteLogicalIndexScan newScan = this.copyWithNewSchema(finalOutput.getRowType()); newScan.pushDownContext.add( - PushDownType.COLLAPSE, + PushDownType.AGGREGATION, fieldName, (OSRequestBuilderAction) requestBuilder -> requestBuilder.pushDownCollapse(field)); return newScan; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 29700fd6606..16c81facace 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -36,6 +36,7 @@ public class PushDownContext extends AbstractCollection { private boolean isSortExprPushed = false; private boolean isTopKPushed = false; private boolean isRareTopPushed = false; + private boolean isScriptPushed = false; public PushDownContext(OpenSearchIndex osIndex) { this.osIndex = osIndex; @@ -130,6 +131,9 @@ public boolean 
add(PushDownOperation operation) { if (operation.type() == PushDownType.RARE_TOP) { isRareTopPushed = true; } + if (operation.type() == PushDownType.SCRIPT) { + isScriptPushed = true; + } return true; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java index 81927e9f8d6..c763808164d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java @@ -12,8 +12,7 @@ public enum PushDownType { AGGREGATION, SORT, LIMIT, - SCRIPT, - COLLAPSE, + SCRIPT, // script in predicate SORT_AGG_METRICS, // convert composite aggregate to terms or multi-terms bucket aggregate RARE_TOP, // convert composite aggregate to nested aggregate SORT_EXPR diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java index 0f523d65341..0a189584af3 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java @@ -156,7 +156,7 @@ public Pair visitNamedAggregator( node.getArguments().get(1), condition, name, - new TopHitsParser(name)); + new TopHitsParser(name, false, true)); case "percentile": case "percentile_approx": return make( diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java index ec0e5d919b3..660744c8bb7 100644 --- 
a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java @@ -359,7 +359,8 @@ void analyze_firstAggregation() throws ExpressionNotAnalyzableException { .withAggCall(b -> b.aggregateCall(PPLBuiltinOperators.FIRST, b.field("a")).as("first_a")) .expectDslQuery( "[{\"first_a\":{\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false}}}]") - .expectResponseParser(new MetricParserHelper(List.of(new TopHitsParser("first_a", true)))) + .expectResponseParser( + new MetricParserHelper(List.of(new TopHitsParser("first_a", true, false)))) .verify(); } @@ -369,7 +370,8 @@ void analyze_lastAggregation() throws ExpressionNotAnalyzableException { .withAggCall(b -> b.aggregateCall(PPLBuiltinOperators.LAST, b.field("b")).as("last_b")) .expectDslQuery( "[{\"last_b\":{\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false,\"sort\":[{\"_doc\":{\"order\":\"desc\"}}]}}}]") - .expectResponseParser(new MetricParserHelper(List.of(new TopHitsParser("last_b", true)))) + .expectResponseParser( + new MetricParserHelper(List.of(new TopHitsParser("last_b", true, false)))) .verify(); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java index 5dc88ad5d64..7ba64eaa475 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java @@ -131,9 +131,9 @@ void two_bucket_one_metric_should_pass() { new CompositeAggregationParser(new SingleValueParser("avg")); assertThat( parse(parser, response), - containsInAnyOrder( - 
ImmutableMap.of("type", "cost", "region", "us", "avg", 20d), - ImmutableMap.of("type", "sale", "region", "uk", "avg", 130d))); + contains( + ImmutableMap.of("type", "cost", "region", "us"), ImmutableMap.of("avg", 20d), + ImmutableMap.of("type", "sale", "region", "uk"), ImmutableMap.of("avg", 130d))); } @Test @@ -296,10 +296,11 @@ void top_hits_aggregation_should_pass() { + " }\n" + "}"; OpenSearchAggregationResponseParser parser = - new CompositeAggregationParser(new TopHitsParser("take")); + new CompositeAggregationParser(new TopHitsParser("take", false, true)); assertThat( parse(parser, response), - contains(ImmutableMap.of("type", "take", "take", ImmutableList.of("m", "f")))); + contains( + ImmutableMap.of("type", "take"), ImmutableMap.of("take", ImmutableList.of("m", "f")))); } /** SELECT PERCENTILE(age, 50) FROM accounts. */ @@ -422,9 +423,11 @@ void two_bucket_one_metric_percentile_should_pass() { new SinglePercentileParser("percentile"), new SingleValueParser("max")); assertThat( parse(parser, response), - containsInAnyOrder( - ImmutableMap.of("type", "cost", "region", "us", "percentile", 40d), - ImmutableMap.of("type", "sale", "region", "uk", "percentile", 100d))); + contains( + ImmutableMap.of("type", "cost", "region", "us"), + ImmutableMap.of("percentile", 40d), + ImmutableMap.of("type", "sale", "region", "uk"), + ImmutableMap.of("percentile", 100d))); } /** SELECT PERCENTILES(age) FROM accounts. 
*/ @@ -560,21 +563,11 @@ void two_bucket_percentiles_should_pass() { new CompositeAggregationParser(new PercentilesParser("percentiles")); assertThat( parse(parser, response), - containsInAnyOrder( - ImmutableMap.of( - "type", - "cost", - "region", - "us", - "percentiles", - List.of(21.0, 27.0, 30.0, 35.0, 55.0, 58.0, 60.0)), - ImmutableMap.of( - "type", - "sale", - "region", - "uk", - "percentiles", - List.of(21.0, 27.0, 30.0, 35.0, 55.0, 58.0, 60.0)))); + contains( + ImmutableMap.of("type", "cost", "region", "us"), + ImmutableMap.of("percentiles", List.of(21.0, 27.0, 30.0, 35.0, 55.0, 58.0, 60.0)), + ImmutableMap.of("type", "sale", "region", "uk"), + ImmutableMap.of("percentiles", List.of(21.0, 27.0, 30.0, 35.0, 55.0, 58.0, 60.0)))); } public List> parse(OpenSearchAggregationResponseParser parser, String json) { diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java index c67d7cfaa3e..021a64aad7d 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java @@ -191,19 +191,6 @@ void test_cost_on_sort_pushdown() { assertEquals(99000, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } - @Test - void test_cost_on_collapse_pushdown() { - RelDataType relDataType = mock(RelDataType.class); - lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(10)); - lenient().when(table.getRowType()).thenReturn(relDataType); - - CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - scan.getPushDownContext() - .add( - new PushDownOperation(PushDownType.COLLAPSE, null, (OSRequestBuilderAction) req -> {})); - assertEquals(9900, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); - } - @Test 
void test_cost_on_aggregate_pushdown() { RelDataType relDataType = mock(RelDataType.class); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java index 6cf3c91fe7b..0e14e5e6cbc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java @@ -184,4 +184,98 @@ public void testDedupKeepEmpty2() { + "WHERE `DEPTNO` IS NULL OR `JOB` IS NULL OR `_row_number_dedup_` <= 2"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testDedupExpr() { + String ppl = + "source=EMP | eval NEW_DEPTNO = DEPTNO + 1 | fields EMPNO, ENAME, JOB, DEPTNO, NEW_DEPTNO |" + + " dedup 1 NEW_DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], DEPTNO=[$3], NEW_DEPTNO=[$4])\n" + + " LogicalFilter(condition=[<=($5, 1)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], DEPTNO=[$3], NEW_DEPTNO=[$4]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)])\n" + + " LogicalFilter(condition=[IS NOT NULL($4)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], DEPTNO=[$7]," + + " NEW_DEPTNO=[+($7, 1)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + ppl = + "source=EMP | fields EMPNO, ENAME, JOB, DEPTNO | eval NEW_DEPTNO = DEPTNO + 1 | dedup 1" + + " NEW_DEPTNO"; + root = getRelNode(ppl); + verifyLogical(root, expectedLogical); + ppl = + "source=EMP | eval NEW_DEPTNO = DEPTNO + 1 | fields NEW_DEPTNO, EMPNO, ENAME, JOB | dedup 1" + + " JOB"; + root = getRelNode(ppl); + expectedLogical = + "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION 
BY $3 ORDER BY $3)])\n" + + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + ppl = + "source=EMP | eval NEW_DEPTNO = DEPTNO + 1 | fields NEW_DEPTNO, EMPNO, ENAME, JOB | sort" + + " NEW_DEPTNO | dedup 1 NEW_DEPTNO"; + root = getRelNode(ppl); + expectedLogical = + "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testRenameDedup() { + String ppl = + "source=EMP | eval TEMP_DEPTNO = DEPTNO + 1 | rename TEMP_DEPTNO as NEW_DEPTNO | fields" + + " NEW_DEPTNO, EMPNO, ENAME, JOB | dedup 1 NEW_DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + ppl = + "source=EMP | eval TEMP_DEPTNO = DEPTNO + 1 | rename TEMP_DEPTNO as NEW_DEPTNO | fields" + + " NEW_DEPTNO, EMPNO, ENAME, JOB | dedup 1 JOB"; + root = getRelNode(ppl); + expectedLogical = + "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], 
ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $3 ORDER BY $3)])\n" + + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + ppl = + "source=EMP | eval TEMP_DEPTNO = DEPTNO + 1 | rename TEMP_DEPTNO as NEW_DEPTNO | fields" + + " NEW_DEPTNO, EMPNO, ENAME, JOB | sort NEW_DEPTNO | dedup 1 NEW_DEPTNO"; + root = getRelNode(ppl); + expectedLogical = + "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java index ff230540c93..580de2c80fb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java @@ -1002,7 +1002,7 @@ public void testJoinWithFieldListMaxGreaterThanZero() { + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n" + " LogicalFilter(condition=[<=($3, 1)])\n" + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + " 
LogicalTableScan(table=[[scott, DEPT]])\n"; verifyLogical(root, expectedLogical); verifyResultCount(root, 14); @@ -1013,9 +1013,9 @@ public void testJoinWithFieldListMaxGreaterThanZero() { + "FROM `scott`.`EMP`\n" + "LEFT JOIN (SELECT `DEPTNO`, `DNAME`, `LOC`\n" + "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`" - + " ORDER BY `DEPTNO` NULLS LAST) `_row_number_dedup_`\n" + + " ORDER BY `DEPTNO` NULLS LAST) `_row_number_join_max_dedup_`\n" + "FROM `scott`.`DEPT`) `t`\n" - + "WHERE `_row_number_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`"; + + "WHERE `_row_number_join_max_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -1031,7 +1031,7 @@ public void testJoinWithCriteriaMaxGreaterThanZero() { + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n" + " LogicalFilter(condition=[<=($3, 1)])\n" + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + " LogicalTableScan(table=[[scott, DEPT]])\n"; verifyLogical(root, expectedLogical); verifyResultCount(root, 14); @@ -1043,9 +1043,9 @@ public void testJoinWithCriteriaMaxGreaterThanZero() { + "FROM `scott`.`EMP`\n" + "LEFT JOIN (SELECT `DEPTNO`, `DNAME`, `LOC`\n" + "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`" - + " ORDER BY `DEPTNO` NULLS LAST) `_row_number_dedup_`\n" + + " ORDER BY `DEPTNO` NULLS LAST) `_row_number_join_max_dedup_`\n" + "FROM `scott`.`DEPT`) `t`\n" - + "WHERE `_row_number_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`"; + + "WHERE `_row_number_join_max_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`"; verifyPPLToSparkSQL(root, expectedSparkSql); }