diff --git a/common/src/java/org/apache/hadoop/hive/conf/CteSuggesterType.java b/common/src/java/org/apache/hadoop/hive/conf/CteSuggesterType.java new file mode 100644 index 000000000000..2529c243f69d --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/conf/CteSuggesterType.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.conf; + +/** + * Type of suggester used for common table expression (CTE) detection and materialization. + */ +public enum CteSuggesterType { + /** + * Materialization is based on the AST/SQL structure of the query. The suggester only works when the + * query explicitly defines CTEs using WITH clauses. The suggester applies early during the syntactic analysis phase + * of the query and materializes WITH clauses into tables using heuristics and configured thresholds. + */ + AST, + /** + * Materialization is based on the algebraic structure of the query. The suggester applies during the cost-based + * optimization phase and the exact behavior can be configured via + * {@link org.apache.hadoop.hive.conf.HiveConf.ConfVars#HIVE_CTE_SUGGESTER_CLASS} property. + */ + CBO, + /** + * Materialization is disabled. + */ + NONE; + + public boolean enabled(HiveConf conf) { + return this.name().equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_CTE_SUGGESTER_TYPE)); + } +} diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index fa89ee7be8ec..6a6a0f1a652c 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2767,8 +2767,15 @@ public static enum ConfVars { // CTE @InterfaceStability.Unstable - HIVE_CTE_SUGGESTER_CLASS("hive.optimize.cte.suggester.class", "", - "Class for finding and suggesting common table expressions (CTEs) based on a given query. The class must implement the CommonTableExpressionSuggester interface."), + HIVE_CTE_SUGGESTER_TYPE("hive.optimize.cte.suggester.type", "AST", new StringSet("AST", "CBO", "NONE"), + "The type of the suggester that is used for finding and materializing common table expressions " + + "(CTEs) based on a given query."), + @InterfaceStability.Unstable + HIVE_CTE_SUGGESTER_CLASS("hive.optimize.cte.suggester.class", + "org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester", + "The class implementing the common table expression (CTE) suggester logic. This configuration is " + + "only relevant for the CBO suggester. The class must implement the CommonTableExpressionSuggester " + + "interface."), HIVE_CTE_MATERIALIZE_THRESHOLD("hive.optimize.cte.materialize.threshold", 3, "If the number of references to a CTE clause exceeds this threshold, Hive will materialize it\n" + "before executing the main query block. -1 will disable this feature."), diff --git a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConfVarsValidate.java b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConfVarsValidate.java index ebf1664761b7..f032c44b8ccb 100644 --- a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConfVarsValidate.java +++ b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConfVarsValidate.java @@ -25,6 +25,7 @@ import java.util.List; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_CTE_SUGGESTER_TYPE; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DATETIME_FORMATTER; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DATETIME_RESOLVER_STYLE; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_EXPLAIN_NODE_VISIT_LIMIT; @@ -71,6 +72,13 @@ public static Collection generateParameters() { list.add(new Object[] { HIVE_DATETIME_RESOLVER_STYLE, "lenient", null}); list.add(new Object[] { HIVE_DATETIME_RESOLVER_STYLE, "OTHER", "Invalid value.. expects one of [smart, strict, " + "lenient]" }); + list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "AST", null}); + list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "ast", null}); + list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "CBO", null}); + list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "cbo", null}); + list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "NONE", null}); + list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "nOnE", null}); + list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "OTHER", "Invalid value.. expects one of [ast, cbo, none]"}); return list; } diff --git a/data/conf/perf/tpcds30tb/cte/hive-site.xml b/data/conf/perf/tpcds30tb/cte/hive-site.xml index ddc45fba6d80..d08437eb10ac 100644 --- a/data/conf/perf/tpcds30tb/cte/hive-site.xml +++ b/data/conf/perf/tpcds30tb/cte/hive-site.xml @@ -43,8 +43,20 @@ org.apache.hadoop.hive.ql.lockmgr.DbTxnManager + + hive.optimize.cte.suggester.type + CBO + hive.optimize.cte.suggester.class org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionPrintSuggester + + hive.optimize.cte.materialize.threshold + 1 + + + hive.optimize.cte.materialize.full.aggregate.only + false + diff --git a/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTPCDSCteCliDriver.java b/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTPCDSCteCliDriver.java index 83dc4a035de9..9cbfe092f79c 100644 --- a/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTPCDSCteCliDriver.java +++ b/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTPCDSCteCliDriver.java @@ -26,7 +26,6 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; -import org.junit.runners.model.Statement; import java.io.File; import java.util.List; @@ -44,27 +43,8 @@ public static List getParameters() throws Exception { @ClassRule public static TestRule cliClassRule = adapter.buildClassRule(); - /** - * Rule for calling only {@link CliAdapter#setUp()} and {@link CliAdapter#tearDown()} before/after running each test. - * - * At the moment of writing this class the rule is mostly necessary for calling {@link CliAdapter#tearDown()} to avoid - * state from one test pass to other (e.g., disabling one test should not disable subsequent ones). - * - * {@link CliAdapter#buildTestRule()} cannot not used since it is doing more than necessary for this test case. For - * instance, we do not want to create and destroy the metastore after each query. - */ @Rule - public TestRule cliTestRule = (statement, description) -> new Statement() { - @Override - public void evaluate() throws Throwable { - adapter.setUp(); - try { - statement.evaluate(); - } finally { - adapter.tearDown(); - } - } - }; + public TestRule cliTestRule = adapter.buildTestRule(); private final String name; private final File qfile; diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index 2b6bcd9015af..fab2f78f8a6e 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -22,7 +22,9 @@ import java.net.URL; import java.util.HashMap; import java.util.Map; +import java.util.Set; +import com.google.common.collect.ImmutableSet; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.QTestMiniClusters; import org.apache.hadoop.hive.ql.QTestMiniClusters.MiniClusterType; @@ -349,7 +351,11 @@ public TPCDSCteCliConfig() { setClusterType(MiniClusterType.LLAP_LOCAL); setMetastoreType("postgres.tpcds"); // At the moment only makes sense to check CBO plans + Set skipQueries = ImmutableSet.of(64); // Skipped due to HIVE-29249 for (int i = 1; i < 100; i++) { + if (skipQueries.contains(i)) { + continue; + } includeQuery("cbo_query" + i + ".q"); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 5150de01baac..0211eda39bdd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -135,6 +135,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.conf.Constants; +import org.apache.hadoop.hive.conf.CteSuggesterType; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; @@ -635,7 +636,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept // unfortunately making prunedPartitions immutable is not possible // here with SemiJoins not all tables are costed in CBO, so their // PartitionList is not evaluated until the run phase. - getMetaData(getQB(), true); + getMetaData(getQB(), CteSuggesterType.CBO.enabled(conf)); disableJoinMerge = defaultJoinMerge; sinkOp = genPlan(getQB()); @@ -1743,7 +1744,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.POSTJOIN_ORDERING); // Perform the CTE rewriting near the end of CBO transformations to avoid interference of the new HiveTableSpool // operator with other rules (especially those related to constant folding and branch pruning). - if (!forViewCreation) { + if (!forViewCreation && CteSuggesterType.CBO.enabled(conf)) { calcitePlan = applyCteRewriting(planner, calcitePlan, mdProvider.getMetadataProvider(), executorProvider); if (LOG.isDebugEnabled()) { LOG.debug("Plan after CTE rewriting:\n{}", RelOptUtil.toString(calcitePlan)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 14e6df4dca3a..39372238f923 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -91,6 +91,7 @@ import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.conf.Constants; +import org.apache.hadoop.hive.conf.CteSuggesterType; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.conf.HiveConf.ResultFileFormat; @@ -13056,7 +13057,7 @@ protected boolean analyzeAndResolveChildTree(ASTNode child, PlannerContext plann // Resolve Parse Tree and Get Metadata // Materialization is allowed if it is not a view definition - getMetaData(qb, createVwDesc == null && !forViewCreation); + getMetaData(qb, createVwDesc == null && !forViewCreation && CteSuggesterType.AST.enabled(conf)); LOG.info("Completed getting MetaData in Semantic Analysis"); return true; diff --git a/ql/src/test/queries/clientpositive/cte_cbo_plan_json.q b/ql/src/test/queries/clientpositive/cte_cbo_plan_json.q index f5821b01d21d..7ad7732a8125 100644 --- a/ql/src/test/queries/clientpositive/cte_cbo_plan_json.q +++ b/ql/src/test/queries/clientpositive/cte_cbo_plan_json.q @@ -7,7 +7,7 @@ CREATE TABLE emps ); set hive.optimize.cte.materialize.threshold=1; -set hive.optimize.cte.suggester.class=org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester; +set hive.optimize.cte.suggester.type=CBO; set hive.optimize.cte.materialize.full.aggregate.only=false; EXPLAIN FORMATTED CBO diff --git a/ql/src/test/queries/clientpositive/cte_cbo_rewrite_0.q b/ql/src/test/queries/clientpositive/cte_cbo_rewrite_0.q index f24b1428e8bc..1d294ccc99e3 100644 --- a/ql/src/test/queries/clientpositive/cte_cbo_rewrite_0.q +++ b/ql/src/test/queries/clientpositive/cte_cbo_rewrite_0.q @@ -59,7 +59,7 @@ HAVING AVG(e.salary) < 100000; set hive.optimize.cte.materialize.threshold=1; set hive.optimize.cte.materialize.full.aggregate.only=false; -set hive.optimize.cte.suggester.class=org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester; +set hive.optimize.cte.suggester.type=CBO; SELECT d.name, 'HIGH' FROM emps e diff --git a/ql/src/test/queries/clientpositive/cte_mat_12.q b/ql/src/test/queries/clientpositive/cte_mat_12.q index 89698ca1854e..d68234b1a2f9 100644 --- a/ql/src/test/queries/clientpositive/cte_mat_12.q +++ b/ql/src/test/queries/clientpositive/cte_mat_12.q @@ -1,4 +1,4 @@ --- Verify that hive.optimize.cte.materialize.full.aggregate.only behaves as expected for implicitly discovered (hive.optimize.cte.suggester.class) CTEs in the query +-- Verify that hive.optimize.cte.materialize.full.aggregate.only behaves as expected for implicitly discovered (hive.optimize.cte.suggester.type=CBO) CTEs in the query CREATE TABLE emps ( empid INTEGER, @@ -8,7 +8,7 @@ CREATE TABLE emps ); set hive.optimize.cte.materialize.threshold=1; -set hive.optimize.cte.suggester.class=org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester; +set hive.optimize.cte.suggester.type=CBO; set hive.optimize.cte.materialize.full.aggregate.only=true; diff --git a/ql/src/test/queries/clientpositive/cte_mat_type.q b/ql/src/test/queries/clientpositive/cte_mat_type.q new file mode 100644 index 000000000000..a96240780d53 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cte_mat_type.q @@ -0,0 +1,67 @@ +CREATE TABLE emps +( + empid INTEGER, + deptno INTEGER, + name VARCHAR(10), + salary DECIMAL(8, 2) +); + +CREATE TABLE depts +( + deptno INTEGER, + name VARCHAR(20) +); + +set hive.optimize.cte.materialize.threshold=1; +set hive.optimize.cte.materialize.full.aggregate.only=false; + +set hive.optimize.cte.suggester.type=AST; + +EXPLAIN CBO +WITH dept_avg AS ( + SELECT d.name AS d_name, AVG(e.salary) AS avg_salary + FROM emps e + INNER JOIN depts d ON e.deptno = d.deptno + GROUP BY d.name +) +SELECT d_name, 'HIGH' +FROM dept_avg da +WHERE da.avg_salary >= 100000 +UNION +SELECT d_name, 'LOW' +FROM dept_avg da +WHERE da.avg_salary < 100000; + +set hive.optimize.cte.suggester.type=CBO; + +EXPLAIN CBO +WITH dept_avg AS ( + SELECT d.name AS d_name, AVG(e.salary) AS avg_salary + FROM emps e + INNER JOIN depts d ON e.deptno = d.deptno + GROUP BY d.name +) +SELECT d_name, 'HIGH' +FROM dept_avg da +WHERE da.avg_salary >= 100000 +UNION +SELECT d_name, 'LOW' +FROM dept_avg da +WHERE da.avg_salary < 100000; + +set hive.optimize.cte.suggester.type=NONE; + +EXPLAIN CBO +WITH dept_avg AS ( + SELECT d.name AS d_name, AVG(e.salary) AS avg_salary + FROM emps e + INNER JOIN depts d ON e.deptno = d.deptno + GROUP BY d.name +) +SELECT d_name, 'HIGH' +FROM dept_avg da +WHERE da.avg_salary >= 100000 +UNION +SELECT d_name, 'LOW' +FROM dept_avg da +WHERE da.avg_salary < 100000; diff --git a/ql/src/test/results/clientpositive/llap/cte_mat_type.q.out b/ql/src/test/results/clientpositive/llap/cte_mat_type.q.out new file mode 100644 index 000000000000..17a6b6be7ea4 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/cte_mat_type.q.out @@ -0,0 +1,200 @@ +PREHOOK: query: CREATE TABLE emps +( + empid INTEGER, + deptno INTEGER, + name VARCHAR(10), + salary DECIMAL(8, 2) +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps +POSTHOOK: query: CREATE TABLE emps +( + empid INTEGER, + deptno INTEGER, + name VARCHAR(10), + salary DECIMAL(8, 2) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps +PREHOOK: query: CREATE TABLE depts +( + deptno INTEGER, + name VARCHAR(20) +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@depts +POSTHOOK: query: CREATE TABLE depts +( + deptno INTEGER, + name VARCHAR(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts +PREHOOK: query: EXPLAIN CBO +WITH dept_avg AS ( + SELECT d.name AS d_name, AVG(e.salary) AS avg_salary + FROM emps e + INNER JOIN depts d ON e.deptno = d.deptno + GROUP BY d.name +) +SELECT d_name, 'HIGH' +FROM dept_avg da +WHERE da.avg_salary >= 100000 +UNION +SELECT d_name, 'LOW' +FROM dept_avg da +WHERE da.avg_salary < 100000 +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_avg +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO +WITH dept_avg AS ( + SELECT d.name AS d_name, AVG(e.salary) AS avg_salary + FROM emps e + INNER JOIN depts d ON e.deptno = d.deptno + GROUP BY d.name +) +SELECT d_name, 'HIGH' +FROM dept_avg da +WHERE da.avg_salary >= 100000 +UNION +SELECT d_name, 'LOW' +FROM dept_avg da +WHERE da.avg_salary < 100000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_avg +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{0, 1}]) + HiveProject(d_name=[$0], _o__c1=[$1]) + HiveUnion(all=[true]) + HiveProject(d_name=[$0], _o__c1=[_UTF-16LE'HIGH':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[>=($1, 100000:DECIMAL(6, 0))]) + HiveTableScan(table=[[default, dept_avg]], table:alias=[da]) + HiveProject(d_name=[$0], _o__c1=[_UTF-16LE'LOW':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[<($1, 100000:DECIMAL(6, 0))]) + HiveTableScan(table=[[default, dept_avg]], table:alias=[da]) + +PREHOOK: query: EXPLAIN CBO +WITH dept_avg AS ( + SELECT d.name AS d_name, AVG(e.salary) AS avg_salary + FROM emps e + INNER JOIN depts d ON e.deptno = d.deptno + GROUP BY d.name +) +SELECT d_name, 'HIGH' +FROM dept_avg da +WHERE da.avg_salary >= 100000 +UNION +SELECT d_name, 'LOW' +FROM dept_avg da +WHERE da.avg_salary < 100000 +PREHOOK: type: QUERY +PREHOOK: Input: default@cte_suggestion_0 +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO +WITH dept_avg AS ( + SELECT d.name AS d_name, AVG(e.salary) AS avg_salary + FROM emps e + INNER JOIN depts d ON e.deptno = d.deptno + GROUP BY d.name +) +SELECT d_name, 'HIGH' +FROM dept_avg da +WHERE da.avg_salary >= 100000 +UNION +SELECT d_name, 'LOW' +FROM dept_avg da +WHERE da.avg_salary < 100000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cte_suggestion_0 +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{0, 1}]) + HiveProject(d_name=[$0], _o__c1=[$1]) + HiveUnion(all=[true]) + HiveProject(d_name=[$0], _o__c1=[_UTF-16LE'HIGH':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[>=(CAST(/($1, $2)):DECIMAL(12, 6), 100000:DECIMAL(6, 0))]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(name=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{3}], agg#0=[sum($1)], agg#1=[count($1)]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(deptno=[$1], salary=[$3]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, emps]], table:alias=[e]) + HiveProject(deptno=[$0], name=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, depts]], table:alias=[d]) + HiveProject(d_name=[$0], _o__c1=[_UTF-16LE'LOW':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[<(CAST(/($1, $2)):DECIMAL(12, 6), 100000:DECIMAL(6, 0))]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) + +PREHOOK: query: EXPLAIN CBO +WITH dept_avg AS ( + SELECT d.name AS d_name, AVG(e.salary) AS avg_salary + FROM emps e + INNER JOIN depts d ON e.deptno = d.deptno + GROUP BY d.name +) +SELECT d_name, 'HIGH' +FROM dept_avg da +WHERE da.avg_salary >= 100000 +UNION +SELECT d_name, 'LOW' +FROM dept_avg da +WHERE da.avg_salary < 100000 +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO +WITH dept_avg AS ( + SELECT d.name AS d_name, AVG(e.salary) AS avg_salary + FROM emps e + INNER JOIN depts d ON e.deptno = d.deptno + GROUP BY d.name +) +SELECT d_name, 'HIGH' +FROM dept_avg da +WHERE da.avg_salary >= 100000 +UNION +SELECT d_name, 'LOW' +FROM dept_avg da +WHERE da.avg_salary < 100000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{0, 1}]) + HiveProject(d_name=[$0], _o__c1=[$1]) + HiveUnion(all=[true]) + HiveProject(d_name=[$0], _o__c1=[_UTF-16LE'HIGH':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[>=(CAST(/($1, $2)):DECIMAL(12, 6), 100000:DECIMAL(6, 0))]) + HiveAggregate(group=[{3}], agg#0=[sum($1)], agg#1=[count($1)]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(deptno=[$1], salary=[$3]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, emps]], table:alias=[e]) + HiveProject(deptno=[$0], name=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, depts]], table:alias=[d]) + HiveProject(d_name=[$0], _o__c1=[_UTF-16LE'LOW':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[<(CAST(/($1, $2)):DECIMAL(12, 6), 100000:DECIMAL(6, 0))]) + HiveAggregate(group=[{3}], agg#0=[sum($1)], agg#1=[count($1)]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(deptno=[$1], salary=[$3]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, emps]], table:alias=[e]) + HiveProject(deptno=[$0], name=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, depts]], table:alias=[d]) + diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query1.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query1.q.out index 9c98d2820e2c..92e6a3acb4d5 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query1.q.out @@ -18,9 +18,10 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(sr_customer_sk=[$2], sr_store_sk=[$6], sr_fee=[$13], sr_returned_date_sk=[$19]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($2), IS NOT NULL($19))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2000)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(s_store_sk=[$0]) HiveFilter(condition=[=($24, _UTF-16LE'NM')]) HiveTableScan(table=[[default, store]], table:alias=[store]) @@ -33,7 +34,5 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(sr_customer_sk=[$2], sr_store_sk=[$6], sr_fee=[$13], sr_returned_date_sk=[$19]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($19))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2000)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query11.q.out index cb11533e2d3a..c6eeaa5d5fe1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query11.q.out @@ -34,43 +34,41 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveAggregate(group=[{5, 8}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$3], $f8=[-($2, $1)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_list_price=[$16], ss_sold_date_sk=[$22]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($1, 2000)]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(customer_id=[$0], year_total=[$1], EXPR$0=[>($1, 0:DECIMAL(1, 0))]) - HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{5}], agg#0=[sum($2)]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$3], $f8=[-($2, $1)]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_list_price=[$16], ss_sold_date_sk=[$22]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($1, 1999)]) + HiveFilter(condition=[=($1, 2000)]) HiveProject(d_date_sk=[$0], d_year=[$6]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(customer_id=[$0], year_total=[$1], EXPR$0=[>($1, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{5}], agg#0=[sum($2)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) + HiveTableSpool(table=[[cte, cte_suggestion_3]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($1, 1999)]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(c_customer_id=[$0], $f1=[$1]) HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$3], $f8=[-($2, $1)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_list_price=[$24], ws_sold_date_sk=[$33]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($1, 2000)]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_2]]) + HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$3], $f8=[-($2, $1)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_list_price=[$24], ws_sold_date_sk=[$33]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(customer_id=[$0], year_total=[$1], EXPR$1=[>($1, 0:DECIMAL(1, 0))]) @@ -78,14 +76,8 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$3], $f8=[-($2, $1)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_list_price=[$24], ws_sold_date_sk=[$33]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($1, 1999)]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) + HiveTableScan(table=[[cte, cte_suggestion_3]], table:alias=[cte_suggestion_3]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query14.q.out index d6fb4777839c..11472ce6cb93 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query14.q.out @@ -1,17 +1,39 @@ +CTE Suggestion: +HiveProject($f0=[CAST(/($0, $1)):DECIMAL(22, 6)]) + HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(22, 6))]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)]) + HiveProject($f0=[*(CAST($0):DECIMAL(10, 0), $1)]) + HiveUnion(all=[true]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_quantity=[$9], ss_list_price=[$11], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_quantity=[$17], cs_list_price=[$19], cs_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_quantity=[$17], ws_list_price=[$19], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + CTE Suggestion: HiveProject(d_date_sk=[$0]) HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -CTE Suggestion: -HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveFilter(condition=[sq_count_check($0)]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveTableScan(table=[[default, avg_sales]], table:alias=[avg_sales]) - HiveProject(average_sales=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, avg_sales]], table:alias=[avg_sales]) - CTE Suggestion: HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(11):INTEGER]) HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) @@ -64,239 +86,300 @@ HiveProject(ss_item_sk=[$0]) HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) HiveTableScan(table=[[default, item]], table:alias=[iws]) -Warning: Map Join MAPJOIN[1090][bigTable=?] in task 'Reducer 8' is a cross product -Warning: Map Join MAPJOIN[1129][bigTable=?] in task 'Reducer 12' is a cross product -Warning: Map Join MAPJOIN[1145][bigTable=?] in task 'Reducer 20' is a cross product -Warning: Map Join MAPJOIN[1191][bigTable=?] in task 'Reducer 32' is a cross product +Warning: Map Join MAPJOIN[1163][bigTable=?] in task 'Reducer 4' is a cross product +Warning: Map Join MAPJOIN[1265][bigTable=?] in task 'Reducer 28' is a cross product +Warning: Map Join MAPJOIN[1283][bigTable=?] in task 'Reducer 31' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) HiveProject(channel=[$0], i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2, 3}], groups=[[{0, 1, 2, 3}, {0, 1, 2}, {0, 1}, {0}, {}]], agg#0=[sum($4)], agg#1=[sum($5)]) - HiveProject(channel=[$0], i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], sales=[$4], number_sales=[$5]) - HiveUnion(all=[true]) - HiveProject(channel=[_UTF-16LE'store':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) - HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) - HiveFilter(condition=[IS NOT NULL($3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) - HiveProject($f0=[$8], $f1=[$9], $f2=[$10], $f3=[*(CAST($1):DECIMAL(10, 0), $2)]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveSemiJoin(condition=[=($0, $7)], joinType=[semi]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_quantity=[$9], ss_list_price=[$11], ss_sold_date_sk=[$22]) - HiveFilter(condition=[IS NOT NULL($22)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(11):INTEGER]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_sold_date_sk=[$22]) - HiveFilter(condition=[IS NOT NULL($22)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$14], cs_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[sq_count_check($0)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveTableScan(table=[[default, avg_sales]], table:alias=[avg_sales]) - HiveProject(average_sales=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, avg_sales]], table:alias=[avg_sales]) - HiveProject(channel=[_UTF-16LE'catalog':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) - HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) - HiveFilter(condition=[IS NOT NULL($3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) - HiveProject($f0=[$8], $f1=[$9], $f2=[$10], $f3=[*(CAST($1):DECIMAL(10, 0), $2)]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveSemiJoin(condition=[=($0, $7)], joinType=[semi]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$14], cs_quantity=[$17], cs_list_price=[$19], cs_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(11):INTEGER]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_sold_date_sk=[$22]) - HiveFilter(condition=[IS NOT NULL($22)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$14], cs_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[sq_count_check($0)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveTableScan(table=[[default, avg_sales]], table:alias=[avg_sales]) - HiveProject(average_sales=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, avg_sales]], table:alias=[avg_sales]) - HiveProject(channel=[_UTF-16LE'web':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) - HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) - HiveFilter(condition=[IS NOT NULL($3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) - HiveProject($f0=[$8], $f1=[$9], $f2=[$10], $f3=[*(CAST($1):DECIMAL(10, 0), $2)]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveSemiJoin(condition=[=($0, $7)], joinType=[semi]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_quantity=[$17], ws_list_price=[$19], ws_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(11):INTEGER]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) + HiveProject(channel=[$0], i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f4=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) + HiveProject(channel=[$0], i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], sales=[$4], number_sales=[$5]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) + HiveJoin(condition=[>($3, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) + HiveProject($f0=[$8], $f1=[$9], $f2=[$10], $f3=[*(CAST($1):DECIMAL(10, 0), $2)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[=($0, $7)], joinType=[semi]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_quantity=[$9], ss_list_price=[$11], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableSpool(table=[[cte, cte_suggestion_2]]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(11):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_sold_date_sk=[$22]) - HiveFilter(condition=[IS NOT NULL($22)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$14], cs_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[CAST(/($0, $1)):DECIMAL(22, 6)]) + HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(22, 6))]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)]) + HiveProject($f0=[*(CAST($0):DECIMAL(10, 0), $1)]) + HiveUnion(all=[true]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_quantity=[$9], ss_list_price=[$11], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_quantity=[$17], cs_list_price=[$19], cs_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1998, $6), <=($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_quantity=[$17], ws_list_price=[$19], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1998, $6), <=($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'catalog':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) + HiveJoin(condition=[>($3, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) + HiveProject($f0=[$8], $f1=[$9], $f2=[$10], $f3=[*(CAST($1):DECIMAL(10, 0), $2)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[=($0, $7)], joinType=[semi]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$14], cs_quantity=[$17], cs_list_price=[$19], cs_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$14], cs_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$14], cs_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[CAST(/($0, $1)):DECIMAL(22, 6)]) + HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(22, 6))]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)]) + HiveProject($f0=[*(CAST($0):DECIMAL(10, 0), $1)]) + HiveUnion(all=[true]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_quantity=[$9], ss_list_price=[$11], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_quantity=[$17], cs_list_price=[$19], cs_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1998, $6), <=($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_quantity=[$17], ws_list_price=[$19], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1998, $6), <=($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'web':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) + HiveJoin(condition=[>($3, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) + HiveProject($f0=[$8], $f1=[$9], $f2=[$10], $f3=[*(CAST($1):DECIMAL(10, 0), $2)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[=($0, $7)], joinType=[semi]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_quantity=[$17], ws_list_price=[$19], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[sq_count_check($0)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveTableScan(table=[[default, avg_sales]], table:alias=[avg_sales]) - HiveProject(average_sales=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, avg_sales]], table:alias=[avg_sales]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$14], cs_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[CAST(/($0, $1)):DECIMAL(22, 6)]) + HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(22, 6))]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)]) + HiveProject($f0=[*(CAST($0):DECIMAL(10, 0), $1)]) + HiveUnion(all=[true]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_quantity=[$9], ss_list_price=[$11], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1999, $6), <=($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_quantity=[$17], cs_list_price=[$19], cs_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1998, $6), <=($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_quantity=[$17], ws_list_price=[$19], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(<=(1998, $6), <=($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out index 32d0bf74ef8a..d0b62cbf6906 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out @@ -39,8 +39,8 @@ HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(1):INTEGER] HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[316][bigTable=?] in task 'Reducer 15' is a cross product -Warning: Map Join MAPJOIN[318][bigTable=?] in task 'Reducer 14' is a cross product +Warning: Map Join MAPJOIN[320][bigTable=?] in task 'Reducer 16' is a cross product +Warning: Map Join MAPJOIN[322][bigTable=?] in task 'Reducer 17' is a cross product CBO PLAN: HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(sales=[$0]) @@ -52,9 +52,10 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(cs_bill_customer_sk=[$2], cs_item_sk=[$14], cs_quantity=[$17], cs_list_price=[$19], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(1):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f1=[$0]) HiveFilter(condition=[>($2, 4)]) HiveProject(i_item_sk=[$3], d_date=[$1], $f2=[$2]) @@ -66,7 +67,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveFilter(condition=[IS NOT NULL($22)]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveFilter(condition=[OR(=($6, 1999), =($6, 2000), =($6, 2001), =($6, 2002))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0]) HiveTableScan(table=[[default, item]], table:alias=[item]) @@ -89,7 +90,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveFilter(condition=[OR(=($6, 1999), =($6, 2000), =($6, 2001), =($6, 2002))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(sales=[*(CAST($2):DECIMAL(10, 0), $3)]) HiveSemiJoin(condition=[=($1, $8)], joinType=[semi]) @@ -98,9 +99,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(ws_item_sk=[$2], ws_bill_customer_sk=[$3], ws_quantity=[$17], ws_list_price=[$19], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(1):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject($f1=[$0]) HiveFilter(condition=[>($2, 4)]) HiveProject(i_item_sk=[$3], d_date=[$1], $f2=[$2]) @@ -112,7 +111,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveFilter(condition=[IS NOT NULL($22)]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveFilter(condition=[OR(=($6, 1999), =($6, 2000), =($6, 2001), =($6, 2002))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0]) HiveTableScan(table=[[default, item]], table:alias=[item]) @@ -135,6 +134,6 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveFilter(condition=[OR(=($6, 1999), =($6, 2000), =($6, 2001), =($6, 2002))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query24.q.out index 1f324b32875f..525f172aed86 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query24.q.out @@ -18,7 +18,7 @@ HiveJoin(condition=[AND(=($1, $7), =($2, $16))], joinType=[inner], algorithm=[no HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) HiveTableScan(table=[[default, store]], table:alias=[store]) -Warning: Map Join MAPJOIN[331][bigTable=?] in task 'Reducer 10' is a cross product +Warning: Map Join MAPJOIN[219][bigTable=?] in task 'Reducer 15' is a cross product CBO PLAN: HiveProject(c_last_name=[$0], c_first_name=[$1], s_store_name=[$2], paid=[$3]) HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -29,25 +29,28 @@ HiveProject(c_last_name=[$0], c_first_name=[$1], s_store_name=[$2], paid=[$3]) HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_units=[$7], i_manager_id=[$8], $f9=[$9]) HiveAggregate(group=[{9, 10, 13, 17, 18, 21, 22, 23, 24}], agg#0=[sum($4)]) HiveJoin(condition=[=($0, $20)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($1, $7), =($2, $16))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($3, $6), =($0, $5))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$6], ss_ticket_number=[$8], ss_sales_price=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], EXPR$0=[$8], s_store_sk=[$9], s_store_name=[$10], s_state=[$11], s_zip=[$12]) - HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], EXPR$0=[UPPER($10)]) - HiveFilter(condition=[IS NOT NULL($9)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(sr_item_sk=[$5], c_customer_sk=[CAST($7):BIGINT], s_store_sk=[CAST($16):BIGINT], sr_ticket_number=[$6], ss_sales_price=[$4], sr_item_sk0=[$5], sr_ticket_number0=[$6], c_customer_sk0=[$7], ca_address_sk=[CAST($12):BIGINT], c_first_name=[$9], c_last_name=[$10], c_birth_country=[$11], ca_address_sk0=[$12], ca_state=[$13], s_zip=[$19], EXPR$0=[$15], s_store_sk0=[$16], s_store_name=[$17], s_state=[$18], s_zip0=[$19]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], ss_store_sk=[$2], ss_ticket_number=[$3], ss_sales_price=[$4], sr_item_sk=[$5], sr_ticket_number=[$6], c_customer_sk=[$7], c_current_addr_sk=[$8], c_first_name=[$9], c_last_name=[$10], c_birth_country=[$11], ca_address_sk=[$12], ca_state=[$13], ca_zip=[$14], EXPR$0=[$15], s_store_sk=[$16], s_store_name=[$17], s_state=[$18], s_zip=[$19]) + HiveJoin(condition=[AND(=($1, $7), =($2, $16))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $6), =($0, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$6], ss_ticket_number=[$8], ss_sales_price=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], EXPR$0=[$8], s_store_sk=[$9], s_store_name=[$10], s_state=[$11], s_zip=[$12]) + HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], EXPR$0=[UPPER($10)]) + HiveFilter(condition=[IS NOT NULL($9)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) HiveFilter(condition=[=($17, _UTF-16LE'orchid ')]) HiveTableScan(table=[[default, item]], table:alias=[item]) @@ -58,25 +61,8 @@ HiveProject(c_last_name=[$0], c_first_name=[$1], s_store_name=[$2], paid=[$3]) HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_color=[$7], i_units=[$8], i_manager_id=[$9], $f10=[$10]) HiveAggregate(group=[{9, 10, 13, 17, 18, 21, 22, 23, 24, 25}], agg#0=[sum($4)]) HiveJoin(condition=[=($0, $20)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($1, $7), =($2, $16))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($3, $6), =($0, $5))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$6], ss_ticket_number=[$8], ss_sales_price=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], EXPR$0=[$8], s_store_sk=[$9], s_store_name=[$10], s_state=[$11], s_zip=[$12]) - HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], EXPR$0=[UPPER($10)]) - HiveFilter(condition=[IS NOT NULL($9)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(sr_item_sk=[$5], c_customer_sk=[CAST($7):BIGINT], s_store_sk=[CAST($16):BIGINT], sr_ticket_number=[$6], ss_sales_price=[$4], sr_item_sk0=[$5], sr_ticket_number0=[$6], c_customer_sk0=[$7], ca_address_sk=[CAST($12):BIGINT], c_first_name=[$9], c_last_name=[$10], c_birth_country=[$11], ca_address_sk0=[$12], ca_state=[$13], s_zip=[$19], EXPR$0=[$15], s_store_sk0=[$16], s_store_name=[$17], s_state=[$18], s_zip0=[$19]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query30.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query30.q.out index 88b12fed8466..2688d8355711 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query30.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query30.q.out @@ -25,30 +25,28 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5= HiveProject(wr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2]) HiveAggregate(group=[{1, 2}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_state=[$8]) - HiveFilter(condition=[IS NOT NULL($8)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[IS NOT NULL($8)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wr_returning_customer_sk=[$6], wr_returning_addr_sk=[$9], wr_return_amt=[$14], wr_returned_date_sk=[$23]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($6), IS NOT NULL($23))]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_state=[$0]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)]) HiveProject(ca_state=[$0], wr_returning_customer_sk=[$1], $f2=[$2]) HiveAggregate(group=[{1, 2}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_state=[$8]) - HiveFilter(condition=[IS NOT NULL($8)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wr_returning_customer_sk=[$6], wr_returning_addr_sk=[$9], wr_return_amt=[$14], wr_returned_date_sk=[$23]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($23))]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query31.q.out index 82dc0aa17083..c4ac275acfe0 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query31.q.out @@ -36,42 +36,39 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($ HiveProject($f0=[$0], $f3=[$1], EXPR$4=[>($1, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{1}], agg#0=[sum($3)]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_county=[$7]) - HiveFilter(condition=[IS NOT NULL($7)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableSpool(table=[[cte, cte_suggestion_4]]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[IS NOT NULL($7)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($3)]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_county=[$7]) - HiveFilter(condition=[IS NOT NULL($7)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableScan(table=[[cte, cte_suggestion_4]], table:alias=[cte_suggestion_4]) HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) + HiveTableSpool(table=[[cte, cte_suggestion_2]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f3=[$1], EXPR$4=[>($1, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{1}], agg#0=[sum($3)]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_county=[$7]) - HiveFilter(condition=[IS NOT NULL($7)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableScan(table=[[cte, cte_suggestion_4]], table:alias=[cte_suggestion_4]) HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) + HiveTableSpool(table=[[cte, cte_suggestion_5]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(ca_county=[$0], $f1=[$1], ca_county0=[$2], $f10=[$3], ca_county1=[$4], $f11=[$5]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -79,39 +76,24 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($ HiveAggregate(group=[{5}], agg#0=[sum($1)]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_county=[$7]) - HiveFilter(condition=[IS NOT NULL($7)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableSpool(table=[[cte, cte_suggestion_3]]) + HiveProject(ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableScan(table=[[cte, cte_suggestion_5]], table:alias=[cte_suggestion_5]) + HiveTableScan(table=[[cte, cte_suggestion_4]], table:alias=[cte_suggestion_4]) HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{5}], agg#0=[sum($1)]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_county=[$7]) - HiveFilter(condition=[IS NOT NULL($7)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableScan(table=[[cte, cte_suggestion_3]], table:alias=[cte_suggestion_3]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) + HiveTableScan(table=[[cte, cte_suggestion_4]], table:alias=[cte_suggestion_4]) HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{5}], agg#0=[sum($1)]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_county=[$7]) - HiveFilter(condition=[IS NOT NULL($7)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableScan(table=[[cte, cte_suggestion_3]], table:alias=[cte_suggestion_3]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) + HiveTableScan(table=[[cte, cte_suggestion_4]], table:alias=[cte_suggestion_4]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query33.q.out index 9cfefefedb45..f0aaf347e785 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query33.q.out @@ -33,18 +33,22 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ss_item_sk=[$1], ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) - HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) + HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableSpool(table=[[cte, cte_suggestion_2]]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableSpool(table=[[cte, cte_suggestion_3]]) + HiveProject(i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_manufact_id=[$0], $f1=[$1]) HiveAggregate(group=[{10}], agg#0=[sum($2)]) HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) @@ -54,18 +58,10 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(cs_bill_addr_sk=[$5], cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) - HiveFilter(condition=[IS NOT NULL($13)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) + HiveTableScan(table=[[cte, cte_suggestion_3]], table:alias=[cte_suggestion_3]) HiveProject(i_manufact_id=[$0], $f1=[$1]) HiveAggregate(group=[{10}], agg#0=[sum($2)]) HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) @@ -75,16 +71,8 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ws_item_sk=[$2], ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) - HiveFilter(condition=[IS NOT NULL($13)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) + HiveTableScan(table=[[cte, cte_suggestion_3]], table:alias=[cte_suggestion_3]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query35.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query35.q.out index 026308856485..90e8be7de495 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query35.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query35.q.out @@ -25,27 +25,24 @@ HiveProject(ca_state=[$0], cd_gender=[$1], cd_marital_status=[$2], cnt1=[$3], _o HiveProject(ss_customer_sk=[$2], ss_sold_date_sk=[$22]) HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), <($10, 4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), <($10, 4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(literalTrue=[true], ws_bill_customer_sk=[$0]) HiveAggregate(group=[{0}]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_bill_customer_sk=[$3], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), <($10, 4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(literalTrue=[true], cs_ship_customer_sk=[$0]) HiveAggregate(group=[{0}]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_ship_customer_sk=[$6], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), <($10, 4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(ca_address_sk=[$0], ca_state=[$8]) HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query4.q.out index e21ff99330a5..742a81f59972 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query4.q.out @@ -39,14 +39,16 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$5], $f8=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))]) - HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_sales_price=[$22], ws_ext_wholesale_cost=[$23], ws_ext_list_price=[$24], ws_sold_date_sk=[$33]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($1, 2000)]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$5], $f8=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))]) + HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_sales_price=[$22], ws_ext_wholesale_cost=[$23], ws_ext_list_price=[$24], ws_sold_date_sk=[$33]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($1, 2000)]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveJoin(condition=[AND(CASE($5, CASE($10, >(/($7, $9), /($2, $4)), false), false), =($0, $8))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -56,14 +58,12 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveAggregate(group=[{5, 8}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$5], $f8=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))]) - HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_sales_price=[$14], ss_ext_wholesale_cost=[$15], ss_ext_list_price=[$16], ss_sold_date_sk=[$22]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($1, 2000)]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_2]]) + HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$5], $f8=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))]) + HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_sales_price=[$14], ss_ext_wholesale_cost=[$15], ss_ext_list_price=[$16], ss_sold_date_sk=[$22]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(customer_id=[$0], year_total=[$1], EXPR$131=[>($1, 0:DECIMAL(1, 0))]) @@ -71,28 +71,24 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$0], ss_sold_date_sk=[$5], $f8=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))]) - HiveProject(ss_customer_sk=[$2], ss_ext_discount_amt=[$13], ss_ext_sales_price=[$14], ss_ext_wholesale_cost=[$15], ss_ext_list_price=[$16], ss_sold_date_sk=[$22]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($1, 1999)]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) + HiveTableSpool(table=[[cte, cte_suggestion_4]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($1, 1999)]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(c_customer_id=[$0], $f1=[$1]) HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_bill_customer_sk=[$0], cs_sold_date_sk=[$5], $f8=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))]) - HiveProject(cs_bill_customer_sk=[$2], cs_ext_discount_amt=[$21], cs_ext_sales_price=[$22], cs_ext_wholesale_cost=[$23], cs_ext_list_price=[$24], cs_sold_date_sk=[$33]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($1, 2000)]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_3]]) + HiveProject(cs_bill_customer_sk=[$0], cs_sold_date_sk=[$5], $f8=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))]) + HiveProject(cs_bill_customer_sk=[$2], cs_ext_discount_amt=[$21], cs_ext_sales_price=[$22], cs_ext_wholesale_cost=[$23], cs_ext_list_price=[$24], cs_sold_date_sk=[$33]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(customer_id=[$0], year_total=[$1], EXPR$1=[>($1, 0:DECIMAL(1, 0))]) @@ -100,14 +96,8 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_bill_customer_sk=[$0], cs_sold_date_sk=[$5], $f8=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))]) - HiveProject(cs_bill_customer_sk=[$2], cs_ext_discount_amt=[$21], cs_ext_sales_price=[$22], cs_ext_wholesale_cost=[$23], cs_ext_list_price=[$24], cs_sold_date_sk=[$33]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($1, 1999)]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_3]], table:alias=[cte_suggestion_3]) + HiveTableScan(table=[[cte, cte_suggestion_4]], table:alias=[cte_suggestion_4]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(customer_id=[$0], year_total=[$1], EXPR$0=[>($1, 0:DECIMAL(1, 0))]) @@ -115,14 +105,8 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_bill_customer_sk=[$0], ws_sold_date_sk=[$5], $f8=[/(+(-(-($4, $3), $1), $2), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5))]) - HiveProject(ws_bill_customer_sk=[$3], ws_ext_discount_amt=[$21], ws_ext_sales_price=[$22], ws_ext_wholesale_cost=[$23], ws_ext_list_price=[$24], ws_sold_date_sk=[$33]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($1, 1999)]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) + HiveTableScan(table=[[cte, cte_suggestion_4]], table:alias=[cte_suggestion_4]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query49.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query49.q.out index aefffaccb0ec..cae929a48897 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query49.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query49.q.out @@ -27,9 +27,10 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(ws_item_sk=[$2], ws_order_number=[$16], ws_quantity=[$17], ws_net_paid=[$28], ws_net_profit=[$32], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(>($17, 0), >($32, 1:DECIMAL(1, 0)), >($28, 0:DECIMAL(1, 0)), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(channel=[_UTF-16LE'catalog':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)]) @@ -44,9 +45,7 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(cs_item_sk=[$14], cs_order_number=[$16], cs_quantity=[$17], cs_net_paid=[$28], cs_net_profit=[$32], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(>($17, 0), >($32, 1:DECIMAL(1, 0)), >($28, 0:DECIMAL(1, 0)), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(channel=[_UTF-16LE'store':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)]) @@ -61,7 +60,5 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(ss_item_sk=[$1], ss_ticket_number=[$8], ss_quantity=[$9], ss_net_paid=[$19], ss_net_profit=[$21], ss_sold_date_sk=[$22]) HiveFilter(condition=[AND(>($9, 0), >($21, 1:DECIMAL(1, 0)), >($19, 0:DECIMAL(1, 0)), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[sts]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query54.q.out index 3cfb0a6d6128..7fd2be350e5f 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query54.q.out @@ -6,10 +6,12 @@ CTE Suggestion: HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($3))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[292][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[286][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[294][bigTable=?] in task 'Map 21' is a cross product -Warning: Map Join MAPJOIN[291][bigTable=?] in task 'Map 21' is a cross product +Warning: Map Join MAPJOIN[304][bigTable=?] in task 'Map 3' is a cross product +Warning: Map Join MAPJOIN[298][bigTable=?] in task 'Map 3' is a cross product +Warning: Map Join MAPJOIN[358][bigTable=?] in task 'Map 21' is a cross product +Warning: Map Join MAPJOIN[359][bigTable=?] in task 'Map 21' is a cross product +Warning: Map Join MAPJOIN[378][bigTable=?] in task 'Map 23' is a cross product +Warning: Map Join MAPJOIN[379][bigTable=?] in task 'Map 23' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(segment=[$0], num_customers=[$1], segment_base=[*($0, 50)]) @@ -30,8 +32,10 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject($f0=[$0]) HiveAggregate(group=[{0}]) HiveProject($f0=[+($3, 3)]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30], ROW__IS__DELETED=[$31]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) @@ -39,8 +43,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject($f0=[$0]) HiveAggregate(group=[{0}]) HiveProject($f0=[+($3, 1)]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], c_customer_sk=[$5], c_current_addr_sk=[$6]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -67,25 +70,31 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(i_item_sk=[$0]) HiveFilter(condition=[AND(=($10, _UTF-16LE'consignment '), =($12, _UTF-16LE'Jewelry '))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0], d_month_seq=[$1], $f0=[$2], $f00=[$3]) HiveJoin(condition=[<=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[<=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0], d_month_seq=[$3]) - HiveFilter(condition=[IS NOT NULL($3)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_month_seq=[$1]) + HiveUnion(all=[true]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30], ROW__IS__DELETED=[$31]) + HiveFilter(condition=[OR(<>(1999, $6), <>(3, $8))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30], ROW__IS__DELETED=[$31]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0]) HiveAggregate(group=[{0}]) HiveProject($f0=[+($3, 1)]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject($f0=[$0]) HiveAggregate(group=[{0}]) HiveProject($f0=[+($3, 3)]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query56.q.out index 4f39e767f3c1..a7fbb4bfa350 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query56.q.out @@ -28,16 +28,18 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ss_item_sk=[$1], ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(1):INTEGER]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-8:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-8:DECIMAL(5, 2)):DECIMAL(5, 2)]) + HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$1]) - HiveFilter(condition=[IN($17, _UTF-16LE'chiffon', _UTF-16LE'lace', _UTF-16LE'orchid')]) + HiveFilter(condition=[OR(=($17, _UTF-16LE'lace'), =($17, _UTF-16LE'orchid'), =($17, _UTF-16LE'chiffon'))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{10}], agg#0=[sum($2)]) @@ -48,16 +50,12 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(cs_bill_addr_sk=[$5], cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(1):INTEGER]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-8:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$1]) - HiveFilter(condition=[IN($17, _UTF-16LE'chiffon', _UTF-16LE'lace', _UTF-16LE'orchid')]) + HiveFilter(condition=[OR(=($17, _UTF-16LE'lace'), =($17, _UTF-16LE'orchid'), =($17, _UTF-16LE'chiffon'))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{10}], agg#0=[sum($2)]) @@ -68,15 +66,11 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ws_item_sk=[$2], ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(1):INTEGER]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-8:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$1]) - HiveFilter(condition=[IN($17, _UTF-16LE'chiffon', _UTF-16LE'lace', _UTF-16LE'orchid')]) + HiveFilter(condition=[OR(=($17, _UTF-16LE'lace'), =($17, _UTF-16LE'orchid'), =($17, _UTF-16LE'chiffon'))]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out index 19fab63adbe8..caefa922c75d 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out @@ -18,11 +18,12 @@ HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 7' is a cross product +Warning: Map Join MAPJOIN[385][bigTable=?] in task 'Map 17' is a cross product +Warning: Map Join MAPJOIN[393][bigTable=?] in task 'Map 16' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) - HiveJoin(condition=[AND(=($4, $0), BETWEEN(false, $7, $2, $3), BETWEEN(false, $5, $2, $3), BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $7), *(1.1:DECIMAL(2, 1), $7)), BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $5), *(1.1:DECIMAL(2, 1), $5)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($4, $0), <=($2, $7), <=($7, $3), <=($2, $5), <=($5, $3), <=(*(0.9:DECIMAL(1, 1), $7), $1), <=($1, *(1.1:DECIMAL(2, 1), $7)), <=(*(0.9:DECIMAL(1, 1), $5), $1), <=($1, *(1.1:DECIMAL(2, 1), $5)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(item_id=[$0], ws_item_rev=[$1], EXPR$0=[*(0.9:DECIMAL(1, 1), $1)], EXPR$1=[*(1.1:DECIMAL(2, 1), $1)]) HiveAggregate(group=[{6}], agg#0=[sum($1)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -31,9 +32,10 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ws_item_sk=[$2], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) HiveFilter(condition=[IS NOT NULL($33)]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) @@ -51,7 +53,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[AND(=($2, $0), BETWEEN(false, $3, *(0.9:DECIMAL(1, 1), $1), *(1.1:DECIMAL(2, 1), $1)), BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $3), *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($2, $0), <=(*(0.9:DECIMAL(1, 1), $1), $3), <=($3, *(1.1:DECIMAL(2, 1), $1)), <=(*(0.9:DECIMAL(1, 1), $3), $1), <=($1, *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{6}], agg#0=[sum($1)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -60,9 +62,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) HiveFilter(condition=[IS NOT NULL($33)]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) @@ -88,9 +88,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_item_sk=[$1], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) HiveFilter(condition=[IS NOT NULL($22)]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query59.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query59.q.out index 01fa8c08caf1..c5c076cf7c66 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query59.q.out @@ -22,30 +22,24 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) - HiveProject($f0=[$4], $f1=[$0], $f2=[CASE($5, $1, null:DECIMAL(7, 2))], $f3=[CASE($6, $1, null:DECIMAL(7, 2))], $f4=[CASE($7, $1, null:DECIMAL(7, 2))], $f5=[CASE($8, $1, null:DECIMAL(7, 2))], $f6=[CASE($9, $1, null:DECIMAL(7, 2))], $f7=[CASE($10, $1, null:DECIMAL(7, 2))], $f8=[CASE($11, $1, null:DECIMAL(7, 2))]) - HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_store_sk=[$6], ss_sales_price=[$12], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_week_seq=[$4], EXPR$0=[=($14, _UTF-16LE'Sunday ')], EXPR$1=[=($14, _UTF-16LE'Monday ')], EXPR$2=[=($14, _UTF-16LE'Tuesday ')], EXPR$3=[=($14, _UTF-16LE'Wednesday')], EXPR$4=[=($14, _UTF-16LE'Thursday ')], EXPR$5=[=($14, _UTF-16LE'Friday ')], EXPR$6=[=($14, _UTF-16LE'Saturday ')]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject($f0=[$4], $f1=[$0], $f2=[CASE($5, $1, null:DECIMAL(7, 2))], $f3=[CASE($6, $1, null:DECIMAL(7, 2))], $f4=[CASE($7, $1, null:DECIMAL(7, 2))], $f5=[CASE($8, $1, null:DECIMAL(7, 2))], $f6=[CASE($9, $1, null:DECIMAL(7, 2))], $f7=[CASE($10, $1, null:DECIMAL(7, 2))], $f8=[CASE($11, $1, null:DECIMAL(7, 2))]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$6], ss_sales_price=[$12], ss_sold_date_sk=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($22))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], EXPR$0=[=($14, _UTF-16LE'Sunday ')], EXPR$1=[=($14, _UTF-16LE'Monday ')], EXPR$2=[=($14, _UTF-16LE'Tuesday ')], EXPR$3=[=($14, _UTF-16LE'Wednesday')], EXPR$4=[=($14, _UTF-16LE'Thursday ')], EXPR$5=[=($14, _UTF-16LE'Friday ')], EXPR$6=[=($14, _UTF-16LE'Saturday ')]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) + HiveFilter(condition=[AND(<=(1185, $3), <=($3, 1196), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], d_week_seq=[$8]) HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)], agg#5=[sum($8)]) - HiveProject($f0=[$4], $f1=[$0], $f2=[CASE($5, $1, null:DECIMAL(7, 2))], $f3=[CASE($6, $1, null:DECIMAL(7, 2))], $f4=[CASE($7, $1, null:DECIMAL(7, 2))], $f5=[CASE($8, $1, null:DECIMAL(7, 2))], $f6=[CASE($9, $1, null:DECIMAL(7, 2))], $f7=[CASE($10, $1, null:DECIMAL(7, 2))], $f8=[CASE($11, $1, null:DECIMAL(7, 2))]) - HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_store_sk=[$6], ss_sales_price=[$12], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_week_seq=[$4], EXPR$0=[=($14, _UTF-16LE'Sunday ')], EXPR$1=[=($14, _UTF-16LE'Monday ')], EXPR$2=[=($14, _UTF-16LE'Tuesday ')], EXPR$3=[=($14, _UTF-16LE'Wednesday')], EXPR$4=[=($14, _UTF-16LE'Thursday ')], EXPR$5=[=($14, _UTF-16LE'Friday ')], EXPR$6=[=($14, _UTF-16LE'Saturday ')]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1197, 1208), IS NOT NULL($4))]) + HiveFilter(condition=[AND(<=(1197, $3), <=($3, 1208), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query60.q.out index 64126092ba03..b1e13678fadd 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query60.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query60.q.out @@ -28,17 +28,20 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_item_sk=[$1], ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(9):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(9):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_2]]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) + HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_item_id=[$1]) - HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(i_item_id=[$1]) + HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{10}], agg#0=[sum($2)]) HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) @@ -48,17 +51,11 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(cs_bill_addr_sk=[$5], cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(9):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_item_id=[$1]) - HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{10}], agg#0=[sum($2)]) HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) @@ -68,15 +65,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ws_item_sk=[$2], ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(9):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_item_id=[$1]) - HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query61.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query61.q.out index 3dc8da7141c3..cc654c232e2c 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query61.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query61.q.out @@ -22,64 +22,57 @@ HiveProject(s_store_sk=[$0]) HiveFilter(condition=[=($27, -7:DECIMAL(1, 0))]) HiveTableScan(table=[[default, store]], table:alias=[store]) -Warning: Map Join MAPJOIN[249][bigTable=?] in task 'Reducer 7' is a cross product +Warning: Map Join MAPJOIN[247][bigTable=?] in task 'Reducer 10' is a cross product CBO PLAN: HiveProject(promotions=[$0], total=[$1], _o__c2=[*(/(CAST($0):DECIMAL(15, 4), CAST($1):DECIMAL(15, 4)), 100:DECIMAL(10, 0))]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[sum($7)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -7:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], ss_store_sk=[$2], ss_promo_sk=[$3], ss_ext_sales_price=[$4], ss_sold_date_sk=[$5], d_date_sk=[$6], i_item_sk=[$7], s_store_sk=[$8], p_promo_sk=[$9]) - HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$6], ss_promo_sk=[$7], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($6), IS NOT NULL($7), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(c_customer_sk=[$0], ca_address_sk=[CAST($2):BIGINT], ca_address_sk0=[$2]) + HiveTableSpool(table=[[cte, cte_suggestion_3]]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], ca_address_sk=[$2]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -7:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$6], ss_promo_sk=[$7], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($6), IS NOT NULL($7), IS NOT NULL($22))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableSpool(table=[[cte, cte_suggestion_2]]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 11))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) HiveProject(i_item_sk=[$0]) HiveFilter(condition=[=($12, _UTF-16LE'Electronics ')]) HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) HiveProject(s_store_sk=[$0]) HiveFilter(condition=[=($27, -7:DECIMAL(1, 0))]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[OR(=($9, _UTF-16LE'Y'), =($11, _UTF-16LE'Y'), =($8, _UTF-16LE'Y'))]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[OR(=($9, _UTF-16LE'Y'), =($11, _UTF-16LE'Y'), =($8, _UTF-16LE'Y'))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[sum($6)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -7:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], ss_store_sk=[$2], ss_ext_sales_price=[$3], ss_sold_date_sk=[$4], d_date_sk=[$5], i_item_sk=[$6], s_store_sk=[$7]) - HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$6], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($6), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 11))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[=($12, _UTF-16LE'Electronics ')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($27, -7:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(c_customer_sk=[$0], ca_address_sk=[CAST($2):BIGINT], ca_address_sk0=[$2]) + HiveTableScan(table=[[cte, cte_suggestion_3]], table:alias=[cte_suggestion_3]) + HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$6], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($6), IS NOT NULL($22))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query64.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query64.q.out deleted file mode 100644 index 7b2e0ee6e4bd..000000000000 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query64.q.out +++ /dev/null @@ -1,167 +0,0 @@ -CTE Suggestion: -HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_store_sk=[$6], ss_ticket_number=[$8], ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$18], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($22), IS NOT NULL($6), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($5))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(IN($17, _UTF-16LE'burnished', _UTF-16LE'chocolate', _UTF-16LE'dim', _UTF-16LE'maroon', _UTF-16LE'navajo', _UTF-16LE'steel'), BETWEEN(false, $5, 36:DECIMAL(12, 2), 45:DECIMAL(12, 2)))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - -CTE Suggestion: -HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - -CTE Suggestion: -HiveProject(cs_item_sk=[$0]) - HiveFilter(condition=[>($1, *(2:DECIMAL(10, 0), $2))]) - HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) - HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$14], cs_order_number=[$16], cs_ext_list_price=[$24]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$1], cr_order_number=[$15], $f2=[+(+($22, $23), $24)]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - -CTE Suggestion: -HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) - -CTE Suggestion: -HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) - -CTE Suggestion: -HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - -CBO PLAN: -HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number=[$3], b_streen_name=[$4], b_city=[$5], b_zip=[$6], c_street_number=[$7], c_street_name=[$8], c_city=[$9], c_zip=[$10], syear=[CAST(2000):INTEGER], cnt=[$11], s1=[$12], s2=[$13], s3=[$14], s11=[$15], s21=[$16], s31=[$17], syear1=[CAST(2001):INTEGER], cnt1=[$18]) - HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$18], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - HiveProject(product_name=[$7], store_name=[$9], store_zip=[$10], b_street_number=[$11], b_streen_name=[$12], b_city=[$13], b_zip=[$14], c_street_number=[$15], c_street_name=[$16], c_city=[$17], c_zip=[$18], cnt=[$19], s1=[$20], s2=[$21], s3=[$22], s11=[$4], s21=[$5], s31=[$6], cnt1=[$3]) - HiveJoin(condition=[AND(=($8, $0), <=($3, $19), =($9, $1), =($10, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f1=[$0], $f2=[$1], $f3=[$2], $f15=[$13], $f16=[$14], $f17=[$15], $f18=[$16]) - HiveFilter(condition=[IS NOT NULL($13)]) - HiveProject(i_item_sk=[$4], s_store_name=[$5], s_zip=[$6], ca_street_number=[$0], ca_street_name=[$1], ca_city=[$2], ca_zip=[$3], ca_street_number0=[$9], ca_street_name0=[$10], ca_city0=[$11], ca_zip0=[$12], d_year=[$7], d_year0=[$8], $f13=[$13], $f14=[$14], $f15=[$15], $f16=[$16]) - HiveAggregate(group=[{3, 4, 5, 6, 24, 29, 30, 34, 36, 42, 43, 44, 45}], agg#0=[count()], agg#1=[sum($20)], agg#2=[sum($21)], agg#3=[sum($22)]) - HiveJoin(condition=[=($10, $41)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($8, $39), <>($38, $40))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($15, $37)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($11, $35)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($12, $33)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($9, $32)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($16, $31)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($18, $28)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($13, $0), =($19, $1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($15, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) - HiveJoin(condition=[=($6, $20)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($10, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_store_sk=[$6], ss_ticket_number=[$8], ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$18], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($6), IS NOT NULL($7), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(12, 2), 45:DECIMAL(12, 2)), IN($17, _UTF-16LE'burnished', _UTF-16LE'chocolate', _UTF-16LE'dim', _UTF-16LE'maroon', _UTF-16LE'navajo', _UTF-16LE'steel'))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(cs_item_sk=[$0]) - HiveFilter(condition=[>($1, *(2:DECIMAL(10, 0), $2))]) - HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) - HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$14], cs_order_number=[$16], cs_ext_list_price=[$24]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$1], cr_order_number=[$15], $f2=[+(+($22, $23), $24)]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) - HiveFilter(condition=[IS NOT NULL($14)]) - HiveProject(i_product_name=[$5], i_item_sk=[$4], s_store_name=[$6], s_zip=[$7], ca_street_number=[$0], ca_street_name=[$1], ca_city=[$2], ca_zip=[$3], ca_street_number0=[$10], ca_street_name0=[$11], ca_city0=[$12], ca_zip0=[$13], d_year=[$8], d_year0=[$9], $f14=[$14], $f15=[$15], $f16=[$16], $f17=[$17]) - HiveAggregate(group=[{3, 4, 5, 6, 24, 25, 29, 30, 34, 36, 42, 43, 44, 45}], agg#0=[count()], agg#1=[sum($20)], agg#2=[sum($21)], agg#3=[sum($22)]) - HiveJoin(condition=[=($10, $41)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($8, $39), <>($38, $40))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($15, $37)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($11, $35)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($12, $33)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($9, $32)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($16, $31)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($18, $28)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($13, $0), =($19, $1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($15, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) - HiveJoin(condition=[=($6, $20)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($10, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_store_sk=[$6], ss_ticket_number=[$8], ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$18], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($6), IS NOT NULL($7), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(12, 2), 45:DECIMAL(12, 2)), IN($17, _UTF-16LE'burnished', _UTF-16LE'chocolate', _UTF-16LE'dim', _UTF-16LE'maroon', _UTF-16LE'navajo', _UTF-16LE'steel'))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2000)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(cs_item_sk=[$0]) - HiveFilter(condition=[>($1, *(2:DECIMAL(10, 0), $2))]) - HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) - HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$14], cs_order_number=[$16], cs_ext_list_price=[$24]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$1], cr_order_number=[$15], $f2=[+(+($22, $23), $24)]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) - diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query66.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query66.q.out index ee161b8e7476..9eb0c5cdc6a1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query66.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query66.q.out @@ -31,13 +31,14 @@ HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], w_city=[$2], w_county HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($13), IS NOT NULL($14), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(t_time_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $2, 49530, 78330)]) + HiveFilter(condition=[AND(<=(49530, $2), <=($2, 78330))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) - HiveProject(d_date_sk=[$0], EXPR$0=[=($8, 1)], EXPR$1=[=($8, 2)], EXPR$2=[=($8, 3)], EXPR$3=[=($8, 4)], EXPR$4=[=($8, 5)], EXPR$5=[=($8, 6)], EXPR$6=[=($8, 7)], EXPR$7=[=($8, 8)], EXPR$8=[=($8, 9)], EXPR$9=[=($8, 10)], EXPR$10=[=($8, 11)], EXPR$11=[=($8, 12)]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0], EXPR$0=[=($8, 1)], EXPR$1=[=($8, 2)], EXPR$2=[=($8, 3)], EXPR$3=[=($8, 4)], EXPR$4=[=($8, 5)], EXPR$5=[=($8, 6)], EXPR$6=[=($8, 7)], EXPR$7=[=($8, 8)], EXPR$8=[=($8, 9)], EXPR$9=[=($8, 10)], EXPR$10=[=($8, 11)], EXPR$11=[=($8, 12)]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(sm_ship_mode_sk=[$0]) - HiveFilter(condition=[IN($4, _UTF-16LE'AIRBORNE', _UTF-16LE'DIAMOND')]) + HiveFilter(condition=[OR(=($4, _UTF-16LE'DIAMOND'), =($4, _UTF-16LE'AIRBORNE'))]) HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2], w_warehouse_sq_ft=[$3], w_city=[$8], w_county=[$9], w_state=[$10], w_country=[$12]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) @@ -52,13 +53,11 @@ HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], w_city=[$2], w_county HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($12), IS NOT NULL($0), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(t_time_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $2, 49530, 78330)]) + HiveFilter(condition=[AND(<=(49530, $2), <=($2, 78330))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) - HiveProject(d_date_sk=[$0], EXPR$0=[=($8, 1)], EXPR$1=[=($8, 2)], EXPR$2=[=($8, 3)], EXPR$3=[=($8, 4)], EXPR$4=[=($8, 5)], EXPR$5=[=($8, 6)], EXPR$6=[=($8, 7)], EXPR$7=[=($8, 8)], EXPR$8=[=($8, 9)], EXPR$9=[=($8, 10)], EXPR$10=[=($8, 11)], EXPR$11=[=($8, 12)]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(sm_ship_mode_sk=[$0]) - HiveFilter(condition=[IN($4, _UTF-16LE'AIRBORNE', _UTF-16LE'DIAMOND')]) + HiveFilter(condition=[OR(=($4, _UTF-16LE'DIAMOND'), =($4, _UTF-16LE'AIRBORNE'))]) HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2], w_warehouse_sq_ft=[$3], w_city=[$8], w_county=[$9], w_state=[$10], w_country=[$12]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query71.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query71.q.out index 5928e8144af6..52f65c94dcf1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query71.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query71.q.out @@ -17,29 +17,26 @@ HiveProject(brand_id=[$0], brand=[$1], t_hour=[$2], t_minute=[$3], ext_price=[$4 HiveProject(ws_sold_time_sk=[$0], ws_item_sk=[$2], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), =($8, 12))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(ext_price=[$2], sold_item_sk=[$1], time_sk=[$0]) HiveJoin(condition=[=($4, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_time_sk=[$0], cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(ext_price=[$2], sold_item_sk=[$1], time_sk=[$0]) HiveJoin(condition=[=($4, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_time_sk=[$0], ss_item_sk=[$1], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8]) HiveFilter(condition=[=($20, 1)]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(t_time_sk=[$0], t_hour=[$3], t_minute=[$4]) - HiveFilter(condition=[IN($9, _UTF-16LE'breakfast', _UTF-16LE'dinner')]) + HiveFilter(condition=[OR(=($9, _UTF-16LE'dinner'), =($9, _UTF-16LE'breakfast'))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query74.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query74.q.out index 442e0f18163b..5f0f56ba21ee 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query74.q.out @@ -28,12 +28,14 @@ HiveSortLimit(sort0=[$2], sort1=[$0], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ HiveAggregate(group=[{5, 6, 7}], agg#0=[sum($1)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$2], ss_net_paid=[$19], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1999)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(ss_customer_sk=[$2], ss_net_paid=[$19], ss_sold_date_sk=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($22))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1999)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(customer_id=[$0], year_total=[$1], EXPR$0=[>($1, 0:DECIMAL(1, 0))]) @@ -41,24 +43,22 @@ HiveSortLimit(sort0=[$2], sort1=[$0], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ HiveAggregate(group=[{5}], agg#0=[sum($1)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$2], ss_net_paid=[$19], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) + HiveTableSpool(table=[[cte, cte_suggestion_3]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(c_customer_id=[$0], $f1=[$1]) HiveAggregate(group=[{5}], agg#0=[sum($1)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_bill_customer_sk=[$3], ws_net_paid=[$28], ws_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($33))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1999)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_2]]) + HiveProject(ws_bill_customer_sk=[$3], ws_net_paid=[$28], ws_sold_date_sk=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($33))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(customer_id=[$0], year_total=[$1], EXPR$1=[>($1, 0:DECIMAL(1, 0))]) @@ -66,12 +66,8 @@ HiveSortLimit(sort0=[$2], sort1=[$0], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ HiveAggregate(group=[{5}], agg#0=[sum($1)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_bill_customer_sk=[$3], ws_net_paid=[$28], ws_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($33))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) + HiveTableScan(table=[[cte, cte_suggestion_3]], table:alias=[cte_suggestion_3]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query75.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query75.q.out index 73ff1e22219b..50de126dce24 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query75.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query75.q.out @@ -49,45 +49,42 @@ HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_i HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($5, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$14], cs_order_number=[$16], cs_quantity=[$17], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports '), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableSpool(table=[[cte, cte_suggestion_4]]) + HiveProject(cs_item_sk=[$14], cs_order_number=[$16], cs_quantity=[$17], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports '), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($6, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($7, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8], sr_return_quantity=[$9], sr_return_amt=[$10]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($5, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_ticket_number=[$8], ss_quantity=[$9], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) - HiveFilter(condition=[IS NOT NULL($22)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports '), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableSpool(table=[[cte, cte_suggestion_5]]) + HiveProject(ss_item_sk=[$1], ss_ticket_number=[$8], ss_quantity=[$9], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($6, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($7, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12], wr_return_quantity=[$13], wr_return_amt=[$14]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($5, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_order_number=[$16], ws_quantity=[$17], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports '), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableSpool(table=[[cte, cte_suggestion_2]]) + HiveProject(ws_item_sk=[$2], ws_order_number=[$16], ws_quantity=[$17], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) @@ -104,43 +101,28 @@ HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_i HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($5, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$14], cs_order_number=[$16], cs_quantity=[$17], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports '), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableScan(table=[[cte, cte_suggestion_4]], table:alias=[cte_suggestion_4]) + HiveTableSpool(table=[[cte, cte_suggestion_3]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($6, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($7, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8], sr_return_quantity=[$9], sr_return_amt=[$10]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($5, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_ticket_number=[$8], ss_quantity=[$9], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) - HiveFilter(condition=[IS NOT NULL($22)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports '), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableScan(table=[[cte, cte_suggestion_5]], table:alias=[cte_suggestion_5]) + HiveTableScan(table=[[cte, cte_suggestion_3]], table:alias=[cte_suggestion_3]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($6, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($7, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12], wr_return_quantity=[$13], wr_return_amt=[$14]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($5, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_order_number=[$16], ws_quantity=[$17], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports '), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) + HiveTableScan(table=[[cte, cte_suggestion_3]], table:alias=[cte_suggestion_3]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query78.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query78.q.out index 7ba907f42afd..27e057b02ac9 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query78.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query78.q.out @@ -19,9 +19,10 @@ HiveProject(ss_sold_year=[CAST(2000):INTEGER], ss_item_sk=[$0], ss_customer_sk=[ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2000)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(ws_item_sk=[$0], ws_bill_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) HiveFilter(condition=[>($2, 0)]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)]) @@ -33,9 +34,7 @@ HiveProject(ss_sold_year=[CAST(2000):INTEGER], ss_item_sk=[$0], ss_customer_sk=[ HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2000)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject($f2=[$1], $f3=[$2], $f4=[$3], $f5=[$4]) HiveFilter(condition=[>($2, 0)]) HiveProject(cs_item_sk=[$1], cs_bill_customer_sk=[$0], $f2=[$2], $f3=[$3], $f4=[$4]) @@ -48,7 +47,5 @@ HiveProject(ss_sold_year=[CAST(2000):INTEGER], ss_item_sk=[$0], ss_customer_sk=[ HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(cr_item_sk=[$1], cr_order_number=[$15]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2000)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out index f8676ae03592..1df2a4298f57 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out @@ -32,14 +32,16 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8], sr_return_amt=[$10], sr_net_loss=[$18]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveTableSpool(table=[[cte, cte_suggestion_2]]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveFilter(condition=[AND(<=(1998-08-04 00:00:00, CAST($2):TIMESTAMP(9)), <=(CAST($2):TIMESTAMP(9), 1998-09-03 00:00:00))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(s_store_sk=[$0], s_store_id=[$1]) HiveTableScan(table=[[default, store]], table:alias=[store]) @@ -56,14 +58,10 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(cr_item_sk=[$1], cr_order_number=[$15], cr_return_amount=[$17], cr_net_loss=[$25]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveFilter(condition=[AND(<=(1998-08-04 00:00:00, CAST($2):TIMESTAMP(9)), <=(CAST($2):TIMESTAMP(9), 1998-09-03 00:00:00))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) @@ -80,14 +78,10 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12], wr_return_amt=[$14], wr_net_loss=[$22]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) + HiveTableScan(table=[[cte, cte_suggestion_2]], table:alias=[cte_suggestion_2]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveFilter(condition=[AND(<=(1998-08-04 00:00:00, CAST($2):TIMESTAMP(9)), <=(CAST($2):TIMESTAMP(9), 1998-09-03 00:00:00))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(web_site_sk=[$0], web_site_id=[$1]) HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query81.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query81.q.out index a8ee1e33b08e..db90ac6b4338 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query81.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query81.q.out @@ -26,30 +26,28 @@ HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_nam HiveProject(cr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2]) HiveAggregate(group=[{1, 2}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_state=[$8]) - HiveFilter(condition=[IS NOT NULL($8)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[IS NOT NULL($8)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cr_returning_customer_sk=[$6], cr_returning_addr_sk=[$9], cr_return_amt_inc_tax=[$19], cr_returned_date_sk=[$26]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($6), IS NOT NULL($26))]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_state=[$0]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)]) HiveProject(ca_state=[$0], cr_returning_customer_sk=[$1], $f2=[$2]) HiveAggregate(group=[{1, 2}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_state=[$8]) - HiveFilter(condition=[IS NOT NULL($8)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cr_returning_customer_sk=[$6], cr_returning_addr_sk=[$9], cr_return_amt_inc_tax=[$19], cr_returned_date_sk=[$26]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($26))]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query83.q.out index d28432426ab6..d60b145b0198 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query83.q.out @@ -26,16 +26,17 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(cr_item_sk=[$1], cr_return_quantity=[$16], cr_returned_date_sk=[$26]) HiveFilter(condition=[IS NOT NULL($26)]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, 1998-01-02:DATE, 1998-10-15:DATE, 1998-11-10:DATE), IS NOT NULL($4))]) + HiveFilter(condition=[AND(OR(=($2, 1998-01-02), =($2, 1998-10-15), =($2, 1998-11-10)), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) @@ -47,16 +48,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(sr_item_sk=[$1], sr_return_quantity=[$9], sr_returned_date_sk=[$19]) HiveFilter(condition=[IS NOT NULL($19)]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(d_date=[$0]) HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, 1998-01-02:DATE, 1998-10-15:DATE, 1998-11-10:DATE), IS NOT NULL($4))]) + HiveFilter(condition=[AND(OR(=($2, 1998-01-02), =($2, 1998-10-15), =($2, 1998-11-10)), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) @@ -68,16 +67,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(wr_item_sk=[$1], wr_return_quantity=[$13], wr_returned_date_sk=[$23]) HiveFilter(condition=[IS NOT NULL($23)]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject(d_date=[$0]) HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, 1998-01-02:DATE, 1998-10-15:DATE, 1998-11-10:DATE), IS NOT NULL($4))]) + HiveFilter(condition=[AND(OR(=($2, 1998-01-02), =($2, 1998-10-15), =($2, 1998-11-10)), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query88.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query88.q.out index f7144f7a23c1..ea4af8fe416f 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query88.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query88.q.out @@ -13,13 +13,13 @@ HiveProject(ss_sold_time_sk=[$0], ss_hdemo_sk=[$4], ss_store_sk=[$6]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) -Warning: Map Join MAPJOIN[605][bigTable=?] in task 'Reducer 5' is a cross product -Warning: Map Join MAPJOIN[604][bigTable=?] in task 'Reducer 5' is a cross product -Warning: Map Join MAPJOIN[603][bigTable=?] in task 'Reducer 5' is a cross product -Warning: Map Join MAPJOIN[602][bigTable=?] in task 'Reducer 5' is a cross product -Warning: Map Join MAPJOIN[601][bigTable=?] in task 'Reducer 5' is a cross product -Warning: Map Join MAPJOIN[600][bigTable=?] in task 'Reducer 5' is a cross product -Warning: Map Join MAPJOIN[599][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[589][bigTable=?] in task 'Reducer 25' is a cross product +Warning: Map Join MAPJOIN[588][bigTable=?] in task 'Reducer 25' is a cross product +Warning: Map Join MAPJOIN[587][bigTable=?] in task 'Reducer 25' is a cross product +Warning: Map Join MAPJOIN[586][bigTable=?] in task 'Reducer 25' is a cross product +Warning: Map Join MAPJOIN[585][bigTable=?] in task 'Reducer 25' is a cross product +Warning: Map Join MAPJOIN[584][bigTable=?] in task 'Reducer 25' is a cross product +Warning: Map Join MAPJOIN[583][bigTable=?] in task 'Reducer 25' is a cross product CBO PLAN: HiveProject($f0=[$0], $f00=[$7], $f01=[$6], $f02=[$5], $f03=[$4], $f04=[$3], $f05=[$2], $f06=[$1]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) @@ -34,135 +34,109 @@ HiveProject($f0=[$0], $f00=[$7], $f01=[$6], $f02=[$5], $f03=[$4], $f04=[$3], $f0 HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_time_sk=[$0], ss_hdemo_sk=[$4], ss_store_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableSpool(table=[[cte, cte_suggestion_1]]) + HiveProject(ss_sold_time_sk=[$0], ss_hdemo_sk=[$4], ss_store_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(t_time_sk=[$0]) HiveFilter(condition=[AND(=($3, 8), >=($4, 30))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(<=($4, 5), IN($3, 0, 1, 3), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveFilter(condition=[AND(<=($4, 5), OR(=($3, 0), =($3, 1), =($3, 3)), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($5, _UTF-16LE'ese')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_time_sk=[$0], ss_hdemo_sk=[$4], ss_store_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(t_time_sk=[$0]) HiveFilter(condition=[AND(=($3, 12), <($4, 30))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(<=($4, 5), IN($3, 0, 1, 3), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveFilter(condition=[AND(<=($4, 5), OR(=($3, 0), =($3, 1), =($3, 3)), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($5, _UTF-16LE'ese')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_time_sk=[$0], ss_hdemo_sk=[$4], ss_store_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(t_time_sk=[$0]) HiveFilter(condition=[AND(=($3, 11), >=($4, 30))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(<=($4, 5), IN($3, 0, 1, 3), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveFilter(condition=[AND(<=($4, 5), OR(=($3, 0), =($3, 1), =($3, 3)), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($5, _UTF-16LE'ese')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_time_sk=[$0], ss_hdemo_sk=[$4], ss_store_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(t_time_sk=[$0]) HiveFilter(condition=[AND(=($3, 11), <($4, 30))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(<=($4, 5), IN($3, 0, 1, 3), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveFilter(condition=[AND(<=($4, 5), OR(=($3, 0), =($3, 1), =($3, 3)), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($5, _UTF-16LE'ese')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_time_sk=[$0], ss_hdemo_sk=[$4], ss_store_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(t_time_sk=[$0]) HiveFilter(condition=[AND(=($3, 10), >=($4, 30))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(<=($4, 5), IN($3, 0, 1, 3), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveFilter(condition=[AND(<=($4, 5), OR(=($3, 0), =($3, 1), =($3, 3)), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($5, _UTF-16LE'ese')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_time_sk=[$0], ss_hdemo_sk=[$4], ss_store_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(t_time_sk=[$0]) HiveFilter(condition=[AND(=($3, 10), <($4, 30))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(<=($4, 5), IN($3, 0, 1, 3), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveFilter(condition=[AND(<=($4, 5), OR(=($3, 0), =($3, 1), =($3, 3)), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($5, _UTF-16LE'ese')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_time_sk=[$0], ss_hdemo_sk=[$4], ss_store_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(t_time_sk=[$0]) HiveFilter(condition=[AND(=($3, 9), >=($4, 30))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(<=($4, 5), IN($3, 0, 1, 3), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveFilter(condition=[AND(<=($4, 5), OR(=($3, 0), =($3, 1), =($3, 3)), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($5, _UTF-16LE'ese')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_time_sk=[$0], ss_hdemo_sk=[$4], ss_store_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(t_time_sk=[$0]) HiveFilter(condition=[AND(=($3, 9), <($4, 30))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(<=($4, 5), IN($3, 0, 1, 3), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveFilter(condition=[AND(<=($4, 5), OR(=($3, 0), =($3, 1), =($3, 3)), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($5, _UTF-16LE'ese')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query90.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query90.q.out index d5b6ac7baab0..ae37b772d453 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query90.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query90.q.out @@ -12,7 +12,7 @@ HiveProject(hd_demo_sk=[$0]) HiveFilter(condition=[=($3, 8)]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) -Warning: Map Join MAPJOIN[149][bigTable=?] in task 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[153][bigTable=?] in task 'Reducer 7' is a cross product CBO PLAN: HiveProject(am_pm_ratio=[/(CAST($0):DECIMAL(15, 4), CAST($1):DECIMAL(15, 4))]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) @@ -25,14 +25,15 @@ HiveProject(am_pm_ratio=[/(CAST($0):DECIMAL(15, 4), CAST($1):DECIMAL(15, 4))]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11), IS NOT NULL($9))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(wp_web_page_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $10, 5000, 5200)]) + HiveFilter(condition=[AND(<=(5000, $10), <=($10, 5200))]) HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) HiveProject(t_time_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $3, 6, 7)]) + HiveFilter(condition=[AND(<=(6, $3), <=($3, 7))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[=($3, 8)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveTableSpool(table=[[cte, cte_suggestion_0]]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[=($3, 8)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -42,12 +43,10 @@ HiveProject(am_pm_ratio=[/(CAST($0):DECIMAL(15, 4), CAST($1):DECIMAL(15, 4))]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11), IS NOT NULL($9))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(wp_web_page_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $10, 5000, 5200)]) + HiveFilter(condition=[AND(<=(5000, $10), <=($10, 5200))]) HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) HiveProject(t_time_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $3, 14, 15)]) + HiveFilter(condition=[AND(<=(14, $3), <=($3, 15))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[=($3, 8)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveTableScan(table=[[cte, cte_suggestion_0]], table:alias=[cte_suggestion_0])