diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/explain.txt new file mode 100644 index 0000000000000..c46fce21c25a2 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/explain.txt @@ -0,0 +1,362 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildRight (29) + : :- * HashAggregate (15) + : : +- Exchange (14) + : : +- * HashAggregate (13) + : : +- * Project (12) + : : +- * BroadcastHashJoin Inner BuildRight (11) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_returns (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- ReusedExchange (10) + : +- BroadcastExchange (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.catalog_returns (16) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_returns (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) + + +(1) Scan parquet default.store_returns +Output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#3), dynamicpruningexpression(sr_returned_date_sk#3 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(sr_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] + +(3) Filter [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Condition : isnotnull(sr_item_sk#1) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#5, i_item_id#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#5, i_item_id#6] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#5, i_item_id#6] +Condition : (isnotnull(i_item_sk#5) AND isnotnull(i_item_id#6)) + +(7) BroadcastExchange +Input [2]: [i_item_sk#5, i_item_id#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_item_sk#1] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(9) Project [codegen id : 5] +Output [3]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#6] +Input [5]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3, i_item_sk#5, i_item_id#6] + +(10) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#8] + +(11) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#3] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(12) Project [codegen id : 5] +Output [2]: [sr_return_quantity#2, i_item_id#6] +Input [4]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#6, d_date_sk#8] + +(13) HashAggregate [codegen id : 5] +Input [2]: [sr_return_quantity#2, i_item_id#6] +Keys [1]: [i_item_id#6] +Functions [1]: [partial_sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum#9] +Results [2]: [i_item_id#6, sum#10] + +(14) Exchange +Input [2]: [i_item_id#6, sum#10] +Arguments: hashpartitioning(i_item_id#6, 5), ENSURE_REQUIREMENTS, [id=#11] + +(15) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#6, sum#10] +Keys [1]: [i_item_id#6] +Functions [1]: [sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum(sr_return_quantity#2)#12] +Results [2]: [i_item_id#6 AS item_id#13, sum(sr_return_quantity#2)#12 AS sr_item_qty#14] + +(16) Scan parquet default.catalog_returns +Output [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#17), dynamicpruningexpression(cr_returned_date_sk#17 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(cr_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 10] +Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] + +(18) Filter [codegen id : 10] +Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Condition : isnotnull(cr_item_sk#15) + +(19) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#18, i_item_id#19] + +(20) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_item_sk#15] +Right keys [1]: [i_item_sk#18] +Join condition: None + +(21) Project [codegen id : 10] +Output [3]: [cr_return_quantity#16, cr_returned_date_sk#17, i_item_id#19] +Input [5]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17, i_item_sk#18, i_item_id#19] + +(22) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#20] + +(23) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_returned_date_sk#17] +Right keys [1]: [d_date_sk#20] +Join condition: None + +(24) Project [codegen id : 10] +Output [2]: [cr_return_quantity#16, i_item_id#19] +Input [4]: [cr_return_quantity#16, cr_returned_date_sk#17, i_item_id#19, d_date_sk#20] + +(25) HashAggregate [codegen id : 10] +Input [2]: [cr_return_quantity#16, i_item_id#19] +Keys [1]: [i_item_id#19] +Functions [1]: [partial_sum(cr_return_quantity#16)] +Aggregate Attributes [1]: [sum#21] +Results [2]: [i_item_id#19, sum#22] + +(26) Exchange +Input [2]: [i_item_id#19, sum#22] +Arguments: hashpartitioning(i_item_id#19, 5), ENSURE_REQUIREMENTS, [id=#23] + +(27) HashAggregate [codegen id : 11] +Input [2]: [i_item_id#19, sum#22] +Keys [1]: [i_item_id#19] +Functions [1]: [sum(cr_return_quantity#16)] +Aggregate Attributes [1]: [sum(cr_return_quantity#16)#24] +Results [2]: [i_item_id#19 AS item_id#25, sum(cr_return_quantity#16)#24 AS cr_item_qty#26] + +(28) BroadcastExchange +Input [2]: [item_id#25, cr_item_qty#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] + +(29) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#13] +Right keys [1]: [item_id#25] +Join condition: None + +(30) Project [codegen id : 18] +Output [3]: [item_id#13, sr_item_qty#14, cr_item_qty#26] +Input [4]: [item_id#13, sr_item_qty#14, item_id#25, cr_item_qty#26] + +(31) Scan parquet default.web_returns +Output [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#30), dynamicpruningexpression(wr_returned_date_sk#30 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(wr_item_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] + +(33) Filter [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Condition : isnotnull(wr_item_sk#28) + +(34) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#31, i_item_id#32] + +(35) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_item_sk#28] +Right keys [1]: [i_item_sk#31] +Join condition: None + +(36) Project [codegen id : 16] +Output [3]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32] +Input [5]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30, i_item_sk#31, i_item_id#32] + +(37) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#33] + +(38) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_returned_date_sk#30] +Right keys [1]: [d_date_sk#33] +Join condition: None + +(39) Project [codegen id : 16] +Output [2]: [wr_return_quantity#29, i_item_id#32] +Input [4]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32, d_date_sk#33] + +(40) HashAggregate [codegen id : 16] +Input [2]: [wr_return_quantity#29, i_item_id#32] +Keys [1]: [i_item_id#32] +Functions [1]: [partial_sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum#34] +Results [2]: [i_item_id#32, sum#35] + +(41) Exchange +Input [2]: [i_item_id#32, sum#35] +Arguments: hashpartitioning(i_item_id#32, 5), ENSURE_REQUIREMENTS, [id=#36] + +(42) HashAggregate [codegen id : 17] +Input [2]: [i_item_id#32, sum#35] +Keys [1]: [i_item_id#32] +Functions [1]: [sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum(wr_return_quantity#29)#37] +Results [2]: [i_item_id#32 AS item_id#38, sum(wr_return_quantity#29)#37 AS wr_item_qty#39] + +(43) BroadcastExchange +Input [2]: [item_id#38, wr_item_qty#39] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] + +(44) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#13] +Right keys [1]: [item_id#38] +Join condition: None + +(45) Project [codegen id : 18] +Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#44] +Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] + +(46) TakeOrderedAndProject +Input [8]: [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] +Arguments: 100, [item_id#13 ASC NULLS FIRST, sr_item_qty#14 ASC NULLS FIRST], [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 1 Hosting Expression = sr_returned_date_sk#3 IN dynamicpruning#4 +BroadcastExchange (62) ++- * Project (61) + +- * BroadcastHashJoin LeftSemi BuildRight (60) + :- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet default.date_dim (47) + +- BroadcastExchange (59) + +- * Project (58) + +- * BroadcastHashJoin LeftSemi BuildRight (57) + :- * ColumnarToRow (51) + : +- Scan parquet default.date_dim (50) + +- BroadcastExchange (56) + +- * Project (55) + +- * Filter (54) + +- * ColumnarToRow (53) + +- Scan parquet default.date_dim (52) + + +(47) Scan parquet default.date_dim +Output [2]: [d_date_sk#8, d_date#45] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#8, d_date#45] + +(49) Filter [codegen id : 3] +Input [2]: [d_date_sk#8, d_date#45] +Condition : isnotnull(d_date_sk#8) + +(50) Scan parquet default.date_dim +Output [2]: [d_date#46, d_week_seq#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(51) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#46, d_week_seq#47] + +(52) Scan parquet default.date_dim +Output [2]: [d_date#48, d_week_seq#49] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_date, [2000-06-30,2000-09-27,2000-11-17])] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 1] +Input [2]: [d_date#48, d_week_seq#49] + +(54) Filter [codegen id : 1] +Input [2]: [d_date#48, d_week_seq#49] +Condition : d_date#48 IN (2000-06-30,2000-09-27,2000-11-17) + +(55) Project [codegen id : 1] +Output [1]: [d_week_seq#49] +Input [2]: [d_date#48, d_week_seq#49] + +(56) BroadcastExchange +Input [1]: [d_week_seq#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] + +(57) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [d_week_seq#47] +Right keys [1]: [d_week_seq#49] +Join condition: None + +(58) Project [codegen id : 2] +Output [1]: [d_date#46] +Input [2]: [d_date#46, d_week_seq#47] + +(59) BroadcastExchange +Input [1]: [d_date#46] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#51] + +(60) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date#45] +Right keys [1]: [d_date#46] +Join condition: None + +(61) Project [codegen id : 3] +Output [1]: [d_date_sk#8] +Input [2]: [d_date_sk#8, d_date#45] + +(62) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] + +Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#17 IN dynamicpruning#4 + +Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#30 IN dynamicpruning#4 + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/simplified.txt new file mode 100644 index 0000000000000..29ff19d7450c8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/simplified.txt @@ -0,0 +1,95 @@ +TakeOrderedAndProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + WholeStageCodegen (18) + Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] + BroadcastHashJoin [item_id,item_id] + Project [item_id,sr_item_qty,cr_item_qty] + BroadcastHashJoin [item_id,item_id] + HashAggregate [i_item_id,sum] [sum(sr_return_quantity),item_id,sr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,sr_return_quantity] [sum,sum] + Project [sr_return_quantity,i_item_id] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_return_quantity,sr_returned_date_sk,i_item_id] + BroadcastHashJoin [sr_item_sk,i_item_sk] + Filter [sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #2 + WholeStageCodegen (3) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date] + BroadcastHashJoin [d_week_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [i_item_sk,i_item_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (11) + HashAggregate [i_item_id,sum] [sum(cr_return_quantity),item_id,cr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (10) + HashAggregate [i_item_id,cr_return_quantity] [sum,sum] + Project [cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cr_return_quantity,cr_returned_date_sk,i_item_id] + BroadcastHashJoin [cr_item_sk,i_item_sk] + Filter [cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,sum] [sum(wr_return_quantity),item_id,wr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #9 + WholeStageCodegen (16) + HashAggregate [i_item_id,wr_return_quantity] [sum,sum] + Project [wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_return_quantity,wr_returned_date_sk,i_item_id] + BroadcastHashJoin [wr_item_sk,i_item_sk] + Filter [wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + InputAdapter + ReusedExchange [d_date_sk] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/explain.txt new file mode 100644 index 0000000000000..bda63681ef500 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/explain.txt @@ -0,0 +1,362 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildRight (29) + : :- * HashAggregate (15) + : : +- Exchange (14) + : : +- * HashAggregate (13) + : : +- * Project (12) + : : +- * BroadcastHashJoin Inner BuildRight (11) + : : :- * Project (6) + : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_returns (1) + : : : +- ReusedExchange (4) + : : +- BroadcastExchange (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet default.item (7) + : +- BroadcastExchange (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.catalog_returns (16) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_returns (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) + + +(1) Scan parquet default.store_returns +Output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#3), dynamicpruningexpression(sr_returned_date_sk#3 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(sr_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] + +(3) Filter [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Condition : isnotnull(sr_item_sk#1) + +(4) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#5] + +(5) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#3] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(6) Project [codegen id : 5] +Output [2]: [sr_item_sk#1, sr_return_quantity#2] +Input [4]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3, d_date_sk#5] + +(7) Scan parquet default.item +Output [2]: [i_item_sk#6, i_item_id#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#6, i_item_id#7] + +(9) Filter [codegen id : 4] +Input [2]: [i_item_sk#6, i_item_id#7] +Condition : (isnotnull(i_item_sk#6) AND isnotnull(i_item_id#7)) + +(10) BroadcastExchange +Input [2]: [i_item_sk#6, i_item_id#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(11) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(12) Project [codegen id : 5] +Output [2]: [sr_return_quantity#2, i_item_id#7] +Input [4]: [sr_item_sk#1, sr_return_quantity#2, i_item_sk#6, i_item_id#7] + +(13) HashAggregate [codegen id : 5] +Input [2]: [sr_return_quantity#2, i_item_id#7] +Keys [1]: [i_item_id#7] +Functions [1]: [partial_sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum#9] +Results [2]: [i_item_id#7, sum#10] + +(14) Exchange +Input [2]: [i_item_id#7, sum#10] +Arguments: hashpartitioning(i_item_id#7, 5), ENSURE_REQUIREMENTS, [id=#11] + +(15) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#7, sum#10] +Keys [1]: [i_item_id#7] +Functions [1]: [sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum(sr_return_quantity#2)#12] +Results [2]: [i_item_id#7 AS item_id#13, sum(sr_return_quantity#2)#12 AS sr_item_qty#14] + +(16) Scan parquet default.catalog_returns +Output [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#17), dynamicpruningexpression(cr_returned_date_sk#17 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(cr_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 10] +Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] + +(18) Filter [codegen id : 10] +Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Condition : isnotnull(cr_item_sk#15) + +(19) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#18] + +(20) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_returned_date_sk#17] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(21) Project [codegen id : 10] +Output [2]: [cr_item_sk#15, cr_return_quantity#16] +Input [4]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17, d_date_sk#18] + +(22) ReusedExchange [Reuses operator id: 10] +Output [2]: [i_item_sk#19, i_item_id#20] + +(23) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_item_sk#15] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(24) Project [codegen id : 10] +Output [2]: [cr_return_quantity#16, i_item_id#20] +Input [4]: [cr_item_sk#15, cr_return_quantity#16, i_item_sk#19, i_item_id#20] + +(25) HashAggregate [codegen id : 10] +Input [2]: [cr_return_quantity#16, i_item_id#20] +Keys [1]: [i_item_id#20] +Functions [1]: [partial_sum(cr_return_quantity#16)] +Aggregate Attributes [1]: [sum#21] +Results [2]: [i_item_id#20, sum#22] + +(26) Exchange +Input [2]: [i_item_id#20, sum#22] +Arguments: hashpartitioning(i_item_id#20, 5), ENSURE_REQUIREMENTS, [id=#23] + +(27) HashAggregate [codegen id : 11] +Input [2]: [i_item_id#20, sum#22] +Keys [1]: [i_item_id#20] +Functions [1]: [sum(cr_return_quantity#16)] +Aggregate Attributes [1]: [sum(cr_return_quantity#16)#24] +Results [2]: [i_item_id#20 AS item_id#25, sum(cr_return_quantity#16)#24 AS cr_item_qty#26] + +(28) BroadcastExchange +Input [2]: [item_id#25, cr_item_qty#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] + +(29) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#13] +Right keys [1]: [item_id#25] +Join condition: None + +(30) Project [codegen id : 18] +Output [3]: [item_id#13, sr_item_qty#14, cr_item_qty#26] +Input [4]: [item_id#13, sr_item_qty#14, item_id#25, cr_item_qty#26] + +(31) Scan parquet default.web_returns +Output [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#30), dynamicpruningexpression(wr_returned_date_sk#30 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(wr_item_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] + +(33) Filter [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Condition : isnotnull(wr_item_sk#28) + +(34) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#31] + +(35) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_returned_date_sk#30] +Right keys [1]: [d_date_sk#31] +Join condition: None + +(36) Project [codegen id : 16] +Output [2]: [wr_item_sk#28, wr_return_quantity#29] +Input [4]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30, d_date_sk#31] + +(37) ReusedExchange [Reuses operator id: 10] +Output [2]: [i_item_sk#32, i_item_id#33] + +(38) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_item_sk#28] +Right keys [1]: [i_item_sk#32] +Join condition: None + +(39) Project [codegen id : 16] +Output [2]: [wr_return_quantity#29, i_item_id#33] +Input [4]: [wr_item_sk#28, wr_return_quantity#29, i_item_sk#32, i_item_id#33] + +(40) HashAggregate [codegen id : 16] +Input [2]: [wr_return_quantity#29, i_item_id#33] +Keys [1]: [i_item_id#33] +Functions [1]: [partial_sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum#34] +Results [2]: [i_item_id#33, sum#35] + +(41) Exchange +Input [2]: [i_item_id#33, sum#35] +Arguments: hashpartitioning(i_item_id#33, 5), ENSURE_REQUIREMENTS, [id=#36] + +(42) HashAggregate [codegen id : 17] +Input [2]: [i_item_id#33, sum#35] +Keys [1]: [i_item_id#33] +Functions [1]: [sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum(wr_return_quantity#29)#37] +Results [2]: [i_item_id#33 AS item_id#38, sum(wr_return_quantity#29)#37 AS wr_item_qty#39] + +(43) BroadcastExchange +Input [2]: [item_id#38, wr_item_qty#39] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] + +(44) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#13] +Right keys [1]: [item_id#38] +Join condition: None + +(45) Project [codegen id : 18] +Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#44] +Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] + +(46) TakeOrderedAndProject +Input [8]: [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] +Arguments: 100, [item_id#13 ASC NULLS FIRST, sr_item_qty#14 ASC NULLS FIRST], [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 1 Hosting Expression = sr_returned_date_sk#3 IN dynamicpruning#4 +BroadcastExchange (62) ++- * Project (61) + +- * BroadcastHashJoin LeftSemi BuildRight (60) + :- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet default.date_dim (47) + +- BroadcastExchange (59) + +- * Project (58) + +- * BroadcastHashJoin LeftSemi BuildRight (57) + :- * ColumnarToRow (51) + : +- Scan parquet default.date_dim (50) + +- BroadcastExchange (56) + +- * Project (55) + +- * Filter (54) + +- * ColumnarToRow (53) + +- Scan parquet default.date_dim (52) + + +(47) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_date#45] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#5, d_date#45] + +(49) Filter [codegen id : 3] +Input [2]: [d_date_sk#5, d_date#45] +Condition : isnotnull(d_date_sk#5) + +(50) Scan parquet default.date_dim +Output [2]: [d_date#46, d_week_seq#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(51) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#46, d_week_seq#47] + +(52) Scan parquet default.date_dim +Output [2]: [d_date#48, d_week_seq#49] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_date, [2000-06-30,2000-09-27,2000-11-17])] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 1] +Input [2]: [d_date#48, d_week_seq#49] + +(54) Filter [codegen id : 1] +Input [2]: [d_date#48, d_week_seq#49] +Condition : d_date#48 IN (2000-06-30,2000-09-27,2000-11-17) + +(55) Project [codegen id : 1] +Output [1]: [d_week_seq#49] +Input [2]: [d_date#48, d_week_seq#49] + +(56) BroadcastExchange +Input [1]: [d_week_seq#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] + +(57) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [d_week_seq#47] +Right keys [1]: [d_week_seq#49] +Join condition: None + +(58) Project [codegen id : 2] +Output [1]: [d_date#46] +Input [2]: [d_date#46, d_week_seq#47] + +(59) BroadcastExchange +Input [1]: [d_date#46] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#51] + +(60) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date#45] +Right keys [1]: [d_date#46] +Join condition: None + +(61) Project [codegen id : 3] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_date#45] + +(62) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] + +Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#17 IN dynamicpruning#4 + +Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#30 IN dynamicpruning#4 + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/simplified.txt new file mode 100644 index 0000000000000..7f38503363767 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/simplified.txt @@ -0,0 +1,95 @@ +TakeOrderedAndProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + WholeStageCodegen (18) + Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] + BroadcastHashJoin [item_id,item_id] + Project [item_id,sr_item_qty,cr_item_qty] + BroadcastHashJoin [item_id,item_id] + HashAggregate [i_item_id,sum] [sum(sr_return_quantity),item_id,sr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,sr_return_quantity] [sum,sum] + Project [sr_return_quantity,i_item_id] + BroadcastHashJoin [sr_item_sk,i_item_sk] + Project [sr_item_sk,sr_return_quantity] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #2 + WholeStageCodegen (3) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date] + BroadcastHashJoin [d_week_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk,i_item_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (11) + HashAggregate [i_item_id,sum] [sum(cr_return_quantity),item_id,cr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (10) + HashAggregate [i_item_id,cr_return_quantity] [sum,sum] + Project [cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_item_sk,i_item_sk] + Project [cr_item_sk,cr_return_quantity] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,sum] [sum(wr_return_quantity),item_id,wr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #9 + WholeStageCodegen (16) + HashAggregate [i_item_id,wr_return_quantity] [sum,sum] + Project [wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_item_sk,i_item_sk] + Project [wr_item_sk,wr_return_quantity] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index 262a6920d29c6..a0207e9b01920 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec, ValidateRequirements} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.tags.ExtendedSQLTest // scalastyle:off line.size.limit @@ -82,8 +83,18 @@ trait PlanStabilitySuite extends DisableAdaptiveExecutionSuite { def goldenFilePath: String + private val approvedAnsiPlans: Seq[String] = Seq( + "q83", + "q83.sf100" + ) + private def getDirForTest(name: String): File = { - new File(goldenFilePath, name) + val goldenFileName = if (SQLConf.get.ansiEnabled && approvedAnsiPlans.contains(name)) { + name + ".ansi" + } else { + name + } + new File(goldenFilePath, goldenFileName) } private def isApproved(