diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 7a69e63096..1af130e00a 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -371,6 +371,13 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim inputs: Seq[Attribute], binding: Boolean, conf: SQLConf): Option[AggExpr] = { + + if (aggExpr.isDistinct) { + // https://github.com/apache/datafusion-comet/issues/1260 + withInfo(aggExpr, "distinct aggregates are not supported") + return None + } + aggExpr.aggregateFunction match { case s @ Sum(child, _) if sumDataTypeSupported(s.dataType) && isLegacyMode(s) => val childExpr = exprToProto(child, inputs, binding) diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16/explain.txt index cd2997591f..dc963d61f6 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16/explain.txt @@ -1,6 +1,6 @@ == Physical Plan == -* CometColumnarToRow (41) -+- CometHashAggregate (40) +* HashAggregate (41) ++- * CometColumnarToRow (40) +- CometColumnarExchange (39) +- * HashAggregate (38) +- * HashAggregate (37) @@ -225,11 +225,13 @@ Results [3]: [sum#20, sum#21, count#25] Input [3]: [sum#20, sum#21, count#25] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=4] -(40) CometHashAggregate +(40) CometColumnarToRow [codegen id : 2] +Input [3]: [sum#20, sum#21, count#25] + +(41) HashAggregate [codegen id : 2] Input [3]: [sum#20, sum#21, count#25] Keys: [] Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] - -(41) CometColumnarToRow [codegen id : 2] -Input [3]: [order count #26, total shipping cost #27, total net profit #28] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#24] +Results [3]: [count(cs_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #28] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16/simplified.txt index aaa206da7d..daecc86311 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16/simplified.txt @@ -1,7 +1,7 @@ WholeStageCodegen (2) - CometColumnarToRow - InputAdapter - CometHashAggregate [order count ,total shipping cost ,total net profit ,sum,sum,count,count(cs_order_number),sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #1 WholeStageCodegen (1) HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28/explain.txt index 93b5ae1288..98f8e50c1f 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28/explain.txt @@ -4,8 +4,8 @@ : :- * BroadcastNestedLoopJoin Inner BuildRight (50) : : :- * BroadcastNestedLoopJoin Inner BuildRight (37) : : : :- * BroadcastNestedLoopJoin Inner BuildRight (24) -: : : : :- * CometColumnarToRow (11) -: : : : : +- CometHashAggregate (10) +: : : : :- * HashAggregate (11) +: : : : : +- * CometColumnarToRow (10) : : : : : +- CometColumnarExchange (9) : : : : : +- * HashAggregate (8) : : : : : +- * HashAggregate (7) @@ -16,8 +16,8 @@ : : : : : +- CometFilter (2) : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) : : : : +- BroadcastExchange (23) -: : : : +- * CometColumnarToRow (22) -: : : : +- CometHashAggregate (21) +: : : : +- * HashAggregate (22) +: : : : +- * CometColumnarToRow (21) : : : : +- CometColumnarExchange (20) : : : : +- * HashAggregate (19) : : : : +- * HashAggregate (18) @@ -28,8 +28,8 @@ : : : : +- CometFilter (13) : : : : +- CometScan parquet spark_catalog.default.store_sales (12) : : : +- BroadcastExchange (36) -: : : +- * CometColumnarToRow (35) -: : : +- CometHashAggregate (34) +: : : +- * HashAggregate (35) +: : : +- * CometColumnarToRow (34) : : : +- CometColumnarExchange (33) : : : +- * HashAggregate (32) : : : +- * HashAggregate (31) @@ -40,8 +40,8 @@ : : : +- CometFilter (26) : : : +- CometScan parquet spark_catalog.default.store_sales (25) : : +- BroadcastExchange (49) -: : +- * CometColumnarToRow (48) -: : +- CometHashAggregate (47) +: : +- * HashAggregate (48) +: : +- * CometColumnarToRow (47) : : +- CometColumnarExchange (46) : : +- * HashAggregate (45) : : +- * HashAggregate (44) @@ -52,8 +52,8 @@ : : +- CometFilter (39) : : +- CometScan parquet spark_catalog.default.store_sales (38) : +- BroadcastExchange (62) -: +- * CometColumnarToRow (61) -: +- CometHashAggregate (60) +: +- * HashAggregate (61) +: +- * CometColumnarToRow (60) : +- CometColumnarExchange (59) : +- * HashAggregate (58) : +- * HashAggregate (57) @@ -64,8 +64,8 @@ : +- CometFilter (52) : +- CometScan parquet spark_catalog.default.store_sales (51) +- BroadcastExchange (75) - +- * CometColumnarToRow (74) - +- CometHashAggregate (73) + +- * HashAggregate (74) + +- * CometColumnarToRow (73) +- CometColumnarExchange (72) +- * HashAggregate (71) +- * HashAggregate (70) @@ -122,13 +122,15 @@ Results [4]: [sum#6, count#7, count#8, count#12] Input [4]: [sum#6, count#7, count#8, count#12] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] -(10) CometHashAggregate +(10) CometColumnarToRow [codegen id : 12] +Input [4]: [sum#6, count#7, count#8, count#12] + +(11) HashAggregate [codegen id : 12] Input [4]: [sum#6, count#7, count#8, count#12] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] - -(11) CometColumnarToRow [codegen id : 12] -Input [3]: [B1_LP#13, B1_CNT#14, B1_CNTD#15] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#9, count(ss_list_price#3)#10, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#9 / 100.0) as decimal(11,6)) AS B1_LP#13, count(ss_list_price#3)#10 AS B1_CNT#14, count(ss_list_price#3)#11 AS B1_CNTD#15] (12) CometScan parquet spark_catalog.default.store_sales Output [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] @@ -175,13 +177,15 @@ Results [4]: [sum#21, count#22, count#23, count#27] Input [4]: [sum#21, count#22, count#23, count#27] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=4] -(21) CometHashAggregate +(21) CometColumnarToRow [codegen id : 3] +Input [4]: [sum#21, count#22, count#23, count#27] + +(22) HashAggregate [codegen id : 3] Input [4]: [sum#21, count#22, count#23, count#27] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#18)), count(ss_list_price#18), count(distinct ss_list_price#18)] - -(22) CometColumnarToRow [codegen id : 3] -Input [3]: [B2_LP#28, B2_CNT#29, B2_CNTD#30] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#24, count(ss_list_price#18)#25, count(ss_list_price#18)#26] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#18))#24 / 100.0) as decimal(11,6)) AS B2_LP#28, count(ss_list_price#18)#25 AS B2_CNT#29, count(ss_list_price#18)#26 AS B2_CNTD#30] (23) BroadcastExchange Input [3]: [B2_LP#28, B2_CNT#29, B2_CNTD#30] @@ -236,13 +240,15 @@ Results [4]: [sum#36, count#37, count#38, count#42] Input [4]: [sum#36, count#37, count#38, count#42] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=7] -(34) CometHashAggregate +(34) CometColumnarToRow [codegen id : 5] +Input [4]: [sum#36, count#37, count#38, count#42] + +(35) HashAggregate [codegen id : 5] Input [4]: [sum#36, count#37, count#38, count#42] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#33)), count(ss_list_price#33), count(distinct ss_list_price#33)] - -(35) CometColumnarToRow [codegen id : 5] -Input [3]: [B3_LP#43, B3_CNT#44, B3_CNTD#45] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#39, count(ss_list_price#33)#40, count(ss_list_price#33)#41] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#33))#39 / 100.0) as decimal(11,6)) AS B3_LP#43, count(ss_list_price#33)#40 AS B3_CNT#44, count(ss_list_price#33)#41 AS B3_CNTD#45] (36) BroadcastExchange Input [3]: [B3_LP#43, B3_CNT#44, B3_CNTD#45] @@ -297,13 +303,15 @@ Results [4]: [sum#51, count#52, count#53, count#57] Input [4]: [sum#51, count#52, count#53, count#57] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=10] -(47) CometHashAggregate +(47) CometColumnarToRow [codegen id : 7] +Input [4]: [sum#51, count#52, count#53, count#57] + +(48) HashAggregate [codegen id : 7] Input [4]: [sum#51, count#52, count#53, count#57] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#48)), count(ss_list_price#48), count(distinct ss_list_price#48)] - -(48) CometColumnarToRow [codegen id : 7] -Input [3]: [B4_LP#58, B4_CNT#59, B4_CNTD#60] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#54, count(ss_list_price#48)#55, count(ss_list_price#48)#56] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#48))#54 / 100.0) as decimal(11,6)) AS B4_LP#58, count(ss_list_price#48)#55 AS B4_CNT#59, count(ss_list_price#48)#56 AS B4_CNTD#60] (49) BroadcastExchange Input [3]: [B4_LP#58, B4_CNT#59, B4_CNTD#60] @@ -358,13 +366,15 @@ Results [4]: [sum#66, count#67, count#68, count#72] Input [4]: [sum#66, count#67, count#68, count#72] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=13] -(60) CometHashAggregate +(60) CometColumnarToRow [codegen id : 9] +Input [4]: [sum#66, count#67, count#68, count#72] + +(61) HashAggregate [codegen id : 9] Input [4]: [sum#66, count#67, count#68, count#72] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#63)), count(ss_list_price#63), count(distinct ss_list_price#63)] - -(61) CometColumnarToRow [codegen id : 9] -Input [3]: [B5_LP#73, B5_CNT#74, B5_CNTD#75] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#69, count(ss_list_price#63)#70, count(ss_list_price#63)#71] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#63))#69 / 100.0) as decimal(11,6)) AS B5_LP#73, count(ss_list_price#63)#70 AS B5_CNT#74, count(ss_list_price#63)#71 AS B5_CNTD#75] (62) BroadcastExchange Input [3]: [B5_LP#73, B5_CNT#74, B5_CNTD#75] @@ -419,13 +429,15 @@ Results [4]: [sum#81, count#82, count#83, count#87] Input [4]: [sum#81, count#82, count#83, count#87] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=16] -(73) CometHashAggregate +(73) CometColumnarToRow [codegen id : 11] +Input [4]: [sum#81, count#82, count#83, count#87] + +(74) HashAggregate [codegen id : 11] Input [4]: [sum#81, count#82, count#83, count#87] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#78)), count(ss_list_price#78), count(distinct ss_list_price#78)] - -(74) CometColumnarToRow [codegen id : 11] -Input [3]: [B6_LP#88, B6_CNT#89, B6_CNTD#90] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#84, count(ss_list_price#78)#85, count(ss_list_price#78)#86] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#78))#84 / 100.0) as decimal(11,6)) AS B6_LP#88, count(ss_list_price#78)#85 AS B6_CNT#89, count(ss_list_price#78)#86 AS B6_CNTD#90] (75) BroadcastExchange Input [3]: [B6_LP#88, B6_CNT#89, B6_CNTD#90] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28/simplified.txt index 9c63ca3fe1..e62a830d18 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28/simplified.txt @@ -4,9 +4,9 @@ WholeStageCodegen (12) BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin - CometColumnarToRow - InputAdapter - CometHashAggregate [B1_LP,B1_CNT,B1_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #1 WholeStageCodegen (1) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] @@ -21,9 +21,9 @@ WholeStageCodegen (12) InputAdapter BroadcastExchange #3 WholeStageCodegen (3) - CometColumnarToRow - InputAdapter - CometHashAggregate [B2_LP,B2_CNT,B2_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #4 WholeStageCodegen (2) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] @@ -38,9 +38,9 @@ WholeStageCodegen (12) InputAdapter BroadcastExchange #6 WholeStageCodegen (5) - CometColumnarToRow - InputAdapter - CometHashAggregate [B3_LP,B3_CNT,B3_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #7 WholeStageCodegen (4) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] @@ -55,9 +55,9 @@ WholeStageCodegen (12) InputAdapter BroadcastExchange #9 WholeStageCodegen (7) - CometColumnarToRow - InputAdapter - CometHashAggregate [B4_LP,B4_CNT,B4_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #10 WholeStageCodegen (6) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] @@ -72,9 +72,9 @@ WholeStageCodegen (12) InputAdapter BroadcastExchange #12 WholeStageCodegen (9) - CometColumnarToRow - InputAdapter - CometHashAggregate [B5_LP,B5_CNT,B5_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #13 WholeStageCodegen (8) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] @@ -89,9 +89,9 @@ WholeStageCodegen (12) InputAdapter BroadcastExchange #15 WholeStageCodegen (11) - CometColumnarToRow - InputAdapter - CometHashAggregate [B6_LP,B6_CNT,B6_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #16 WholeStageCodegen (10) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94/explain.txt index b104e6330e..54db114394 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94/explain.txt @@ -1,6 +1,6 @@ == Physical Plan == -* CometColumnarToRow (41) -+- CometHashAggregate (40) +* HashAggregate (41) ++- * CometColumnarToRow (40) +- CometColumnarExchange (39) +- * HashAggregate (38) +- * HashAggregate (37) @@ -225,11 +225,13 @@ Results [3]: [sum#20, sum#21, count#25] Input [3]: [sum#20, sum#21, count#25] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=4] -(40) CometHashAggregate +(40) CometColumnarToRow [codegen id : 2] +Input [3]: [sum#20, sum#21, count#25] + +(41) HashAggregate [codegen id : 2] Input [3]: [sum#20, sum#21, count#25] Keys: [] Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] - -(41) CometColumnarToRow [codegen id : 2] -Input [3]: [order count #26, total shipping cost #27, total net profit #28] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#24] +Results [3]: [count(ws_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #28] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94/simplified.txt index 9b28886066..9001c8e061 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94/simplified.txt @@ -1,7 +1,7 @@ WholeStageCodegen (2) - CometColumnarToRow - InputAdapter - CometHashAggregate [order count ,total shipping cost ,total net profit ,sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #1 WholeStageCodegen (1) HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95/explain.txt index df85f06442..7fc71a21b0 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95/explain.txt @@ -1,6 +1,6 @@ == Physical Plan == -* CometColumnarToRow (54) -+- CometHashAggregate (53) +* HashAggregate (54) ++- * CometColumnarToRow (53) +- CometColumnarExchange (52) +- * HashAggregate (51) +- * HashAggregate (50) @@ -292,11 +292,13 @@ Results [3]: [sum#25, sum#26, count#30] Input [3]: [sum#25, sum#26, count#30] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=4] -(53) CometHashAggregate +(53) CometColumnarToRow [codegen id : 2] +Input [3]: [sum#25, sum#26, count#30] + +(54) HashAggregate [codegen id : 2] Input [3]: [sum#25, sum#26, count#30] Keys: [] Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] - -(54) CometColumnarToRow [codegen id : 2] -Input [3]: [order count #31, total shipping cost #32, total net profit #33] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#29] +Results [3]: [count(ws_order_number#4)#29 AS order count #31, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #32, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #33] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95/simplified.txt index 7ad730a339..b1aaa8fc44 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95/simplified.txt @@ -1,7 +1,7 @@ WholeStageCodegen (2) - CometColumnarToRow - InputAdapter - CometHashAggregate [order count ,total shipping cost ,total net profit ,sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #1 WholeStageCodegen (1) HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt index cd2997591f..dc963d61f6 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -1,6 +1,6 @@ == Physical Plan == -* CometColumnarToRow (41) -+- CometHashAggregate (40) +* HashAggregate (41) ++- * CometColumnarToRow (40) +- CometColumnarExchange (39) +- * HashAggregate (38) +- * HashAggregate (37) @@ -225,11 +225,13 @@ Results [3]: [sum#20, sum#21, count#25] Input [3]: [sum#20, sum#21, count#25] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=4] -(40) CometHashAggregate +(40) CometColumnarToRow [codegen id : 2] +Input [3]: [sum#20, sum#21, count#25] + +(41) HashAggregate [codegen id : 2] Input [3]: [sum#20, sum#21, count#25] Keys: [] Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] - -(41) CometColumnarToRow [codegen id : 2] -Input [3]: [order count #26, total shipping cost #27, total net profit #28] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#24] +Results [3]: [count(cs_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #28] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt index aaa206da7d..daecc86311 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -1,7 +1,7 @@ WholeStageCodegen (2) - CometColumnarToRow - InputAdapter - CometHashAggregate [order count ,total shipping cost ,total net profit ,sum,sum,count,count(cs_order_number),sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #1 WholeStageCodegen (1) HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt index 93b5ae1288..98f8e50c1f 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt @@ -4,8 +4,8 @@ : :- * BroadcastNestedLoopJoin Inner BuildRight (50) : : :- * BroadcastNestedLoopJoin Inner BuildRight (37) : : : :- * BroadcastNestedLoopJoin Inner BuildRight (24) -: : : : :- * CometColumnarToRow (11) -: : : : : +- CometHashAggregate (10) +: : : : :- * HashAggregate (11) +: : : : : +- * CometColumnarToRow (10) : : : : : +- CometColumnarExchange (9) : : : : : +- * HashAggregate (8) : : : : : +- * HashAggregate (7) @@ -16,8 +16,8 @@ : : : : : +- CometFilter (2) : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) : : : : +- BroadcastExchange (23) -: : : : +- * CometColumnarToRow (22) -: : : : +- CometHashAggregate (21) +: : : : +- * HashAggregate (22) +: : : : +- * CometColumnarToRow (21) : : : : +- CometColumnarExchange (20) : : : : +- * HashAggregate (19) : : : : +- * HashAggregate (18) @@ -28,8 +28,8 @@ : : : : +- CometFilter (13) : : : : +- CometScan parquet spark_catalog.default.store_sales (12) : : : +- BroadcastExchange (36) -: : : +- * CometColumnarToRow (35) -: : : +- CometHashAggregate (34) +: : : +- * HashAggregate (35) +: : : +- * CometColumnarToRow (34) : : : +- CometColumnarExchange (33) : : : +- * HashAggregate (32) : : : +- * HashAggregate (31) @@ -40,8 +40,8 @@ : : : +- CometFilter (26) : : : +- CometScan parquet spark_catalog.default.store_sales (25) : : +- BroadcastExchange (49) -: : +- * CometColumnarToRow (48) -: : +- CometHashAggregate (47) +: : +- * HashAggregate (48) +: : +- * CometColumnarToRow (47) : : +- CometColumnarExchange (46) : : +- * HashAggregate (45) : : +- * HashAggregate (44) @@ -52,8 +52,8 @@ : : +- CometFilter (39) : : +- CometScan parquet spark_catalog.default.store_sales (38) : +- BroadcastExchange (62) -: +- * CometColumnarToRow (61) -: +- CometHashAggregate (60) +: +- * HashAggregate (61) +: +- * CometColumnarToRow (60) : +- CometColumnarExchange (59) : +- * HashAggregate (58) : +- * HashAggregate (57) @@ -64,8 +64,8 @@ : +- CometFilter (52) : +- CometScan parquet spark_catalog.default.store_sales (51) +- BroadcastExchange (75) - +- * CometColumnarToRow (74) - +- CometHashAggregate (73) + +- * HashAggregate (74) + +- * CometColumnarToRow (73) +- CometColumnarExchange (72) +- * HashAggregate (71) +- * HashAggregate (70) @@ -122,13 +122,15 @@ Results [4]: [sum#6, count#7, count#8, count#12] Input [4]: [sum#6, count#7, count#8, count#12] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] -(10) CometHashAggregate +(10) CometColumnarToRow [codegen id : 12] +Input [4]: [sum#6, count#7, count#8, count#12] + +(11) HashAggregate [codegen id : 12] Input [4]: [sum#6, count#7, count#8, count#12] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] - -(11) CometColumnarToRow [codegen id : 12] -Input [3]: [B1_LP#13, B1_CNT#14, B1_CNTD#15] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#9, count(ss_list_price#3)#10, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#9 / 100.0) as decimal(11,6)) AS B1_LP#13, count(ss_list_price#3)#10 AS B1_CNT#14, count(ss_list_price#3)#11 AS B1_CNTD#15] (12) CometScan parquet spark_catalog.default.store_sales Output [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] @@ -175,13 +177,15 @@ Results [4]: [sum#21, count#22, count#23, count#27] Input [4]: [sum#21, count#22, count#23, count#27] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=4] -(21) CometHashAggregate +(21) CometColumnarToRow [codegen id : 3] +Input [4]: [sum#21, count#22, count#23, count#27] + +(22) HashAggregate [codegen id : 3] Input [4]: [sum#21, count#22, count#23, count#27] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#18)), count(ss_list_price#18), count(distinct ss_list_price#18)] - -(22) CometColumnarToRow [codegen id : 3] -Input [3]: [B2_LP#28, B2_CNT#29, B2_CNTD#30] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#24, count(ss_list_price#18)#25, count(ss_list_price#18)#26] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#18))#24 / 100.0) as decimal(11,6)) AS B2_LP#28, count(ss_list_price#18)#25 AS B2_CNT#29, count(ss_list_price#18)#26 AS B2_CNTD#30] (23) BroadcastExchange Input [3]: [B2_LP#28, B2_CNT#29, B2_CNTD#30] @@ -236,13 +240,15 @@ Results [4]: [sum#36, count#37, count#38, count#42] Input [4]: [sum#36, count#37, count#38, count#42] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=7] -(34) CometHashAggregate +(34) CometColumnarToRow [codegen id : 5] +Input [4]: [sum#36, count#37, count#38, count#42] + +(35) HashAggregate [codegen id : 5] Input [4]: [sum#36, count#37, count#38, count#42] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#33)), count(ss_list_price#33), count(distinct ss_list_price#33)] - -(35) CometColumnarToRow [codegen id : 5] -Input [3]: [B3_LP#43, B3_CNT#44, B3_CNTD#45] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#39, count(ss_list_price#33)#40, count(ss_list_price#33)#41] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#33))#39 / 100.0) as decimal(11,6)) AS B3_LP#43, count(ss_list_price#33)#40 AS B3_CNT#44, count(ss_list_price#33)#41 AS B3_CNTD#45] (36) BroadcastExchange Input [3]: [B3_LP#43, B3_CNT#44, B3_CNTD#45] @@ -297,13 +303,15 @@ Results [4]: [sum#51, count#52, count#53, count#57] Input [4]: [sum#51, count#52, count#53, count#57] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=10] -(47) CometHashAggregate +(47) CometColumnarToRow [codegen id : 7] +Input [4]: [sum#51, count#52, count#53, count#57] + +(48) HashAggregate [codegen id : 7] Input [4]: [sum#51, count#52, count#53, count#57] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#48)), count(ss_list_price#48), count(distinct ss_list_price#48)] - -(48) CometColumnarToRow [codegen id : 7] -Input [3]: [B4_LP#58, B4_CNT#59, B4_CNTD#60] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#54, count(ss_list_price#48)#55, count(ss_list_price#48)#56] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#48))#54 / 100.0) as decimal(11,6)) AS B4_LP#58, count(ss_list_price#48)#55 AS B4_CNT#59, count(ss_list_price#48)#56 AS B4_CNTD#60] (49) BroadcastExchange Input [3]: [B4_LP#58, B4_CNT#59, B4_CNTD#60] @@ -358,13 +366,15 @@ Results [4]: [sum#66, count#67, count#68, count#72] Input [4]: [sum#66, count#67, count#68, count#72] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=13] -(60) CometHashAggregate +(60) CometColumnarToRow [codegen id : 9] +Input [4]: [sum#66, count#67, count#68, count#72] + +(61) HashAggregate [codegen id : 9] Input [4]: [sum#66, count#67, count#68, count#72] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#63)), count(ss_list_price#63), count(distinct ss_list_price#63)] - -(61) CometColumnarToRow [codegen id : 9] -Input [3]: [B5_LP#73, B5_CNT#74, B5_CNTD#75] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#69, count(ss_list_price#63)#70, count(ss_list_price#63)#71] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#63))#69 / 100.0) as decimal(11,6)) AS B5_LP#73, count(ss_list_price#63)#70 AS B5_CNT#74, count(ss_list_price#63)#71 AS B5_CNTD#75] (62) BroadcastExchange Input [3]: [B5_LP#73, B5_CNT#74, B5_CNTD#75] @@ -419,13 +429,15 @@ Results [4]: [sum#81, count#82, count#83, count#87] Input [4]: [sum#81, count#82, count#83, count#87] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=16] -(73) CometHashAggregate +(73) CometColumnarToRow [codegen id : 11] +Input [4]: [sum#81, count#82, count#83, count#87] + +(74) HashAggregate [codegen id : 11] Input [4]: [sum#81, count#82, count#83, count#87] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#78)), count(ss_list_price#78), count(distinct ss_list_price#78)] - -(74) CometColumnarToRow [codegen id : 11] -Input [3]: [B6_LP#88, B6_CNT#89, B6_CNTD#90] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#84, count(ss_list_price#78)#85, count(ss_list_price#78)#86] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#78))#84 / 100.0) as decimal(11,6)) AS B6_LP#88, count(ss_list_price#78)#85 AS B6_CNT#89, count(ss_list_price#78)#86 AS B6_CNTD#90] (75) BroadcastExchange Input [3]: [B6_LP#88, B6_CNT#89, B6_CNTD#90] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt index 9c63ca3fe1..e62a830d18 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt @@ -4,9 +4,9 @@ WholeStageCodegen (12) BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin - CometColumnarToRow - InputAdapter - CometHashAggregate [B1_LP,B1_CNT,B1_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #1 WholeStageCodegen (1) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] @@ -21,9 +21,9 @@ WholeStageCodegen (12) InputAdapter BroadcastExchange #3 WholeStageCodegen (3) - CometColumnarToRow - InputAdapter - CometHashAggregate [B2_LP,B2_CNT,B2_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #4 WholeStageCodegen (2) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] @@ -38,9 +38,9 @@ WholeStageCodegen (12) InputAdapter BroadcastExchange #6 WholeStageCodegen (5) - CometColumnarToRow - InputAdapter - CometHashAggregate [B3_LP,B3_CNT,B3_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #7 WholeStageCodegen (4) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] @@ -55,9 +55,9 @@ WholeStageCodegen (12) InputAdapter BroadcastExchange #9 WholeStageCodegen (7) - CometColumnarToRow - InputAdapter - CometHashAggregate [B4_LP,B4_CNT,B4_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #10 WholeStageCodegen (6) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] @@ -72,9 +72,9 @@ WholeStageCodegen (12) InputAdapter BroadcastExchange #12 WholeStageCodegen (9) - CometColumnarToRow - InputAdapter - CometHashAggregate [B5_LP,B5_CNT,B5_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #13 WholeStageCodegen (8) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] @@ -89,9 +89,9 @@ WholeStageCodegen (12) InputAdapter BroadcastExchange #15 WholeStageCodegen (11) - CometColumnarToRow - InputAdapter - CometHashAggregate [B6_LP,B6_CNT,B6_CNTD,sum,count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #16 WholeStageCodegen (10) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt index b104e6330e..54db114394 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -1,6 +1,6 @@ == Physical Plan == -* CometColumnarToRow (41) -+- CometHashAggregate (40) +* HashAggregate (41) ++- * CometColumnarToRow (40) +- CometColumnarExchange (39) +- * HashAggregate (38) +- * HashAggregate (37) @@ -225,11 +225,13 @@ Results [3]: [sum#20, sum#21, count#25] Input [3]: [sum#20, sum#21, count#25] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=4] -(40) CometHashAggregate +(40) CometColumnarToRow [codegen id : 2] +Input [3]: [sum#20, sum#21, count#25] + +(41) HashAggregate [codegen id : 2] Input [3]: [sum#20, sum#21, count#25] Keys: [] Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] - -(41) CometColumnarToRow [codegen id : 2] -Input [3]: [order count #26, total shipping cost #27, total net profit #28] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#24] +Results [3]: [count(ws_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #28] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt index 9b28886066..9001c8e061 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -1,7 +1,7 @@ WholeStageCodegen (2) - CometColumnarToRow - InputAdapter - CometHashAggregate [order count ,total shipping cost ,total net profit ,sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #1 WholeStageCodegen (1) HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt index 85b3a27988..1d982a25cd 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -1,6 +1,6 @@ == Physical Plan == -* CometColumnarToRow (54) -+- CometHashAggregate (53) +* HashAggregate (54) ++- * CometColumnarToRow (53) +- CometColumnarExchange (52) +- * HashAggregate (51) +- * HashAggregate (50) @@ -292,11 +292,13 @@ Results [3]: [sum#21, sum#22, count#26] Input [3]: [sum#21, sum#22, count#26] Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=4] -(53) CometHashAggregate +(53) CometColumnarToRow [codegen id : 2] +Input [3]: [sum#21, sum#22, count#26] + +(54) HashAggregate [codegen id : 2] Input [3]: [sum#21, sum#22, count#26] Keys: [] Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] - -(54) CometColumnarToRow [codegen id : 2] -Input [3]: [order count #27, total shipping cost #28, total net profit #29] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#23, sum(UnscaledValue(ws_net_profit#6))#24, count(ws_order_number#4)#25] +Results [3]: [count(ws_order_number#4)#25 AS order count #27, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#23,17,2) AS total shipping cost #28, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#24,17,2) AS total net profit #29] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt index 7ad730a339..b1aaa8fc44 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -1,7 +1,7 @@ WholeStageCodegen (2) - CometColumnarToRow - InputAdapter - CometHashAggregate [order count ,total shipping cost ,total net profit ,sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + CometColumnarToRow + InputAdapter CometColumnarExchange #1 WholeStageCodegen (1) HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] diff --git a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala index 615ca591a3..6795f3d55d 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala @@ -974,7 +974,8 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } - test("distinct") { + // TODO enable once https://github.com/apache/datafusion-comet/issues/1267 is implemented + ignore("distinct") { withSQLConf(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true") { Seq("native", "jvm").foreach { cometShuffleMode => withSQLConf(CometConf.COMET_SHUFFLE_MODE.key -> cometShuffleMode) {