diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q14.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q14.plan.txt index 634bc09ffa5d..332267e77f3e 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q14.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q14.plan.txt @@ -10,7 +10,7 @@ local exchange (GATHER, SINGLE, []) local exchange (GATHER, SINGLE, []) remote exchange (REPARTITION, HASH, ["i_brand_id_6", "i_category_id_8", "i_class_id_7"]) partial aggregation over (i_brand_id_6, i_category_id_8, i_class_id_7) - semijoin (PARTITIONED): + join (INNER, PARTITIONED): remote exchange (REPARTITION, HASH, ["ss_item_sk"]) join (INNER, REPLICATED): join (INNER, REPLICATED): @@ -21,47 +21,49 @@ local exchange (GATHER, SINGLE, []) local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_sk_9"]) - join (INNER, PARTITIONED): - final aggregation over (brand_id, category_id, class_id) - local exchange (REPARTITION, HASH, ["brand_id", "category_id", "class_id"]) - remote exchange (REPARTITION, HASH, ["i_brand_id_64", "i_category_id_68", "i_class_id_66"]) - partial aggregation over (i_brand_id_64, i_category_id_68, i_class_id_66) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - remote exchange (REPARTITION, HASH, ["i_brand_id_114", "i_category_id_118", "i_class_id_116"]) - partial aggregation over (i_brand_id_114, i_category_id_118, i_class_id_116) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan catalog_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - remote exchange (REPARTITION, HASH, ["i_brand_id_164", "i_category_id_168", "i_class_id_166"]) - partial aggregation over (i_brand_id_164, i_category_id_168, i_class_id_166) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan web_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_brand_id_16", "i_category_id_20", "i_class_id_18"]) - scan item + final aggregation over (i_item_sk_9) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_sk_9"]) + partial aggregation over (i_item_sk_9) + join (INNER, PARTITIONED): + final aggregation over (brand_id, category_id, class_id) + local exchange (REPARTITION, HASH, ["brand_id", "category_id", "class_id"]) + remote exchange (REPARTITION, HASH, ["i_brand_id_64", "i_category_id_68", "i_class_id_66"]) + partial aggregation over (i_brand_id_64, i_category_id_68, i_class_id_66) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan item + remote exchange (REPARTITION, HASH, ["i_brand_id_114", "i_category_id_118", "i_class_id_116"]) + partial aggregation over (i_brand_id_114, i_category_id_118, i_class_id_116) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan catalog_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan item + remote exchange (REPARTITION, HASH, ["i_brand_id_164", "i_category_id_168", "i_class_id_166"]) + partial aggregation over (i_brand_id_164, i_category_id_168, i_class_id_166) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan web_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_brand_id_16", "i_category_id_20", "i_class_id_18"]) + scan item local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) final aggregation over () @@ -90,7 +92,7 @@ local exchange (GATHER, SINGLE, []) local exchange (GATHER, SINGLE, []) remote exchange (REPARTITION, HASH, ["i_brand_id_429", "i_category_id_433", "i_class_id_431"]) partial aggregation over (i_brand_id_429, i_category_id_433, i_class_id_431) - semijoin (PARTITIONED): + join (INNER, PARTITIONED): remote exchange (REPARTITION, HASH, ["cs_item_sk_403"]) join (INNER, REPLICATED): join (INNER, REPLICATED): @@ -101,47 +103,49 @@ local exchange (GATHER, SINGLE, []) local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_sk_473"]) - join (INNER, PARTITIONED): - final aggregation over (brand_id_495, category_id_497, class_id_496) - local exchange (REPARTITION, HASH, ["brand_id_495", "category_id_497", "class_id_496"]) - remote exchange (REPARTITION, HASH, ["i_brand_id_531", "i_category_id_535", "i_class_id_533"]) - partial aggregation over (i_brand_id_531, i_category_id_535, i_class_id_533) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - remote exchange (REPARTITION, HASH, ["i_brand_id_615", "i_category_id_619", "i_class_id_617"]) - partial aggregation over (i_brand_id_615, i_category_id_619, i_class_id_617) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan catalog_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - remote exchange (REPARTITION, HASH, ["i_brand_id_699", "i_category_id_703", "i_class_id_701"]) - partial aggregation over (i_brand_id_699, i_category_id_703, i_class_id_701) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan web_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_brand_id_480", "i_category_id_484", "i_class_id_482"]) - scan item + final aggregation over (i_item_sk_473) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_sk_473"]) + partial aggregation over (i_item_sk_473) + join (INNER, PARTITIONED): + final aggregation over (brand_id_495, category_id_497, class_id_496) + local exchange (REPARTITION, HASH, ["brand_id_495", "category_id_497", "class_id_496"]) + remote exchange (REPARTITION, HASH, ["i_brand_id_531", "i_category_id_535", "i_class_id_533"]) + partial aggregation over (i_brand_id_531, i_category_id_535, i_class_id_533) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan item + remote exchange (REPARTITION, HASH, ["i_brand_id_615", "i_category_id_619", "i_class_id_617"]) + partial aggregation over (i_brand_id_615, i_category_id_619, i_class_id_617) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan catalog_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan item + remote exchange (REPARTITION, HASH, ["i_brand_id_699", "i_category_id_703", "i_class_id_701"]) + partial aggregation over (i_brand_id_699, i_category_id_703, i_class_id_701) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan web_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_brand_id_480", "i_category_id_484", "i_class_id_482"]) + scan item local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) final aggregation over () @@ -170,7 +174,7 @@ local exchange (GATHER, SINGLE, []) local exchange (GATHER, SINGLE, []) remote exchange (REPARTITION, HASH, ["i_brand_id_971", "i_category_id_975", "i_class_id_973"]) partial aggregation over (i_brand_id_971, i_category_id_975, i_class_id_973) - semijoin (PARTITIONED): + join (INNER, PARTITIONED): remote exchange (REPARTITION, HASH, ["ws_item_sk_933"]) join (INNER, REPLICATED): join (INNER, REPLICATED): @@ -181,47 +185,49 @@ local exchange (GATHER, SINGLE, []) local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_sk_1015"]) - join (INNER, PARTITIONED): - final aggregation over (brand_id_1037, category_id_1039, class_id_1038) - local exchange (REPARTITION, HASH, ["brand_id_1037", "category_id_1039", "class_id_1038"]) - remote exchange (REPARTITION, HASH, ["i_brand_id_1073", "i_category_id_1077", "i_class_id_1075"]) - partial aggregation over (i_brand_id_1073, i_category_id_1077, i_class_id_1075) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - remote exchange (REPARTITION, HASH, ["i_brand_id_1157", "i_category_id_1161", "i_class_id_1159"]) - partial aggregation over (i_brand_id_1157, i_category_id_1161, i_class_id_1159) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan catalog_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - remote exchange (REPARTITION, HASH, ["i_brand_id_1241", "i_category_id_1245", "i_class_id_1243"]) - partial aggregation over (i_brand_id_1241, i_category_id_1245, i_class_id_1243) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan web_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_brand_id_1022", "i_category_id_1026", "i_class_id_1024"]) - scan item + final aggregation over (i_item_sk_1015) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_sk_1015"]) + partial aggregation over (i_item_sk_1015) + join (INNER, PARTITIONED): + final aggregation over (brand_id_1037, category_id_1039, class_id_1038) + local exchange (REPARTITION, HASH, ["brand_id_1037", "category_id_1039", "class_id_1038"]) + remote exchange (REPARTITION, HASH, ["i_brand_id_1073", "i_category_id_1077", "i_class_id_1075"]) + partial aggregation over (i_brand_id_1073, i_category_id_1077, i_class_id_1075) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan item + remote exchange (REPARTITION, HASH, ["i_brand_id_1157", "i_category_id_1161", "i_class_id_1159"]) + partial aggregation over (i_brand_id_1157, i_category_id_1161, i_class_id_1159) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan catalog_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan item + remote exchange (REPARTITION, HASH, ["i_brand_id_1241", "i_category_id_1245", "i_class_id_1243"]) + partial aggregation over (i_brand_id_1241, i_category_id_1245, i_class_id_1243) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan web_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_brand_id_1022", "i_category_id_1026", "i_class_id_1024"]) + scan item local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) final aggregation over () diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q23.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q23.plan.txt index 6125f4d7feda..c5c777db0b35 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q23.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q23.plan.txt @@ -2,114 +2,116 @@ final aggregation over () local exchange (GATHER, SINGLE, []) remote exchange (GATHER, SINGLE, []) partial aggregation over () - semijoin (PARTITIONED): + join (INNER, PARTITIONED): remote exchange (REPARTITION, HASH, ["cs_bill_customer_sk"]) - semijoin (PARTITIONED): + join (INNER, PARTITIONED): remote exchange (REPARTITION, HASH, ["cs_item_sk"]) join (INNER, REPLICATED): scan catalog_sales local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["ss_item_sk"]) - final aggregation over (d_date_2, ss_item_sk, substr) - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["d_date_2", "ss_item_sk", "substr"]) - partial aggregation over (d_date_2, ss_item_sk, substr) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["ss_customer_sk_32"]) - cross join: - final aggregation over (ss_customer_sk_32) - local exchange (GATHER, SINGLE, []) - partial aggregation over (ss_customer_sk_32) - join (INNER, PARTITIONED): - remote exchange (REPARTITION, HASH, ["ss_customer_sk_32"]) - scan store_sales + final aggregation over (ss_item_sk) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["ss_item_sk"]) + partial aggregation over (ss_item_sk) + final aggregation over (d_date_2, ss_item_sk, substr) local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["c_customer_sk"]) - scan customer + remote exchange (REPARTITION, HASH, ["d_date_2", "ss_item_sk", "substr"]) + partial aggregation over (d_date_2, ss_item_sk, substr) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan item + single aggregation over (ss_customer_sk_32) + cross join: + final aggregation over (ss_customer_sk_32) local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - final aggregation over () + partial aggregation over (ss_customer_sk_32) + join (INNER, PARTITIONED): + remote exchange (REPARTITION, HASH, ["ss_customer_sk_32"]) + scan store_sales local exchange (GATHER, SINGLE, []) - remote exchange (GATHER, SINGLE, []) - partial aggregation over () - final aggregation over (ss_customer_sk_56) - local exchange (GATHER, SINGLE, []) - partial aggregation over (ss_customer_sk_56) - join (INNER, PARTITIONED): - remote exchange (REPARTITION, HASH, ["ss_customer_sk_56"]) - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["c_customer_sk_76"]) - scan customer + remote exchange (REPARTITION, HASH, ["c_customer_sk"]) + scan customer + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + final aggregation over () + local exchange (GATHER, SINGLE, []) + remote exchange (GATHER, SINGLE, []) + partial aggregation over () + final aggregation over (ss_customer_sk_56) + local exchange (GATHER, SINGLE, []) + partial aggregation over (ss_customer_sk_56) + join (INNER, PARTITIONED): + remote exchange (REPARTITION, HASH, ["ss_customer_sk_56"]) + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["c_customer_sk_76"]) + scan customer partial aggregation over () - semijoin (PARTITIONED): + join (INNER, PARTITIONED): remote exchange (REPARTITION, HASH, ["ws_bill_customer_sk"]) - semijoin (PARTITIONED): + join (INNER, PARTITIONED): remote exchange (REPARTITION, HASH, ["ws_item_sk"]) join (INNER, REPLICATED): scan web_sales local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["ss_item_sk_157"]) - final aggregation over (d_date_180, ss_item_sk_157, substr_228) - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["d_date_180", "ss_item_sk_157", "substr_228"]) - partial aggregation over (d_date_180, ss_item_sk_157, substr_228) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["ss_customer_sk_237"]) - cross join: - final aggregation over (ss_customer_sk_237) - local exchange (GATHER, SINGLE, []) - partial aggregation over (ss_customer_sk_237) - join (INNER, PARTITIONED): - remote exchange (REPARTITION, HASH, ["ss_customer_sk_237"]) - scan store_sales + final aggregation over (ss_item_sk_157) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["ss_item_sk_157"]) + partial aggregation over (ss_item_sk_157) + final aggregation over (d_date_180, ss_item_sk_157, substr_228) local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["c_customer_sk_257"]) - scan customer + remote exchange (REPARTITION, HASH, ["d_date_180", "ss_item_sk_157", "substr_228"]) + partial aggregation over (d_date_180, ss_item_sk_157, substr_228) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan item + single aggregation over (ss_customer_sk_237) + cross join: + final aggregation over (ss_customer_sk_237) local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - final aggregation over () + partial aggregation over (ss_customer_sk_237) + join (INNER, PARTITIONED): + remote exchange (REPARTITION, HASH, ["ss_customer_sk_237"]) + scan store_sales local exchange (GATHER, SINGLE, []) - remote exchange (GATHER, SINGLE, []) - partial aggregation over () - final aggregation over (ss_customer_sk_281) - local exchange (GATHER, SINGLE, []) - partial aggregation over (ss_customer_sk_281) - join (INNER, PARTITIONED): - remote exchange (REPARTITION, HASH, ["ss_customer_sk_281"]) - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["c_customer_sk_301"]) - scan customer + remote exchange (REPARTITION, HASH, ["c_customer_sk_257"]) + scan customer + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + final aggregation over () + local exchange (GATHER, SINGLE, []) + remote exchange (GATHER, SINGLE, []) + partial aggregation over () + final aggregation over (ss_customer_sk_281) + local exchange (GATHER, SINGLE, []) + partial aggregation over (ss_customer_sk_281) + join (INNER, PARTITIONED): + remote exchange (REPARTITION, HASH, ["ss_customer_sk_281"]) + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["c_customer_sk_301"]) + scan customer diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q33.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q33.plan.txt index 76ed5ad2d6f6..d54086161cdb 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q33.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q33.plan.txt @@ -2,69 +2,72 @@ local exchange (GATHER, SINGLE, []) remote exchange (GATHER, SINGLE, []) final aggregation over (i_manufact_id) local exchange (REPARTITION, HASH, ["i_manufact_id"]) - partial aggregation over (i_manufact_id_2) - final aggregation over (i_manufact_id_2) - local exchange (GATHER, SINGLE, []) - partial aggregation over (i_manufact_id_2) - semijoin (PARTITIONED): - remote exchange (REPARTITION, HASH, ["i_manufact_id_2"]) - join (INNER, REPLICATED): + partial aggregation over (i_manufact_id_16) + single aggregation over (i_manufact_id_16) + join (INNER, PARTITIONED): + final aggregation over (i_manufact_id_16) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_manufact_id_16"]) + partial aggregation over (i_manufact_id_16) scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_address - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_manufact_id_16"]) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_manufact_id_2"]) + join (INNER, REPLICATED): + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address + partial aggregation over (i_manufact_id_102) + single aggregation over (i_manufact_id_102) + join (INNER, PARTITIONED): + final aggregation over (i_manufact_id_102) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_manufact_id_102"]) + partial aggregation over (i_manufact_id_102) scan item - partial aggregation over (i_manufact_id_79) - final aggregation over (i_manufact_id_79) - local exchange (GATHER, SINGLE, []) - partial aggregation over (i_manufact_id_79) - semijoin (PARTITIONED): - remote exchange (REPARTITION, HASH, ["i_manufact_id_79"]) - join (INNER, REPLICATED): - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan catalog_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_address - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_manufact_id_102"]) - scan item - partial aggregation over (i_manufact_id_167) - final aggregation over (i_manufact_id_167) - local exchange (GATHER, SINGLE, []) - partial aggregation over (i_manufact_id_167) - semijoin (PARTITIONED): - remote exchange (REPARTITION, HASH, ["i_manufact_id_167"]) - join (INNER, REPLICATED): - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan web_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_address - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_manufact_id_190"]) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_manufact_id_79"]) + join (INNER, REPLICATED): + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan catalog_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address + partial aggregation over (i_manufact_id_190) + single aggregation over (i_manufact_id_190) + join (INNER, PARTITIONED): + final aggregation over (i_manufact_id_190) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_manufact_id_190"]) + partial aggregation over (i_manufact_id_190) scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_manufact_id_167"]) + join (INNER, REPLICATED): + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan web_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q56.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q56.plan.txt index 08272501cae0..8a300a86fcbe 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q56.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q56.plan.txt @@ -2,69 +2,72 @@ local exchange (GATHER, SINGLE, []) remote exchange (GATHER, SINGLE, []) final aggregation over (i_item_id) local exchange (REPARTITION, HASH, ["i_item_id"]) - partial aggregation over (i_item_id_2) - final aggregation over (i_item_id_2) - local exchange (GATHER, SINGLE, []) - partial aggregation over (i_item_id_2) - semijoin (PARTITIONED): - remote exchange (REPARTITION, HASH, ["i_item_id_2"]) - join (INNER, REPLICATED): + partial aggregation over (i_item_id_4) + single aggregation over (i_item_id_4) + join (INNER, PARTITIONED): + final aggregation over (i_item_id_4) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_4"]) + partial aggregation over (i_item_id_4) scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_address - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_id_4"]) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_2"]) + join (INNER, REPLICATED): + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address + partial aggregation over (i_item_id_90) + single aggregation over (i_item_id_90) + join (INNER, PARTITIONED): + final aggregation over (i_item_id_90) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_90"]) + partial aggregation over (i_item_id_90) scan item - partial aggregation over (i_item_id_67) - final aggregation over (i_item_id_67) - local exchange (GATHER, SINGLE, []) - partial aggregation over (i_item_id_67) - semijoin (PARTITIONED): - remote exchange (REPARTITION, HASH, ["i_item_id_67"]) - join (INNER, REPLICATED): - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan catalog_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_address - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_id_90"]) - scan item - partial aggregation over (i_item_id_155) - final aggregation over (i_item_id_155) - local exchange (GATHER, SINGLE, []) - partial aggregation over (i_item_id_155) - semijoin (PARTITIONED): - remote exchange (REPARTITION, HASH, ["i_item_id_155"]) - join (INNER, REPLICATED): - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan web_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_address - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_id_178"]) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_67"]) + join (INNER, REPLICATED): + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan catalog_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address + partial aggregation over (i_item_id_178) + single aggregation over (i_item_id_178) + join (INNER, PARTITIONED): + final aggregation over (i_item_id_178) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_178"]) + partial aggregation over (i_item_id_178) scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_155"]) + join (INNER, REPLICATED): + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan web_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q58.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q58.plan.txt index 9802babef6e3..8ea8376f8796 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q58.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q58.plan.txt @@ -2,72 +2,84 @@ local exchange (GATHER, SINGLE, []) remote exchange (GATHER, SINGLE, []) join (INNER, PARTITIONED): join (INNER, PARTITIONED): - final aggregation over (i_item_id) - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_id"]) - partial aggregation over (i_item_id) - semijoin (REPLICATED): - join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - local exchange (GATHER, SINGLE, []) - remote exchange (GATHER, SINGLE, []) - scan date_dim final aggregation over (i_item_id_57) local exchange (GATHER, SINGLE, []) remote exchange (REPARTITION, HASH, ["i_item_id_57"]) partial aggregation over (i_item_id_57) - semijoin (REPLICATED): + join (INNER, REPLICATED): join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan catalog_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim + scan catalog_sales local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) - scan item + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + final aggregation over (d_date_109) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["d_date_109"]) + partial aggregation over (d_date_109) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + local exchange (GATHER, SINGLE, []) + remote exchange (GATHER, SINGLE, []) + scan date_dim local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - local exchange (GATHER, SINGLE, []) - remote exchange (GATHER, SINGLE, []) - scan date_dim - final aggregation over (i_item_id_166) - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_id_166"]) - partial aggregation over (i_item_id_166) - semijoin (REPLICATED): + scan item + final aggregation over (i_item_id_166) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_166"]) + partial aggregation over (i_item_id_166) join (INNER, REPLICATED): join (INNER, REPLICATED): scan web_sales local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) - scan date_dim + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + final aggregation over (d_date_218) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["d_date_218"]) + partial aggregation over (d_date_218) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + local exchange (GATHER, SINGLE, []) + remote exchange (GATHER, SINGLE, []) + scan date_dim local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan item + final aggregation over (i_item_id) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id"]) + partial aggregation over (i_item_id) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + final aggregation over (d_date_2) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["d_date_2"]) + partial aggregation over (d_date_2) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + local exchange (GATHER, SINGLE, []) + remote exchange (GATHER, SINGLE, []) + scan date_dim local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - local exchange (GATHER, SINGLE, []) - remote exchange (GATHER, SINGLE, []) - scan date_dim + scan item diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q60.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q60.plan.txt index 08272501cae0..8a300a86fcbe 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q60.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q60.plan.txt @@ -2,69 +2,72 @@ local exchange (GATHER, SINGLE, []) remote exchange (GATHER, SINGLE, []) final aggregation over (i_item_id) local exchange (REPARTITION, HASH, ["i_item_id"]) - partial aggregation over (i_item_id_2) - final aggregation over (i_item_id_2) - local exchange (GATHER, SINGLE, []) - partial aggregation over (i_item_id_2) - semijoin (PARTITIONED): - remote exchange (REPARTITION, HASH, ["i_item_id_2"]) - join (INNER, REPLICATED): + partial aggregation over (i_item_id_4) + single aggregation over (i_item_id_4) + join (INNER, PARTITIONED): + final aggregation over (i_item_id_4) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_4"]) + partial aggregation over (i_item_id_4) scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_address - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_id_4"]) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_2"]) + join (INNER, REPLICATED): + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address + partial aggregation over (i_item_id_90) + single aggregation over (i_item_id_90) + join (INNER, PARTITIONED): + final aggregation over (i_item_id_90) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_90"]) + partial aggregation over (i_item_id_90) scan item - partial aggregation over (i_item_id_67) - final aggregation over (i_item_id_67) - local exchange (GATHER, SINGLE, []) - partial aggregation over (i_item_id_67) - semijoin (PARTITIONED): - remote exchange (REPARTITION, HASH, ["i_item_id_67"]) - join (INNER, REPLICATED): - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan catalog_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_address - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_id_90"]) - scan item - partial aggregation over (i_item_id_155) - final aggregation over (i_item_id_155) - local exchange (GATHER, SINGLE, []) - partial aggregation over (i_item_id_155) - semijoin (PARTITIONED): - remote exchange (REPARTITION, HASH, ["i_item_id_155"]) - join (INNER, REPLICATED): - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - join (INNER, REPLICATED): - scan web_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_address - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_id_178"]) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_67"]) + join (INNER, REPLICATED): + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan catalog_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address + partial aggregation over (i_item_id_178) + single aggregation over (i_item_id_178) + join (INNER, PARTITIONED): + final aggregation over (i_item_id_178) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_178"]) + partial aggregation over (i_item_id_178) scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_id_155"]) + join (INNER, REPLICATED): + scan item + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): + scan web_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q70.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q70.plan.txt index 73e9cc60a930..1af234023117 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q70.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q70.plan.txt @@ -6,7 +6,7 @@ local exchange (GATHER, SINGLE, []) local exchange (REPARTITION, HASH, ["groupid", "s_county$gid", "s_state$gid_80"]) remote exchange (REPARTITION, HASH, ["groupid", "s_county$gid", "s_state$gid_80"]) partial aggregation over (groupid, s_county$gid, s_state$gid_80) - semijoin (PARTITIONED): + join (INNER, PARTITIONED): remote exchange (REPARTITION, HASH, ["s_state"]) join (INNER, REPLICATED): join (INNER, REPLICATED): @@ -17,18 +17,17 @@ local exchange (GATHER, SINGLE, []) local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan store - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["s_state_47"]) - final aggregation over (s_state_47) - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["s_state_47"]) - partial aggregation over (s_state_47) + single aggregation over (s_state_47) + final aggregation over (s_state_47) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["s_state_47"]) + partial aggregation over (s_state_47) + join (INNER, REPLICATED): join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim + scan store_sales local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) - scan store + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan store diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q83.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q83.plan.txt index 1c02c9a3b73c..771cf408ff02 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q83.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q83.plan.txt @@ -1,67 +1,94 @@ local exchange (GATHER, SINGLE, []) remote exchange (GATHER, SINGLE, []) join (INNER, PARTITIONED): - final aggregation over (i_item_id_59) + final aggregation over (i_item_id) local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_id_59"]) - partial aggregation over (i_item_id_59) - semijoin (REPLICATED): - join (INNER, REPLICATED): + remote exchange (REPARTITION, HASH, ["i_item_id"]) + partial aggregation over (i_item_id) + join (INNER, PARTITIONED): + remote exchange (REPARTITION, HASH, ["sr_item_sk"]) join (INNER, REPLICATED): - scan catalog_returns + scan store_returns local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - semijoin (REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + final aggregation over (d_date_2) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["d_date_2"]) + partial aggregation over (d_date_2) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + final aggregation over (d_week_seq_33) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["d_week_seq_33"]) + partial aggregation over (d_week_seq_33) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_sk"]) + scan item join (INNER, PARTITIONED): - final aggregation over (i_item_id) + final aggregation over (i_item_id_59) local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["i_item_id"]) - partial aggregation over (i_item_id) - semijoin (REPLICATED): - join (INNER, REPLICATED): + remote exchange (REPARTITION, HASH, ["i_item_id_59"]) + partial aggregation over (i_item_id_59) + join (INNER, PARTITIONED): + remote exchange (REPARTITION, HASH, ["cr_item_sk"]) join (INNER, REPLICATED): - scan store_returns + scan catalog_returns local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - semijoin (REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + final aggregation over (d_date_111) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["d_date_111"]) + partial aggregation over (d_date_111) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + final aggregation over (d_week_seq_142) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["d_week_seq_142"]) + partial aggregation over (d_week_seq_142) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_sk_58"]) + scan item final aggregation over (i_item_id_170) local exchange (GATHER, SINGLE, []) remote exchange (REPARTITION, HASH, ["i_item_id_170"]) partial aggregation over (i_item_id_170) - semijoin (REPLICATED): - join (INNER, REPLICATED): + join (INNER, PARTITIONED): + remote exchange (REPARTITION, HASH, ["wr_item_sk"]) join (INNER, REPLICATED): scan web_returns local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan item - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - semijoin (REPLICATED): - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) + join (INNER, REPLICATED): scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + final aggregation over (d_date_222) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["d_date_222"]) + partial aggregation over (d_date_222) + join (INNER, REPLICATED): + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + final aggregation over (d_week_seq_253) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["d_week_seq_253"]) + partial aggregation over (d_week_seq_253) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["i_item_sk_169"]) + scan item diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q95.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q95.plan.txt index 7fa7f44a4892..b7a83d7a207c 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q95.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q95.plan.txt @@ -2,25 +2,24 @@ final aggregation over () local exchange (GATHER, SINGLE, []) remote exchange (GATHER, SINGLE, []) partial aggregation over () - local exchange (GATHER, SINGLE, []) - semijoin (PARTITIONED): - semijoin (PARTITIONED): - remote exchange (REPARTITION, HASH, ["ws_order_number"]) - join (INNER, REPLICATED): - join (INNER, REPLICATED): - join (INNER, REPLICATED): + join (INNER, PARTITIONED): + final aggregation over (ws_order_number_86) + local exchange (GATHER, SINGLE, []) + partial aggregation over (ws_order_number_86) + join (INNER, PARTITIONED): + join (INNER, PARTITIONED): + remote exchange (REPARTITION, HASH, ["ws_order_number_86"]) scan web_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_address local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim + remote exchange (REPARTITION, HASH, ["wr_order_number"]) + scan web_returns local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan web_site + remote exchange (REPARTITION, HASH, ["ws_order_number_120"]) + scan web_sales + join (INNER, PARTITIONED): + final aggregation over (ws_order_number_17) local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["ws_order_number_17"]) + partial aggregation over (ws_order_number_17) join (INNER, PARTITIONED): remote exchange (REPARTITION, HASH, ["ws_order_number_17"]) scan web_sales @@ -28,14 +27,17 @@ final aggregation over () remote exchange (REPARTITION, HASH, ["ws_order_number_51"]) scan web_sales local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["ws_order_number_86"]) - join (INNER, PARTITIONED): - join (INNER, PARTITIONED): - remote exchange (REPARTITION, HASH, ["ws_order_number_86"]) + remote exchange (REPARTITION, HASH, ["ws_order_number"]) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + join (INNER, REPLICATED): scan web_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["wr_order_number"]) - scan web_returns + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["ws_order_number_120"]) - scan web_sales + remote exchange (REPLICATE, BROADCAST, []) + scan web_site diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpch/q18.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpch/q18.plan.txt index d505c39571e8..c74a7f3fdf5e 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpch/q18.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpch/q18.plan.txt @@ -3,7 +3,7 @@ local exchange (GATHER, SINGLE, []) final aggregation over (custkey_0, name, orderdate, orderkey_3, totalprice) local exchange (GATHER, SINGLE, []) partial aggregation over (custkey_0, name, orderdate, orderkey_3, totalprice) - semijoin (PARTITIONED): + join (INNER, PARTITIONED): join (INNER, PARTITIONED): remote exchange (REPARTITION, HASH, ["orderkey_3"]) scan lineitem @@ -15,10 +15,9 @@ local exchange (GATHER, SINGLE, []) local exchange (GATHER, SINGLE, []) remote exchange (REPARTITION, HASH, ["custkey"]) scan customer - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["orderkey_6"]) - final aggregation over (orderkey_6) - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["orderkey_6"]) - partial aggregation over (orderkey_6) - scan lineitem + single aggregation over (orderkey_6) + final aggregation over (orderkey_6) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["orderkey_6"]) + partial aggregation over (orderkey_6) + scan lineitem diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpch/q20.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpch/q20.plan.txt index 3af2f584f171..9206e64e487c 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpch/q20.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpch/q20.plan.txt @@ -1,31 +1,35 @@ remote exchange (GATHER, SINGLE, []) local exchange (GATHER, UNKNOWN, []) remote exchange (REPARTITION, ROUND_ROBIN, []) - semijoin (PARTITIONED): + join (INNER, PARTITIONED): remote exchange (REPARTITION, HASH, ["suppkey"]) join (INNER, REPLICATED): scan supplier local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan nation - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["suppkey_4"]) - cross join: - join (LEFT, PARTITIONED): - semijoin (PARTITIONED): - remote exchange (REPARTITION, HASH, ["partkey"]) - scan partsupp + final aggregation over (suppkey_4) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["suppkey_4"]) + partial aggregation over (suppkey_4) + cross join: + join (LEFT, PARTITIONED): + join (INNER, PARTITIONED): + remote exchange (REPARTITION, HASH, ["partkey"]) + scan partsupp + final aggregation over (partkey_8) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["partkey_8"]) + partial aggregation over (partkey_8) + scan part + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["partkey_12"]) + final aggregation over (partkey_12, suppkey_13) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, ["partkey_12", "suppkey_13"]) + partial aggregation over (partkey_12, suppkey_13) + scan lineitem local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["partkey_8"]) - scan part - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["partkey_12"]) - final aggregation over (partkey_12, suppkey_13) - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, ["partkey_12", "suppkey_13"]) - partial aggregation over (partkey_12, suppkey_13) - scan lineitem - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - single aggregation over () - values (1 rows) + remote exchange (REPLICATE, BROADCAST, []) + single aggregation over () + values (1 rows) diff --git a/presto-main/src/main/java/io/prestosql/SystemSessionProperties.java b/presto-main/src/main/java/io/prestosql/SystemSessionProperties.java index 90a5d6611b27..8f2ac759e29a 100644 --- a/presto-main/src/main/java/io/prestosql/SystemSessionProperties.java +++ b/presto-main/src/main/java/io/prestosql/SystemSessionProperties.java @@ -122,6 +122,7 @@ public final class SystemSessionProperties public static final String QUERY_MAX_TOTAL_MEMORY_PER_NODE = "query_max_total_memory_per_node"; public static final String IGNORE_DOWNSTREAM_PREFERENCES = "ignore_downstream_preferences"; public static final String ITERATIVE_COLUMN_PRUNING = "iterative_rule_based_column_pruning"; + public static final String FILTERING_SEMI_JOIN_TO_INNER = "rewrite-filtering-semi-join-to-inner-join"; public static final String REQUIRED_WORKERS_COUNT = "required_workers_count"; public static final String REQUIRED_WORKERS_MAX_WAIT_TIME = "required_workers_max_wait_time"; public static final String COST_ESTIMATION_WORKER_COUNT = "cost_estimation_worker_count"; @@ -542,6 +543,11 @@ public SystemSessionProperties( "Use iterative rules to prune unreferenced columns", featuresConfig.isIterativeRuleBasedColumnPruning(), false), + booleanProperty( + FILTERING_SEMI_JOIN_TO_INNER, + "Rewrite semi join in filtering context to inner join", + featuresConfig.isRewriteFilteringSemiJoinToInnerJoin(), + false), integerProperty( REQUIRED_WORKERS_COUNT, "Minimum number of active workers that must be available before the query will start", @@ -989,6 +995,11 @@ public static boolean isIterativeRuleBasedColumnPruning(Session session) return session.getSystemProperty(ITERATIVE_COLUMN_PRUNING, Boolean.class); } + public static boolean isRewriteFilteringSemiJoinToInnerJoin(Session session) + { + return session.getSystemProperty(FILTERING_SEMI_JOIN_TO_INNER, Boolean.class); + } + public static int getRequiredWorkers(Session session) { return session.getSystemProperty(REQUIRED_WORKERS_COUNT, Integer.class); diff --git a/presto-main/src/main/java/io/prestosql/sql/analyzer/Analysis.java b/presto-main/src/main/java/io/prestosql/sql/analyzer/Analysis.java index a208794d23f2..d2823f6e6227 100644 --- a/presto-main/src/main/java/io/prestosql/sql/analyzer/Analysis.java +++ b/presto-main/src/main/java/io/prestosql/sql/analyzer/Analysis.java @@ -219,6 +219,11 @@ public void resetUpdateType() this.target = Optional.empty(); } + public boolean isDeleteTarget(QualifiedObjectName name) + { + return "DELETE".equals(updateType) && Optional.of(name).equals(target); + } + public boolean isSkipMaterializedViewRefresh() { return skipMaterializedViewRefresh; diff --git a/presto-main/src/main/java/io/prestosql/sql/analyzer/FeaturesConfig.java b/presto-main/src/main/java/io/prestosql/sql/analyzer/FeaturesConfig.java index bdb2a6655c3c..cf8fbef22f3e 100644 --- a/presto-main/src/main/java/io/prestosql/sql/analyzer/FeaturesConfig.java +++ b/presto-main/src/main/java/io/prestosql/sql/analyzer/FeaturesConfig.java @@ -126,6 +126,7 @@ public class FeaturesConfig private boolean predicatePushdownUseTableProperties = true; private boolean ignoreDownstreamPreferences; private boolean iterativeRuleBasedColumnPruning = true; + private boolean rewriteFilteringSemiJoinToInnerJoin = true; private Duration iterativeOptimizerTimeout = new Duration(3, MINUTES); // by default let optimizer wait a long time in case it retrieves some data from ConnectorMetadata private DataSize filterAndProjectMinOutputPageSize = DataSize.of(500, KILOBYTE); @@ -963,4 +964,16 @@ public FeaturesConfig setIterativeRuleBasedColumnPruning(boolean iterativeRuleBa this.iterativeRuleBasedColumnPruning = iterativeRuleBasedColumnPruning; return this; } + + public boolean isRewriteFilteringSemiJoinToInnerJoin() + { + return rewriteFilteringSemiJoinToInnerJoin; + } + + @Config("optimizer.rewrite-filtering-semi-join-to-inner-join") + public FeaturesConfig setRewriteFilteringSemiJoinToInnerJoin(boolean rewriteFilteringSemiJoinToInnerJoin) + { + this.rewriteFilteringSemiJoinToInnerJoin = rewriteFilteringSemiJoinToInnerJoin; + return this; + } } diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/LogicalPlanner.java b/presto-main/src/main/java/io/prestosql/sql/planner/LogicalPlanner.java index 5d2b5d7e60a5..ee61ee61fe09 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/LogicalPlanner.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/LogicalPlanner.java @@ -314,7 +314,7 @@ private RelationPlan createAnalyzePlan(Analysis analysis, Analyze analyzeStateme idAllocator.getNextId(), new AggregationNode( idAllocator.getNextId(), - TableScanNode.newInstance(idAllocator.getNextId(), targetTable, tableScanOutputs.build(), symbolToColumnHandle.build()), + TableScanNode.newInstance(idAllocator.getNextId(), targetTable, tableScanOutputs.build(), symbolToColumnHandle.build(), false), statisticAggregations.getAggregations(), singleGroupingSet(groupingSymbols), ImmutableList.of(), diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/PlanCopier.java b/presto-main/src/main/java/io/prestosql/sql/planner/PlanCopier.java index ac7dd2fa1419..a92ec683cf4a 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/PlanCopier.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/PlanCopier.java @@ -120,7 +120,7 @@ public PlanNode visitSample(SampleNode node, RewriteContext context) @Override public PlanNode visitTableScan(TableScanNode node, RewriteContext context) { - return new TableScanNode(idAllocator.getNextId(), node.getTable(), node.getOutputSymbols(), node.getAssignments(), node.getEnforcedConstraint()); + return new TableScanNode(idAllocator.getNextId(), node.getTable(), node.getOutputSymbols(), node.getAssignments(), node.getEnforcedConstraint(), node.isForDelete()); } @Override diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/PlanFragmenter.java b/presto-main/src/main/java/io/prestosql/sql/planner/PlanFragmenter.java index 09c7d6ad5f52..762982643a0b 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/PlanFragmenter.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/PlanFragmenter.java @@ -786,7 +786,8 @@ public PlanNode visitTableScan(TableScanNode node, RewriteContext context) newTable, node.getOutputSymbols(), node.getAssignments(), - node.getEnforcedConstraint()); + node.getEnforcedConstraint(), + node.isForDelete()); } } } diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/PlanOptimizers.java b/presto-main/src/main/java/io/prestosql/sql/planner/PlanOptimizers.java index 163fca81953b..825ce7f56009 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/PlanOptimizers.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/PlanOptimizers.java @@ -179,6 +179,7 @@ import io.prestosql.sql.planner.iterative.rule.TransformCorrelatedScalarSubquery; import io.prestosql.sql.planner.iterative.rule.TransformCorrelatedSingleRowSubqueryToProject; import io.prestosql.sql.planner.iterative.rule.TransformExistsApplyToCorrelatedJoin; +import io.prestosql.sql.planner.iterative.rule.TransformFilteringSemiJoinToInnerJoin; import io.prestosql.sql.planner.iterative.rule.TransformUncorrelatedInPredicateSubqueryToSemiJoin; import io.prestosql.sql.planner.iterative.rule.TransformUncorrelatedSubqueryToJoin; import io.prestosql.sql.planner.iterative.rule.UnwrapCastInComparison; @@ -524,7 +525,12 @@ public PlanOptimizers( new CheckSubqueryNodesAreRewritten(), new StatsRecordingPlanOptimizer( optimizerStats, - new PredicatePushDown(metadata, typeOperators, typeAnalyzer, false, false))); + new PredicatePushDown(metadata, typeOperators, typeAnalyzer, false, false)), + new IterativeOptimizer( + ruleStats, + statsCalculator, + estimatedExchangesCostCalculator, + ImmutableSet.of(new TransformFilteringSemiJoinToInnerJoin()))); // must run after PredicatePushDown IterativeOptimizer pushIntoTableScanOptimizer = new IterativeOptimizer( ruleStats, diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/RelationPlanner.java b/presto-main/src/main/java/io/prestosql/sql/planner/RelationPlanner.java index 895de718dac3..11789116772a 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/RelationPlanner.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/RelationPlanner.java @@ -87,6 +87,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.Iterables.getOnlyElement; +import static io.prestosql.metadata.MetadataUtil.createQualifiedObjectName; import static io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED; import static io.prestosql.sql.analyzer.SemanticExceptions.semanticException; import static io.prestosql.sql.analyzer.TypeSignatureTranslator.toSqlType; @@ -201,7 +202,8 @@ protected RelationPlan visitTable(Table node, Void context) } List outputSymbols = outputSymbolsBuilder.build(); - PlanNode root = TableScanNode.newInstance(idAllocator.getNextId(), handle, outputSymbols, columns.build()); + boolean isDeleteTarget = analysis.isDeleteTarget(createQualifiedObjectName(session, node, node.getName())); + PlanNode root = TableScanNode.newInstance(idAllocator.getNextId(), handle, outputSymbols, columns.build(), isDeleteTarget); plan = new RelationPlan(root, scope, outputSymbols, outerContext); } diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PruneTableScanColumns.java b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PruneTableScanColumns.java index 4ddfdf9fcbe8..bf8cd3a6b3c9 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PruneTableScanColumns.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PruneTableScanColumns.java @@ -109,6 +109,7 @@ public static Optional pruneColumns(Metadata metadata, TypeProvider ty handle, newOutputs, newAssignments, - node.getEnforcedConstraint())); + node.getEnforcedConstraint(), + node.isForDelete())); } } diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushAggregationIntoTableScan.java b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushAggregationIntoTableScan.java index 3f677647f595..7e020b335960 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushAggregationIntoTableScan.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushAggregationIntoTableScan.java @@ -192,7 +192,8 @@ public Result apply(AggregationNode node, Captures captures, Context context) context.getIdAllocator().getNextId(), result.getHandle(), newScanOutputs.build(), - scanAssignments), + scanAssignments, + tableScan.isForDelete()), assignmentBuilder.build())); } diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushLimitIntoTableScan.java b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushLimitIntoTableScan.java index 80d41c8f853d..e3d77075649d 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushLimitIntoTableScan.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushLimitIntoTableScan.java @@ -69,7 +69,8 @@ public Rule.Result apply(LimitNode limit, Captures captures, Rule.Context contex result.getHandle(), tableScan.getOutputSymbols(), tableScan.getAssignments(), - tableScan.getEnforcedConstraint()); + tableScan.getEnforcedConstraint(), + tableScan.isForDelete()); if (!result.isLimitGuaranteed()) { node = new LimitNode(limit.getId(), node, limit.getCount(), limit.isPartial()); diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushPredicateIntoTableScan.java b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushPredicateIntoTableScan.java index e589492adc34..0c656dadb50f 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushPredicateIntoTableScan.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushPredicateIntoTableScan.java @@ -259,7 +259,8 @@ public static Optional pushFilterIntoTableScan( newTable, node.getOutputSymbols(), node.getAssignments(), - computeEnforced(newDomain, remainingFilter)); + computeEnforced(newDomain, remainingFilter), + node.isForDelete()); Expression resultingPredicate = createResultingPredicate( metadata, diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushProjectionIntoTableScan.java b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushProjectionIntoTableScan.java index 8fa76282a0fb..2e2a7c4754b1 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushProjectionIntoTableScan.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushProjectionIntoTableScan.java @@ -161,7 +161,8 @@ public Result apply(ProjectNode project, Captures captures, Context context) tableScan.getId(), result.get().getHandle(), newScanOutputs, - newScanAssignments), + newScanAssignments, + tableScan.isForDelete()), newProjectionAssignments.build())); } } diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushSampleIntoTableScan.java b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushSampleIntoTableScan.java index f728a1fab4ff..6d64add667ad 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushSampleIntoTableScan.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushSampleIntoTableScan.java @@ -69,7 +69,8 @@ public Rule.Result apply(SampleNode sample, Captures captures, Rule.Context cont result, tableScan.getOutputSymbols(), tableScan.getAssignments(), - tableScan.getEnforcedConstraint()))) + tableScan.getEnforcedConstraint(), + tableScan.isForDelete()))) .orElseGet(Result::empty); } diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushTopNIntoTableScan.java b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushTopNIntoTableScan.java index 7901f8b4dab0..8da144f83abe 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushTopNIntoTableScan.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/PushTopNIntoTableScan.java @@ -72,7 +72,8 @@ public Result apply(TopNNode topNNode, Captures captures, Context context) context.getIdAllocator().getNextId(), result.getHandle(), tableScan.getOutputSymbols(), - tableScan.getAssignments()); + tableScan.getAssignments(), + tableScan.isForDelete()); if (!result.isTopNGuaranteed()) { node = new TopNNode(topNNode.getId(), node, topNNode.getCount(), topNNode.getOrderingScheme(), TopNNode.Step.FINAL); diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/TransformFilteringSemiJoinToInnerJoin.java b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/TransformFilteringSemiJoinToInnerJoin.java new file mode 100644 index 000000000000..d89a427d5da5 --- /dev/null +++ b/presto-main/src/main/java/io/prestosql/sql/planner/iterative/rule/TransformFilteringSemiJoinToInnerJoin.java @@ -0,0 +1,165 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.sql.planner.iterative.rule; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import io.prestosql.Session; +import io.prestosql.matching.Capture; +import io.prestosql.matching.Captures; +import io.prestosql.matching.Pattern; +import io.prestosql.sql.planner.Symbol; +import io.prestosql.sql.planner.iterative.Rule; +import io.prestosql.sql.planner.optimizations.PlanNodeSearcher; +import io.prestosql.sql.planner.plan.AggregationNode; +import io.prestosql.sql.planner.plan.Assignments; +import io.prestosql.sql.planner.plan.FilterNode; +import io.prestosql.sql.planner.plan.JoinNode; +import io.prestosql.sql.planner.plan.JoinNode.EquiJoinClause; +import io.prestosql.sql.planner.plan.PlanNode; +import io.prestosql.sql.planner.plan.ProjectNode; +import io.prestosql.sql.planner.plan.SemiJoinNode; +import io.prestosql.sql.planner.plan.TableScanNode; +import io.prestosql.sql.tree.Expression; + +import java.util.List; +import java.util.Optional; +import java.util.function.Predicate; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.prestosql.SystemSessionProperties.isRewriteFilteringSemiJoinToInnerJoin; +import static io.prestosql.matching.Capture.newCapture; +import static io.prestosql.sql.ExpressionUtils.and; +import static io.prestosql.sql.ExpressionUtils.extractConjuncts; +import static io.prestosql.sql.planner.ExpressionSymbolInliner.inlineSymbols; +import static io.prestosql.sql.planner.plan.AggregationNode.Step.SINGLE; +import static io.prestosql.sql.planner.plan.AggregationNode.singleGroupingSet; +import static io.prestosql.sql.planner.plan.JoinNode.Type.INNER; +import static io.prestosql.sql.planner.plan.Patterns.filter; +import static io.prestosql.sql.planner.plan.Patterns.semiJoin; +import static io.prestosql.sql.planner.plan.Patterns.source; +import static io.prestosql.sql.tree.BooleanLiteral.TRUE_LITERAL; +import static java.util.function.Predicate.not; + +/** + * Rewrite filtering semi-join to inner join. + *

+ * Transforms: + *

+ * - Filter (semiJoinSymbol AND predicate)
+ *    - SemiJoin (semiJoinSymbol <- (a IN b))
+ *        source: plan A producing symbol a
+ *        filtering source: plan B producing symbol b
+ * 
+ *

+ * Into: + *

+ * - Project (semiJoinSymbol <- TRUE)
+ *    - Join INNER on (a = b), joinFilter (predicate with semiJoinSymbol replaced with TRUE)
+ *       - source
+ *       - Aggregation distinct(b)
+ *          - filtering source
+ * 
+ */ +public class TransformFilteringSemiJoinToInnerJoin + implements Rule +{ + private static final Capture SEMI_JOIN = newCapture(); + + private static final Pattern PATTERN = filter() + .with(source().matching(semiJoin().capturedAs(SEMI_JOIN))); + + @Override + public Pattern getPattern() + { + return PATTERN; + } + + @Override + public boolean isEnabled(Session session) + { + return isRewriteFilteringSemiJoinToInnerJoin(session); + } + + @Override + public Result apply(FilterNode filterNode, Captures captures, Context context) + { + SemiJoinNode semiJoin = captures.get(SEMI_JOIN); + + // Do not transform semi-join in context of DELETE + if (PlanNodeSearcher.searchFrom(semiJoin.getSource(), context.getLookup()) + .where(node -> node instanceof TableScanNode && ((TableScanNode) node).isForDelete()) + .matches()) { + return Result.empty(); + } + + Symbol semiJoinSymbol = semiJoin.getSemiJoinOutput(); + Predicate isSemiJoinSymbol = expression -> expression.equals(semiJoinSymbol.toSymbolReference()); + + List conjuncts = extractConjuncts(filterNode.getPredicate()); + if (conjuncts.stream().noneMatch(isSemiJoinSymbol)) { + return Result.empty(); + } + Expression filteredPredicate = and(conjuncts.stream() + .filter(not(isSemiJoinSymbol)) + .collect(toImmutableList())); + + Expression simplifiedPredicate = inlineSymbols(symbol -> { + if (symbol.equals(semiJoinSymbol)) { + return TRUE_LITERAL; + } + return symbol.toSymbolReference(); + }, filteredPredicate); + + Optional joinFilter = simplifiedPredicate.equals(TRUE_LITERAL) ? Optional.empty() : Optional.of(simplifiedPredicate); + + PlanNode filteringSourceDistinct = new AggregationNode( + context.getIdAllocator().getNextId(), + semiJoin.getFilteringSource(), + ImmutableMap.of(), + singleGroupingSet(ImmutableList.of(semiJoin.getFilteringSourceJoinSymbol())), + ImmutableList.of(), + SINGLE, + Optional.empty(), + Optional.empty()); + + JoinNode innerJoin = new JoinNode( + semiJoin.getId(), + INNER, + semiJoin.getSource(), + filteringSourceDistinct, + ImmutableList.of(new EquiJoinClause(semiJoin.getSourceJoinSymbol(), semiJoin.getFilteringSourceJoinSymbol())), + semiJoin.getSource().getOutputSymbols(), + ImmutableList.of(), + joinFilter, + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + semiJoin.getDynamicFilterId() + .map(id -> ImmutableMap.of(id, semiJoin.getFilteringSourceJoinSymbol())) + .orElse(ImmutableMap.of()), + Optional.empty()); + + ProjectNode project = new ProjectNode( + context.getIdAllocator().getNextId(), + innerJoin, + Assignments.builder() + .putIdentities(innerJoin.getOutputSymbols()) + .put(semiJoinSymbol, TRUE_LITERAL) + .build()); + + return Result.ofPlanNode(project); + } +} diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/BeginTableWrite.java b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/BeginTableWrite.java index 3f2ca3cc8ab4..ba076eb5cc7b 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/BeginTableWrite.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/BeginTableWrite.java @@ -204,7 +204,8 @@ private PlanNode rewriteDeleteTableScan(PlanNode node, TableHandle handle) handle, scan.getOutputSymbols(), scan.getAssignments(), - scan.getEnforcedConstraint()); + scan.getEnforcedConstraint(), + scan.isForDelete()); } if (node instanceof FilterNode) { diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java index aeef6c94d28b..5813294039a0 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java @@ -284,7 +284,7 @@ public PlanAndMappings visitTableScan(TableScanNode node, UnaliasContext context }); return new PlanAndMappings( - new TableScanNode(node.getId(), node.getTable(), newOutputs, newAssignments, node.getEnforcedConstraint()), + new TableScanNode(node.getId(), node.getTable(), newOutputs, newAssignments, node.getEnforcedConstraint(), node.isForDelete()), mapping); } diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/plan/TableScanNode.java b/presto-main/src/main/java/io/prestosql/sql/planner/plan/TableScanNode.java index 8742ecfbf59a..8d611da1360f 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/plan/TableScanNode.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/plan/TableScanNode.java @@ -41,6 +41,7 @@ public class TableScanNode private final Map assignments; // symbol -> column private final TupleDomain enforcedConstraint; + private final boolean forDelete; // We need this factory method to disambiguate with the constructor used for deserializing // from a json object. The deserializer sets some fields which are never transported @@ -49,9 +50,10 @@ public static TableScanNode newInstance( PlanNodeId id, TableHandle table, List outputs, - Map assignments) + Map assignments, + boolean forDelete) { - return new TableScanNode(id, table, outputs, assignments, TupleDomain.all()); + return new TableScanNode(id, table, outputs, assignments, TupleDomain.all(), forDelete); } @JsonCreator @@ -59,7 +61,8 @@ public TableScanNode( @JsonProperty("id") PlanNodeId id, @JsonProperty("table") TableHandle table, @JsonProperty("outputSymbols") List outputs, - @JsonProperty("assignments") Map assignments) + @JsonProperty("assignments") Map assignments, + @JsonProperty("forDelete") boolean forDelete) { // This constructor is for JSON deserialization only. Do not use. super(id); @@ -68,6 +71,7 @@ public TableScanNode( this.assignments = ImmutableMap.copyOf(requireNonNull(assignments, "assignments is null")); checkArgument(assignments.keySet().containsAll(outputs), "assignments does not cover all of outputs"); this.enforcedConstraint = null; + this.forDelete = forDelete; } public TableScanNode( @@ -75,7 +79,8 @@ public TableScanNode( TableHandle table, List outputs, Map assignments, - TupleDomain enforcedConstraint) + TupleDomain enforcedConstraint, + boolean forDelete) { super(id); this.table = requireNonNull(table, "table is null"); @@ -83,6 +88,7 @@ public TableScanNode( this.assignments = ImmutableMap.copyOf(requireNonNull(assignments, "assignments is null")); checkArgument(assignments.keySet().containsAll(outputs), "assignments does not cover all of outputs"); this.enforcedConstraint = requireNonNull(enforcedConstraint, "enforcedConstraint is null"); + this.forDelete = forDelete; } @JsonProperty("table") @@ -119,6 +125,12 @@ public TupleDomain getEnforcedConstraint() return enforcedConstraint; } + @JsonProperty("forDelete") + public boolean isForDelete() + { + return forDelete; + } + @Override public List getSources() { @@ -139,6 +151,7 @@ public String toString() .add("outputSymbols", outputSymbols) .add("assignments", assignments) .add("enforcedConstraint", enforcedConstraint) + .add("forDelete", forDelete) .toString(); } diff --git a/presto-main/src/test/java/io/prestosql/cost/TestCostCalculator.java b/presto-main/src/test/java/io/prestosql/cost/TestCostCalculator.java index aac5dda9fab1..810f4e558179 100644 --- a/presto-main/src/test/java/io/prestosql/cost/TestCostCalculator.java +++ b/presto-main/src/test/java/io/prestosql/cost/TestCostCalculator.java @@ -796,7 +796,8 @@ private TableScanNode tableScan(String id, String... symbols) new TableHandle(new CatalogName("tpch"), tableHandle, INSTANCE, Optional.of(new TpchTableLayoutHandle(tableHandle, TupleDomain.all()))), symbolsList, assignments.build(), - TupleDomain.all()); + TupleDomain.all(), + false); } private PlanNode project(String id, PlanNode source, String symbol, Expression expression) diff --git a/presto-main/src/test/java/io/prestosql/execution/MockRemoteTaskFactory.java b/presto-main/src/test/java/io/prestosql/execution/MockRemoteTaskFactory.java index da7a779689cf..bb5aaecae9f5 100644 --- a/presto-main/src/test/java/io/prestosql/execution/MockRemoteTaskFactory.java +++ b/presto-main/src/test/java/io/prestosql/execution/MockRemoteTaskFactory.java @@ -110,7 +110,8 @@ public MockRemoteTask createTableScanTask(TaskId taskId, InternalNode newNode, L sourceId, TEST_TABLE_HANDLE, ImmutableList.of(symbol), - ImmutableMap.of(symbol, new TestingColumnHandle("column"))), + ImmutableMap.of(symbol, new TestingColumnHandle("column")), + false), ImmutableMap.of(symbol, VARCHAR), SOURCE_DISTRIBUTION, ImmutableList.of(sourceId), diff --git a/presto-main/src/test/java/io/prestosql/execution/TaskTestUtils.java b/presto-main/src/test/java/io/prestosql/execution/TaskTestUtils.java index 4ab808bb8f8a..17a16f672913 100644 --- a/presto-main/src/test/java/io/prestosql/execution/TaskTestUtils.java +++ b/presto-main/src/test/java/io/prestosql/execution/TaskTestUtils.java @@ -91,7 +91,8 @@ private TaskTestUtils() {} TABLE_SCAN_NODE_ID, TEST_TABLE_HANDLE, ImmutableList.of(SYMBOL), - ImmutableMap.of(SYMBOL, new TestingColumnHandle("column", 0, BIGINT))), + ImmutableMap.of(SYMBOL, new TestingColumnHandle("column", 0, BIGINT)), + false), ImmutableMap.of(SYMBOL, VARCHAR), SOURCE_DISTRIBUTION, ImmutableList.of(TABLE_SCAN_NODE_ID), diff --git a/presto-main/src/test/java/io/prestosql/execution/scheduler/TestPhasedExecutionSchedule.java b/presto-main/src/test/java/io/prestosql/execution/scheduler/TestPhasedExecutionSchedule.java index a5a2e24b41cf..1e409c2b765c 100644 --- a/presto-main/src/test/java/io/prestosql/execution/scheduler/TestPhasedExecutionSchedule.java +++ b/presto-main/src/test/java/io/prestosql/execution/scheduler/TestPhasedExecutionSchedule.java @@ -181,7 +181,8 @@ private static PlanFragment createBroadcastJoinPlanFragment(String name, PlanFra new PlanNodeId(name), TEST_TABLE_HANDLE, ImmutableList.of(symbol), - ImmutableMap.of(symbol, new TestingColumnHandle("column"))); + ImmutableMap.of(symbol, new TestingColumnHandle("column")), + false); RemoteSourceNode remote = new RemoteSourceNode(new PlanNodeId("build_id"), buildFragment.getId(), ImmutableList.of(), Optional.empty(), REPLICATE); PlanNode join = new JoinNode( @@ -232,7 +233,8 @@ private static PlanFragment createTableScanPlanFragment(String name) new PlanNodeId(name), TEST_TABLE_HANDLE, ImmutableList.of(symbol), - ImmutableMap.of(symbol, new TestingColumnHandle("column"))); + ImmutableMap.of(symbol, new TestingColumnHandle("column")), + false); return createFragment(planNode); } diff --git a/presto-main/src/test/java/io/prestosql/execution/scheduler/TestSourcePartitionedScheduler.java b/presto-main/src/test/java/io/prestosql/execution/scheduler/TestSourcePartitionedScheduler.java index 73a499110cf3..b0bd04652a42 100644 --- a/presto-main/src/test/java/io/prestosql/execution/scheduler/TestSourcePartitionedScheduler.java +++ b/presto-main/src/test/java/io/prestosql/execution/scheduler/TestSourcePartitionedScheduler.java @@ -566,7 +566,8 @@ private static StageExecutionPlan createPlan(ConnectorSplitSource splitSource) tableScanNodeId, TEST_TABLE_HANDLE, ImmutableList.of(symbol), - ImmutableMap.of(symbol, new TestingColumnHandle("column"))); + ImmutableMap.of(symbol, new TestingColumnHandle("column")), + false); FilterNode filterNode = new FilterNode( new PlanNodeId("filter_node_id"), tableScan, diff --git a/presto-main/src/test/java/io/prestosql/server/TestDynamicFilterService.java b/presto-main/src/test/java/io/prestosql/server/TestDynamicFilterService.java index 1adbaf9255ab..49fe6ddfdb0f 100644 --- a/presto-main/src/test/java/io/prestosql/server/TestDynamicFilterService.java +++ b/presto-main/src/test/java/io/prestosql/server/TestDynamicFilterService.java @@ -652,7 +652,8 @@ private static PlanFragment createPlan(DynamicFilterId dynamicFilterId, Partitio tableScanNodeId, TEST_TABLE_HANDLE, ImmutableList.of(symbol), - ImmutableMap.of(symbol, new TestingMetadata.TestingColumnHandle("column"))); + ImmutableMap.of(symbol, new TestingMetadata.TestingColumnHandle("column")), + false); FilterNode filterNode = new FilterNode( new PlanNodeId("filter_node_id"), tableScan, diff --git a/presto-main/src/test/java/io/prestosql/sql/analyzer/TestFeaturesConfig.java b/presto-main/src/test/java/io/prestosql/sql/analyzer/TestFeaturesConfig.java index ec7ec08abd87..ecbf352df26b 100644 --- a/presto-main/src/test/java/io/prestosql/sql/analyzer/TestFeaturesConfig.java +++ b/presto-main/src/test/java/io/prestosql/sql/analyzer/TestFeaturesConfig.java @@ -104,7 +104,8 @@ public void testDefaults() .setPredicatePushdownUseTableProperties(true) .setIgnoreDownstreamPreferences(false) .setOmitDateTimeTypePrecision(false) - .setIterativeRuleBasedColumnPruning(true)); + .setIterativeRuleBasedColumnPruning(true) + .setRewriteFilteringSemiJoinToInnerJoin(true)); } @Test @@ -174,6 +175,7 @@ public void testExplicitPropertyMappings() .put("optimizer.ignore-downstream-preferences", "true") .put("deprecated.omit-datetime-type-precision", "true") .put("optimizer.iterative-rule-based-column-pruning", "false") + .put("optimizer.rewrite-filtering-semi-join-to-inner-join", "false") .build(); FeaturesConfig expected = new FeaturesConfig() @@ -239,7 +241,8 @@ public void testExplicitPropertyMappings() .setPredicatePushdownUseTableProperties(false) .setIgnoreDownstreamPreferences(true) .setOmitDateTimeTypePrecision(true) - .setIterativeRuleBasedColumnPruning(false); + .setIterativeRuleBasedColumnPruning(false) + .setRewriteFilteringSemiJoinToInnerJoin(false); assertFullMapping(properties, expected); } } diff --git a/presto-main/src/test/java/io/prestosql/sql/planner/AbstractPredicatePushdownTest.java b/presto-main/src/test/java/io/prestosql/sql/planner/AbstractPredicatePushdownTest.java index afa08101a4a4..184e96d758f2 100644 --- a/presto-main/src/test/java/io/prestosql/sql/planner/AbstractPredicatePushdownTest.java +++ b/presto-main/src/test/java/io/prestosql/sql/planner/AbstractPredicatePushdownTest.java @@ -28,6 +28,7 @@ import java.util.List; import static io.prestosql.SystemSessionProperties.ENABLE_DYNAMIC_FILTERING; +import static io.prestosql.SystemSessionProperties.FILTERING_SEMI_JOIN_TO_INNER; import static io.prestosql.SystemSessionProperties.JOIN_REORDERING_STRATEGY; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.anyTree; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.assignUniqueId; @@ -80,6 +81,7 @@ public void testPushDownToLhsOfSemiJoin() { assertPlan("SELECT quantity FROM (SELECT * FROM lineitem WHERE orderkey IN (SELECT orderkey FROM orders)) " + "WHERE linenumber = 2", + noSemiJoinRewrite(), anyTree( semiJoin("LINE_ORDER_KEY", "ORDERS_ORDER_KEY", "SEMI_JOIN_RESULT", enableDynamicFiltering, anyTree( @@ -95,6 +97,7 @@ public void testPushDownToLhsOfSemiJoin() public void testNonDeterministicPredicatePropagatesOnlyToSourceSideOfSemiJoin() { assertPlan("SELECT * FROM lineitem WHERE orderkey IN (SELECT orderkey FROM orders) AND orderkey = random(5)", + noSemiJoinRewrite(), anyTree( semiJoin("LINE_ORDER_KEY", "ORDERS_ORDER_KEY", "SEMI_JOIN_RESULT", enableDynamicFiltering, anyTree( @@ -121,6 +124,7 @@ public void testNonDeterministicPredicatePropagatesOnlyToSourceSideOfSemiJoin() public void testNonDeterministicPredicateDoesNotPropagateFromFilteringSideToSourceSideOfSemiJoin() { assertPlan("SELECT * FROM lineitem WHERE orderkey IN (SELECT orderkey FROM orders WHERE orderkey = random(5))", + noSemiJoinRewrite(), anyTree( semiJoin("LINE_ORDER_KEY", "ORDERS_ORDER_KEY", "SEMI_JOIN_RESULT", enableDynamicFiltering, anyTree( @@ -136,6 +140,7 @@ public void testNonDeterministicPredicateDoesNotPropagateFromFilteringSideToSour public void testGreaterPredicateFromFilterSidePropagatesToSourceSideOfSemiJoin() { assertPlan("SELECT quantity FROM (SELECT * FROM lineitem WHERE orderkey IN (SELECT orderkey FROM orders WHERE orderkey > 2))", + noSemiJoinRewrite(), anyTree( semiJoin("LINE_ORDER_KEY", "ORDERS_ORDER_KEY", "SEMI_JOIN_RESULT", enableDynamicFiltering, anyTree( @@ -152,6 +157,7 @@ public void testGreaterPredicateFromFilterSidePropagatesToSourceSideOfSemiJoin() public void testEqualsPredicateFromFilterSidePropagatesToSourceSideOfSemiJoin() { assertPlan("SELECT quantity FROM (SELECT * FROM lineitem WHERE orderkey IN (SELECT orderkey FROM orders WHERE orderkey = 2))", + noSemiJoinRewrite(), anyTree( semiJoin("LINE_ORDER_KEY", "ORDERS_ORDER_KEY", "SEMI_JOIN_RESULT", enableDynamicFiltering, anyTree( @@ -185,6 +191,7 @@ public void testPredicateFromFilterSideNotPropagatesToSourceSideOfSemiJoinIfNotI public void testGreaterPredicateFromSourceSidePropagatesToFilterSideOfSemiJoin() { assertPlan("SELECT quantity FROM (SELECT * FROM lineitem WHERE orderkey IN (SELECT orderkey FROM orders) AND orderkey > 2)", + noSemiJoinRewrite(), anyTree( semiJoin("LINE_ORDER_KEY", "ORDERS_ORDER_KEY", "SEMI_JOIN_RESULT", enableDynamicFiltering, anyTree( @@ -201,6 +208,7 @@ public void testGreaterPredicateFromSourceSidePropagatesToFilterSideOfSemiJoin() public void testEqualPredicateFromSourceSidePropagatesToFilterSideOfSemiJoin() { assertPlan("SELECT quantity FROM (SELECT * FROM lineitem WHERE orderkey IN (SELECT orderkey FROM orders) AND orderkey = 2)", + noSemiJoinRewrite(), anyTree( semiJoin("LINE_ORDER_KEY", "ORDERS_ORDER_KEY", "SEMI_JOIN_RESULT", enableDynamicFiltering, anyTree( @@ -541,4 +549,11 @@ public void testTablePredicateIsExtracted() "orders", ImmutableMap.of("ORDERSTATUS", "orderstatus")))))); } + + private Session noSemiJoinRewrite() + { + return Session.builder(getQueryRunner().getDefaultSession()) + .setSystemProperty(FILTERING_SEMI_JOIN_TO_INNER, "false") + .build(); + } } diff --git a/presto-main/src/test/java/io/prestosql/sql/planner/TestDereferencePushDown.java b/presto-main/src/test/java/io/prestosql/sql/planner/TestDereferencePushDown.java index 3ccfb5bc9e10..fcbdf0953764 100644 --- a/presto-main/src/test/java/io/prestosql/sql/planner/TestDereferencePushDown.java +++ b/presto-main/src/test/java/io/prestosql/sql/planner/TestDereferencePushDown.java @@ -15,12 +15,14 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import io.prestosql.Session; import io.prestosql.sql.planner.assertions.BasePlanTest; import io.prestosql.sql.planner.assertions.PlanMatchPattern; import org.testng.annotations.Test; import java.util.Optional; +import static io.prestosql.SystemSessionProperties.FILTERING_SEMI_JOIN_TO_INNER; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.anyTree; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.equiJoinClause; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.expression; @@ -168,6 +170,9 @@ public void testDereferencePushdownSemiJoin() "FROM t " + "WHERE " + "msg.x IN (SELECT msg.z FROM t)", + Session.builder(getQueryRunner().getDefaultSession()) + .setSystemProperty(FILTERING_SEMI_JOIN_TO_INNER, "false") + .build(), anyTree( semiJoin("a_x", "b_z", "semi_join_symbol", anyTree( diff --git a/presto-main/src/test/java/io/prestosql/sql/planner/TestDynamicFilter.java b/presto-main/src/test/java/io/prestosql/sql/planner/TestDynamicFilter.java index ac8d49bfc299..de4f6d8d80ce 100644 --- a/presto-main/src/test/java/io/prestosql/sql/planner/TestDynamicFilter.java +++ b/presto-main/src/test/java/io/prestosql/sql/planner/TestDynamicFilter.java @@ -33,6 +33,7 @@ import java.util.Optional; import static io.prestosql.SystemSessionProperties.ENABLE_DYNAMIC_FILTERING; +import static io.prestosql.SystemSessionProperties.FILTERING_SEMI_JOIN_TO_INNER; import static io.prestosql.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE; import static io.prestosql.SystemSessionProperties.JOIN_REORDERING_STRATEGY; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.anyNot; @@ -407,6 +408,7 @@ public void testSemiJoin() { assertPlan( "SELECT * FROM orders WHERE orderkey IN (SELECT orderkey FROM lineitem WHERE linenumber % 4 = 0)", + noSemiJoinRewrite(), anyTree( filter("S", project( @@ -447,6 +449,7 @@ public void testSemiJoinWithStaticFiltering() { assertPlan( "SELECT * FROM orders WHERE orderkey IN (SELECT orderkey FROM lineitem WHERE linenumber % 4 = 0) AND orderkey > 0", + noSemiJoinRewrite(), anyTree( filter("S", project( @@ -464,6 +467,7 @@ public void testMultiSemiJoin() assertPlan( "SELECT part.partkey FROM part WHERE part.partkey IN " + "(SELECT lineitem.partkey FROM lineitem WHERE lineitem.orderkey IN (SELECT orders.orderkey FROM orders))", + noSemiJoinRewrite(), anyTree( filter("S0", project( @@ -487,6 +491,7 @@ public void testSemiJoinUnsupportedDynamicFilterRemoval() assertPlan( "WITH t AS (SELECT lineitem.partkey + 1000 partkey FROM lineitem) " + "SELECT t.partkey FROM t WHERE t.partkey IN (SELECT part.partkey FROM part)", + noSemiJoinRewrite(), anyTree( filter("S0", project( @@ -516,4 +521,11 @@ public MatchResult detailMatches(PlanNode node, StatsProvider stats, Session ses } }; } + + private Session noSemiJoinRewrite() + { + return Session.builder(getQueryRunner().getDefaultSession()) + .setSystemProperty(FILTERING_SEMI_JOIN_TO_INNER, "false") + .build(); + } } diff --git a/presto-main/src/test/java/io/prestosql/sql/planner/TestEffectivePredicateExtractor.java b/presto-main/src/test/java/io/prestosql/sql/planner/TestEffectivePredicateExtractor.java index 85875a6db7a6..b11028ae0063 100644 --- a/presto-main/src/test/java/io/prestosql/sql/planner/TestEffectivePredicateExtractor.java +++ b/presto-main/src/test/java/io/prestosql/sql/planner/TestEffectivePredicateExtractor.java @@ -224,7 +224,8 @@ public void setUp() newId(), makeTableHandle(TupleDomain.all()), ImmutableList.copyOf(assignments.keySet()), - assignments); + assignments, + false); expressionNormalizer = new ExpressionIdentityNormalizer(); } @@ -500,7 +501,8 @@ public void testTableScan() newId(), makeTableHandle(TupleDomain.all()), ImmutableList.copyOf(assignments.keySet()), - assignments); + assignments, + false); Expression effectivePredicate = effectivePredicateExtractor.extract(SESSION, node, TypeProvider.empty(), typeAnalyzer); assertEquals(effectivePredicate, BooleanLiteral.TRUE_LITERAL); @@ -509,7 +511,8 @@ public void testTableScan() makeTableHandle(TupleDomain.none()), ImmutableList.copyOf(assignments.keySet()), assignments, - TupleDomain.none()); + TupleDomain.none(), + false); effectivePredicate = effectivePredicateExtractor.extract(SESSION, node, TypeProvider.empty(), typeAnalyzer); assertEquals(effectivePredicate, FALSE_LITERAL); @@ -519,7 +522,8 @@ public void testTableScan() makeTableHandle(predicate), ImmutableList.copyOf(assignments.keySet()), assignments, - predicate); + predicate, + false); effectivePredicate = effectivePredicateExtractor.extract(SESSION, node, TypeProvider.empty(), typeAnalyzer); assertEquals(normalizeConjuncts(effectivePredicate), normalizeConjuncts(equals(bigintLiteral(1L), AE))); @@ -531,7 +535,8 @@ public void testTableScan() makeTableHandle(TupleDomain.withColumnDomains(ImmutableMap.of(scanAssignments.get(A), Domain.singleValue(BIGINT, 1L)))), ImmutableList.copyOf(assignments.keySet()), assignments, - predicate); + predicate, + false); effectivePredicate = effectivePredicateExtractorWithoutTableProperties.extract(SESSION, node, TypeProvider.empty(), typeAnalyzer); assertEquals(normalizeConjuncts(effectivePredicate), normalizeConjuncts(equals(bigintLiteral(2L), BE), equals(bigintLiteral(1L), AE))); @@ -540,7 +545,8 @@ public void testTableScan() makeTableHandle(predicate), ImmutableList.copyOf(assignments.keySet()), assignments, - TupleDomain.all()); + TupleDomain.all(), + false); effectivePredicate = effectivePredicateExtractor.extract(SESSION, node, TypeProvider.empty(), typeAnalyzer); assertEquals(effectivePredicate, and(equals(AE, bigintLiteral(1)), equals(BE, bigintLiteral(2)))); @@ -551,7 +557,8 @@ public void testTableScan() assignments, TupleDomain.withColumnDomains(ImmutableMap.of( scanAssignments.get(A), Domain.multipleValues(BIGINT, ImmutableList.of(1L, 2L, 3L)), - scanAssignments.get(B), Domain.multipleValues(BIGINT, ImmutableList.of(1L, 2L, 3L))))); + scanAssignments.get(B), Domain.multipleValues(BIGINT, ImmutableList.of(1L, 2L, 3L)))), + false); effectivePredicate = effectivePredicateExtractor.extract(SESSION, node, TypeProvider.empty(), typeAnalyzer); assertEquals(normalizeConjuncts(effectivePredicate), normalizeConjuncts(equals(bigintLiteral(2L), BE), equals(bigintLiteral(1L), AE))); @@ -560,7 +567,8 @@ public void testTableScan() makeTableHandle(TupleDomain.all()), ImmutableList.copyOf(assignments.keySet()), assignments, - TupleDomain.all()); + TupleDomain.all(), + false); effectivePredicate = effectivePredicateExtractor.extract(SESSION, node, TypeProvider.empty(), typeAnalyzer); assertEquals(effectivePredicate, BooleanLiteral.TRUE_LITERAL); } @@ -1119,7 +1127,8 @@ private static TableScanNode tableScanNode(Map scanAssignm makeTableHandle(TupleDomain.all()), ImmutableList.copyOf(scanAssignments.keySet()), scanAssignments, - TupleDomain.all()); + TupleDomain.all(), + false); } private static PlanNodeId newId() diff --git a/presto-main/src/test/java/io/prestosql/sql/planner/TestLogicalPlanner.java b/presto-main/src/test/java/io/prestosql/sql/planner/TestLogicalPlanner.java index 319d1215fa8c..406623b2f363 100644 --- a/presto-main/src/test/java/io/prestosql/sql/planner/TestLogicalPlanner.java +++ b/presto-main/src/test/java/io/prestosql/sql/planner/TestLogicalPlanner.java @@ -71,6 +71,7 @@ import static com.google.common.collect.MoreCollectors.toOptional; import static io.airlift.slice.Slices.utf8Slice; import static io.prestosql.SystemSessionProperties.DISTRIBUTED_SORT; +import static io.prestosql.SystemSessionProperties.FILTERING_SEMI_JOIN_TO_INNER; import static io.prestosql.SystemSessionProperties.FORCE_SINGLE_NODE_OUTPUT; import static io.prestosql.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE; import static io.prestosql.SystemSessionProperties.JOIN_REORDERING_STRATEGY; @@ -432,6 +433,7 @@ public void testUncorrelatedSubqueries() tableScan("lineitem", ImmutableMap.of("Y", "orderkey")))))))); assertPlan("SELECT * FROM orders WHERE orderkey IN (SELECT orderkey FROM lineitem WHERE linenumber % 4 = 0)", + noSemiJoinRewrite(), anyTree( filter("S", project( @@ -510,14 +512,14 @@ public void testSameInSubqueryIsAppliedOnlyOnce() // same IN query used for left, right and complex condition assertEquals( countOfMatchingNodes( - plan("SELECT * FROM orders o1 JOIN orders o2 ON o1.orderkey IN (SELECT 1) AND (o1.orderkey IN (SELECT 1) OR o1.orderkey IN (SELECT 1))"), + plan("SELECT * FROM orders o1 JOIN orders o2 ON o1.orderkey NOT IN (SELECT 1) AND (o1.orderkey NOT IN (SELECT 1) OR o1.orderkey NOT IN (SELECT 1))"), SemiJoinNode.class::isInstance), 1); // one subquery used for "1 IN (SELECT 1)", one subquery used for "2 IN (SELECT 1)" assertEquals( countOfMatchingNodes( - plan("SELECT 1 IN (SELECT 1), 2 IN (SELECT 1) WHERE 1 IN (SELECT 1)"), + plan("SELECT 1 NOT IN (SELECT 1), 2 NOT IN (SELECT 1) WHERE 1 NOT IN (SELECT 1)"), SemiJoinNode.class::isInstance), 2); } @@ -1083,10 +1085,32 @@ public void testDistributedSort() public void testRemoveAggregationInSemiJoin() { assertPlanDoesNotContain( - "SELECT custkey FROM orders WHERE custkey IN (SELECT distinct custkey FROM customer)", + "SELECT custkey FROM orders WHERE custkey NOT IN (SELECT distinct custkey FROM customer)", AggregationNode.class); } + @Test + public void testFilteringSemiJoinRewriteToInnerJoin() + { + assertPlan( + "SELECT custkey FROM orders WHERE custkey IN (SELECT custkey FROM customer)", + any( + join( + INNER, + ImmutableList.of(equiJoinClause("CUSTOMER_CUSTKEY", "ORDER_CUSTKEY")), + project( + aggregation( + singleGroupingSet("CUSTOMER_CUSTKEY"), + ImmutableMap.of(), + ImmutableMap.of(), + Optional.empty(), + FINAL, + anyTree( + tableScan("customer", ImmutableMap.of("CUSTOMER_CUSTKEY", "custkey"))))), + anyTree( + tableScan("orders", ImmutableMap.of("ORDER_CUSTKEY", "custkey")))))); + } + @Test public void testOrderByFetch() { @@ -1508,7 +1532,7 @@ public void testSizeBasedJoin() public void testSizeBasedSemiJoin() { // both local.sf100000.nation and local.sf100000.orders don't provide stats, therefore no reordering happens - assertDistributedPlan("SELECT custkey FROM local.\"sf42.5\".orders WHERE orders.custkey IN (SELECT nationkey FROM local.\"sf42.5\".nation)", + assertDistributedPlan("SELECT custkey FROM local.\"sf42.5\".orders WHERE orders.custkey NOT IN (SELECT nationkey FROM local.\"sf42.5\".nation)", automaticJoinDistribution(), output( anyTree( @@ -1519,7 +1543,7 @@ public void testSizeBasedSemiJoin() tableScan("nation", ImmutableMap.of("NATIONKEY", "nationkey"))))))); // values node provides stats - assertDistributedPlan("SELECT custkey FROM local.\"sf42.5\".orders WHERE orders.custkey IN (SELECT t.a FROM (VALUES CAST(1 AS BIGINT), CAST(2 AS BIGINT)) t(a))", + assertDistributedPlan("SELECT custkey FROM local.\"sf42.5\".orders WHERE orders.custkey NOT IN (SELECT t.a FROM (VALUES CAST(1 AS BIGINT), CAST(2 AS BIGINT)) t(a))", automaticJoinDistribution(), output( anyTree( @@ -1555,4 +1579,11 @@ private Session automaticJoinDistribution() .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name()) .build(); } + + private Session noSemiJoinRewrite() + { + return Session.builder(getQueryRunner().getDefaultSession()) + .setSystemProperty(FILTERING_SEMI_JOIN_TO_INNER, "false") + .build(); + } } diff --git a/presto-main/src/test/java/io/prestosql/sql/planner/TestQuantifiedComparison.java b/presto-main/src/test/java/io/prestosql/sql/planner/TestQuantifiedComparison.java index cad7885458c8..628796402205 100644 --- a/presto-main/src/test/java/io/prestosql/sql/planner/TestQuantifiedComparison.java +++ b/presto-main/src/test/java/io/prestosql/sql/planner/TestQuantifiedComparison.java @@ -13,6 +13,7 @@ */ package io.prestosql.sql.planner; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.prestosql.sql.planner.assertions.BasePlanTest; import io.prestosql.sql.planner.plan.AggregationNode; @@ -21,12 +22,15 @@ import org.testng.annotations.Test; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.anyTree; +import static io.prestosql.sql.planner.assertions.PlanMatchPattern.equiJoinClause; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.filter; +import static io.prestosql.sql.planner.assertions.PlanMatchPattern.join; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.node; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.project; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.semiJoin; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.tableScan; import static io.prestosql.sql.planner.assertions.PlanMatchPattern.values; +import static io.prestosql.sql.planner.plan.JoinNode.Type.INNER; public class TestQuantifiedComparison extends BasePlanTest @@ -36,11 +40,11 @@ public void testQuantifiedComparisonEqualsAny() { String query = "SELECT orderkey, custkey FROM orders WHERE orderkey = ANY (VALUES ROW(CAST(5 as BIGINT)), ROW(CAST(3 as BIGINT)))"; assertPlan(query, anyTree( - filter("S", - project( - semiJoin("X", "Y", "S", - anyTree(tableScan("orders", ImmutableMap.of("X", "orderkey"))), - anyTree(values(ImmutableMap.of("Y", 0)))))))); + join( + INNER, + ImmutableList.of(equiJoinClause("Y", "X")), + anyTree(values(ImmutableMap.of("Y", 0))), + anyTree(tableScan("orders", ImmutableMap.of("X", "orderkey")))))); } @Test diff --git a/presto-main/src/test/java/io/prestosql/sql/planner/TestTypeValidator.java b/presto-main/src/test/java/io/prestosql/sql/planner/TestTypeValidator.java index 4a790e710c78..274524f284b8 100644 --- a/presto-main/src/test/java/io/prestosql/sql/planner/TestTypeValidator.java +++ b/presto-main/src/test/java/io/prestosql/sql/planner/TestTypeValidator.java @@ -101,7 +101,8 @@ public void setUp() TEST_TABLE_HANDLE, ImmutableList.copyOf(assignments.keySet()), assignments, - TupleDomain.all()); + TupleDomain.all(), + false); } @Test diff --git a/presto-main/src/test/java/io/prestosql/sql/planner/iterative/rule/TestTransformFilteringSemiJoinToInnerJoin.java b/presto-main/src/test/java/io/prestosql/sql/planner/iterative/rule/TestTransformFilteringSemiJoinToInnerJoin.java new file mode 100644 index 000000000000..eefe051aa92c --- /dev/null +++ b/presto-main/src/test/java/io/prestosql/sql/planner/iterative/rule/TestTransformFilteringSemiJoinToInnerJoin.java @@ -0,0 +1,182 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.sql.planner.iterative.rule; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import io.prestosql.sql.planner.Symbol; +import io.prestosql.sql.planner.assertions.PlanMatchPattern; +import io.prestosql.sql.planner.iterative.rule.test.BaseRuleTest; +import io.prestosql.sql.planner.plan.Assignments; +import org.testng.annotations.Test; + +import java.util.Optional; + +import static io.prestosql.sql.planner.assertions.PlanMatchPattern.aggregation; +import static io.prestosql.sql.planner.assertions.PlanMatchPattern.equiJoinClause; +import static io.prestosql.sql.planner.assertions.PlanMatchPattern.join; +import static io.prestosql.sql.planner.assertions.PlanMatchPattern.project; +import static io.prestosql.sql.planner.assertions.PlanMatchPattern.singleGroupingSet; +import static io.prestosql.sql.planner.assertions.PlanMatchPattern.values; +import static io.prestosql.sql.planner.iterative.rule.test.PlanBuilder.expression; +import static io.prestosql.sql.planner.plan.AggregationNode.Step.SINGLE; +import static io.prestosql.sql.planner.plan.JoinNode.Type.INNER; + +public class TestTransformFilteringSemiJoinToInnerJoin + extends BaseRuleTest +{ + @Test + public void testTransformSemiJoinToInnerJoin() + { + tester().assertThat(new TransformFilteringSemiJoinToInnerJoin()) + .on(p -> { + Symbol a = p.symbol("a"); + Symbol b = p.symbol("b"); + Symbol aInB = p.symbol("a_in_b"); + return p.filter( + expression("a_in_b AND a > 5"), + p.semiJoin( + p.values(a), + p.values(b), + a, + b, + aInB, + Optional.empty(), + Optional.empty(), + Optional.empty())); + }) + .matches(project( + ImmutableMap.of("a", PlanMatchPattern.expression("a"), "a_in_b", PlanMatchPattern.expression("true")), + join( + INNER, + ImmutableList.of(equiJoinClause("a", "b")), + Optional.of("a > 5"), + values("a"), + aggregation( + singleGroupingSet("b"), + ImmutableMap.of(), + ImmutableMap.of(), + Optional.empty(), + SINGLE, + values("b"))))); + } + + @Test + public void testRemoveRedundantFilter() + { + tester().assertThat(new TransformFilteringSemiJoinToInnerJoin()) + .on(p -> { + Symbol a = p.symbol("a"); + Symbol b = p.symbol("b"); + Symbol aInB = p.symbol("a_in_b"); + return p.filter( + expression("a_in_b"), + p.semiJoin( + p.values(a), + p.values(b), + a, + b, + aInB, + Optional.empty(), + Optional.empty(), + Optional.empty())); + }) + .matches(project( + ImmutableMap.of("a", PlanMatchPattern.expression("a"), "a_in_b", PlanMatchPattern.expression("true")), + join( + INNER, + ImmutableList.of(equiJoinClause("a", "b")), + Optional.empty(), + values("a"), + aggregation( + singleGroupingSet("b"), + ImmutableMap.of(), + ImmutableMap.of(), + Optional.empty(), + SINGLE, + values("b"))))); + } + + @Test + public void testFilterNotMatching() + { + tester().assertThat(new TransformFilteringSemiJoinToInnerJoin()) + .on(p -> { + Symbol a = p.symbol("a"); + Symbol b = p.symbol("b"); + Symbol aInB = p.symbol("a_in_b"); + return p.filter( + expression("a > 5"), + p.semiJoin( + p.values(a), + p.values(b), + a, + b, + aInB, + Optional.empty(), + Optional.empty(), + Optional.empty())); + }) + .doesNotFire(); + } + + @Test + public void testDoNotRewriteInContextOfDelete() + { + tester().assertThat(new TransformFilteringSemiJoinToInnerJoin()) + .on(p -> { + Symbol a = p.symbol("a"); + Symbol b = p.symbol("b"); + Symbol aInB = p.symbol("a_in_b"); + return p.filter( + expression("a_in_b"), + p.semiJoin( + p.tableScan( + ImmutableList.of(a), + true), + p.values(b), + a, + b, + aInB, + Optional.empty(), + Optional.empty(), + Optional.empty())); + }) + .doesNotFire(); + + tester().assertThat(new TransformFilteringSemiJoinToInnerJoin()) + .on(p -> { + Symbol a = p.symbol("a"); + Symbol b = p.symbol("b"); + Symbol c = p.symbol("c"); + Symbol aInB = p.symbol("a_in_b"); + return p.filter( + expression("a_in_b"), + p.semiJoin( + p.project( + Assignments.of(a, expression("c")), + p.tableScan( + ImmutableList.of(c), + true)), + p.values(b), + a, + b, + aInB, + Optional.empty(), + Optional.empty(), + Optional.empty())); + }) + .doesNotFire(); + } +} diff --git a/presto-main/src/test/java/io/prestosql/sql/planner/iterative/rule/test/PlanBuilder.java b/presto-main/src/test/java/io/prestosql/sql/planner/iterative/rule/test/PlanBuilder.java index e41cca71a2d9..36433f7ab22f 100644 --- a/presto-main/src/test/java/io/prestosql/sql/planner/iterative/rule/test/PlanBuilder.java +++ b/presto-main/src/test/java/io/prestosql/sql/planner/iterative/rule/test/PlanBuilder.java @@ -91,6 +91,7 @@ import io.prestosql.sql.tree.FunctionCall; import io.prestosql.sql.tree.NullLiteral; import io.prestosql.testing.TestingHandle; +import io.prestosql.testing.TestingMetadata; import io.prestosql.testing.TestingMetadata.TestingTableHandle; import io.prestosql.testing.TestingTransactionHandle; @@ -482,6 +483,17 @@ public CorrelatedJoinNode correlatedJoin(List correlation, PlanNode inpu return new CorrelatedJoinNode(idAllocator.getNextId(), input, subquery, correlation, type, filter, originSubquery); } + public TableScanNode tableScan(List symbols, boolean forDelete) + { + return new TableScanNode( + idAllocator.getNextId(), + new TableHandle(new CatalogName("testConnector"), new TestingTableHandle(), TestingTransactionHandle.create(), Optional.of(TestingHandle.INSTANCE)), + symbols, + symbols.stream().collect(toImmutableMap(identity(), symbol -> new TestingMetadata.TestingColumnHandle(symbol.getName()))), + TupleDomain.all(), + forDelete); + } + public TableScanNode tableScan(List symbols, Map assignments) { return tableScan( @@ -509,7 +521,8 @@ public TableScanNode tableScan( tableHandle, symbols, assignments, - enforcedConstraint); + enforcedConstraint, + false); } public TableFinishNode tableDelete(SchemaTableName schemaTableName, PlanNode deleteSource, Symbol deleteRowId)