@@ -2,26 +2,26 @@ name: "Q1"
22description : " TPC-DS Query 1 optimizer test"
33
44sql : |
5- WITH customer_total_return
6- AS (SELECT sr_customer_sk AS ctr_customer_sk,
7- sr_store_sk AS ctr_store_sk,
8- Sum(sr_return_amt) AS ctr_total_return
9- FROM store_returns,
10- date_dim
11- WHERE sr_returned_date_sk = d_date_sk
12- AND d_year = 2001
13- GROUP BY sr_customer_sk,
14- sr_store_sk)
15- SELECT c_customer_id
16- FROM customer_total_return ctr1,
17- store,
18- customer
19- WHERE ctr1.ctr_total_return > (SELECT Avg(ctr_total_return) * 1.2
20- FROM customer_total_return ctr2
21- WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk)
22- AND s_store_sk = ctr1.ctr_store_sk
23- AND s_state = 'TN'
24- AND ctr1.ctr_customer_sk = c_customer_sk
5+ WITH customer_total_return
6+ AS (SELECT sr_customer_sk AS ctr_customer_sk,
7+ sr_store_sk AS ctr_store_sk,
8+ Sum(sr_return_amt) AS ctr_total_return
9+ FROM store_returns,
10+ date_dim
11+ WHERE sr_returned_date_sk = d_date_sk
12+ AND d_year = 2001
13+ GROUP BY sr_customer_sk,
14+ sr_store_sk)
15+ SELECT c_customer_id
16+ FROM customer_total_return ctr1,
17+ store,
18+ customer
19+ WHERE ctr1.ctr_total_return > (SELECT Avg(ctr_total_return) * 1.2
20+ FROM customer_total_return ctr2
21+ WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk)
22+ AND s_store_sk = ctr1.ctr_store_sk
23+ AND s_state = 'TN'
24+ AND ctr1.ctr_customer_sk = c_customer_sk
2525 ORDER BY c_customer_id
2626 LIMIT 100
2727
@@ -31,17 +31,17 @@ table_statistics:
3131 num_rows : 73049 # Estimated based on typical date dimension cardinality
3232 data_size : 2138624 # Directly from snow_plan: "bytes: 2,138,624"
3333 number_of_segments : 1 # From snow_plan: "partitions: 1/1"
34-
34+
3535 store_returns :
3636 num_rows : 287000000 # Estimated based on data size and typical row size
3737 data_size : 124763446272 # Directly from snow_plan: "bytes: 124,763,446,272"
3838 number_of_segments : 7070 # From snow_plan: "partitions: 7070/7070"
39-
39+
4040 store :
4141 num_rows : 1002 # Estimated based on typical store dimension cardinality
4242 data_size : 135680 # Directly from snow_plan: "bytes: 135,680"
4343 number_of_segments : 1 # From snow_plan: "partitions: 1/1"
44-
44+
4545 customer :
4646 num_rows : 12000000 # Estimated based on typical customer dimension size
4747 data_size : 2328538624 # Directly from snow_plan: "bytes: 2,328,538,624"
@@ -55,58 +55,58 @@ column_statistics:
5555 max : 2010 # Typical range for TPC-DS
5656 ndv : 21 # Based on min/max range (2010-1990+1)
5757 null_count : 0 # Primary dimension columns typically don't have nulls
58-
58+
5959 date_dim.d_date_sk :
6060 min : 1 # Typical starting value for surrogate key
6161 max : 73049 # Based on table row count
6262 ndv : 73049 # Primary key, so NDV equals row count
6363 null_count : 0 # Primary key cannot be null
64-
64+
6565 # Store returns columns used in the query
6666 store_returns.sr_returned_date_sk :
6767 min : 1 # Matches date_dim.d_date_sk min
6868 max : 73049 # Matches date_dim.d_date_sk max
6969 ndv : 73049 # Foreign key to date_dim
7070 null_count : 287998 # Inferred from filter in snow_plan: "STORE_RETURNS.SR_RETURNED_DATE_SK IS NOT NULL"
71-
71+
7272 store_returns.sr_customer_sk :
7373 min : 1 # Typical starting value for surrogate key
7474 max : 12000000 # Matches customer.c_customer_sk max
7575 ndv : 11000000 # Estimated as slightly less than customer table cardinality
7676 null_count : 143500 # Inferred from filter in snow_plan: "STORE_RETURNS.SR_CUSTOMER_SK IS NOT NULL"
77-
77+
7878 store_returns.sr_store_sk :
7979 min : 1 # Typical starting value for surrogate key
8080 max : 1002 # Matches store.s_store_sk max
8181 ndv : 1002 # Foreign key to store table
8282 null_count : 143500 # Inferred from filter in snow_plan: "STORE_RETURNS.SR_STORE_SK IS NOT NULL"
83-
83+
8484 store_returns.sr_return_amt :
8585 min : 0.01 # Minimum reasonable return amount
8686 max : 10000.00 # Maximum reasonable return amount
8787 ndv : 100000 # Estimated based on typical distribution
8888 null_count : 0 # Return amount is typically not null
89-
89+
9090 # Store columns used in the query
9191 store.s_store_sk :
9292 min : 1 # Typical starting value for surrogate key
9393 max : 1002 # Based on estimated row count
9494 ndv : 1002 # Primary key, so NDV equals row count
9595 null_count : 0 # Primary key cannot be null
96-
96+
9797 store.s_state :
9898 min : " AK" # AK (Alaska): alphabetically first US state abbreviation
9999 max : " WY" # Wyoming (alphabetically last US state)
100100 ndv : 50 # Number of US states
101101 null_count : 0 # State is typically not null
102-
102+
103103 # Customer columns used in the query
104104 customer.c_customer_sk :
105105 min : 1 # Typical starting value for surrogate key
106106 max : 12000000 # Based on estimated row count
107107 ndv : 12000000 # Primary key, so NDV equals row count
108108 null_count : 0 # Primary key cannot be null
109-
109+
110110 customer.c_customer_id :
111111 min : " AAAAAAAAAAAAAA" # Lexicographically smallest possible customer ID
112112 max : " ZZZZZZZZZZZZZZ" # Lexicographically largest possible customer ID
0 commit comments