Skip to content

Commit df47934

Browse files
committed
add q1.yaml
1 parent 7f85d94 commit df47934

File tree

2 files changed

+283
-1
lines changed

2 files changed

+283
-1
lines changed
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
name: "Q1"
2+
description: "TPC-DS Query 1 optimizer test"
3+
4+
sql: |
5+
WITH customer_total_return
6+
AS (SELECT sr_customer_sk AS ctr_customer_sk,
7+
sr_store_sk AS ctr_store_sk,
8+
Sum(sr_return_amt) AS ctr_total_return
9+
FROM store_returns,
10+
date_dim
11+
WHERE sr_returned_date_sk = d_date_sk
12+
AND d_year = 2001
13+
GROUP BY sr_customer_sk,
14+
sr_store_sk)
15+
SELECT c_customer_id
16+
FROM customer_total_return ctr1,
17+
store,
18+
customer
19+
WHERE ctr1.ctr_total_return > (SELECT Avg(ctr_total_return) * 1.2
20+
FROM customer_total_return ctr2
21+
WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk)
22+
AND s_store_sk = ctr1.ctr_store_sk
23+
AND s_state = 'TN'
24+
AND ctr1.ctr_customer_sk = c_customer_sk
25+
ORDER BY c_customer_id
26+
LIMIT 100
27+
28+
# Table statistics: data_size and number_of_segments are taken from snow_plan's TableScan information; num_rows values are estimates
29+
table_statistics:
30+
date_dim:
31+
num_rows: 73049 # Estimated based on typical date dimension cardinality
32+
data_size: 2138624 # Directly from snow_plan: "bytes: 2,138,624"
33+
number_of_segments: 1 # From snow_plan: "partitions: 1/1"
34+
35+
store_returns:
36+
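num_rows: 287000000 # Estimated based on data size and typical row size. NOTE(review): this is close to the TPC-DS SF1000 row count, while snow_plan was captured on TPCDS_SF10TCL - confirm the intended scale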
37+
data_size: 124763446272 # Directly from snow_plan: "bytes: 124,763,446,272"
38+
number_of_segments: 7070 # From snow_plan: "partitions: 7070/7070"
39+
40+
store:
41+
num_rows: 1002 # Estimated based on typical store dimension cardinality
42+
data_size: 135680 # Directly from snow_plan: "bytes: 135,680"
43+
number_of_segments: 1 # From snow_plan: "partitions: 1/1"
44+
45+
customer:
46+
num_rows: 12000000 # Estimated based on typical customer dimension size
47+
data_size: 2328538624 # Directly from snow_plan: "bytes: 2,328,538,624"
48+
number_of_segments: 261 # From snow_plan: "partitions: 261/261"
49+
50+
# Column statistics derived from query predicates and typical TPC-DS data distributions
51+
column_statistics:
52+
# Date dimension columns used in the query
53+
date_dim.d_year:
54+
min: 1990 # Typical range for TPC-DS
55+
max: 2010 # Typical range for TPC-DS
56+
ndv: 21 # Based on min/max range (2010-1990+1)
57+
null_count: 0 # Primary dimension columns typically don't have nulls
58+
59+
date_dim.d_date_sk:
60+
min: 1 # Typical starting value for surrogate key
61+
max: 73049 # Based on table row count
62+
ndv: 73049 # Primary key, so NDV equals row count
63+
null_count: 0 # Primary key cannot be null
64+
65+
# Store returns columns used in the query
66+
store_returns.sr_returned_date_sk:
67+
min: 1 # Matches date_dim.d_date_sk min
68+
max: 73049 # Matches date_dim.d_date_sk max
69+
ndv: 73049 # Foreign key to date_dim
70+
null_count: 287998 # Estimated value; the snow_plan filter "STORE_RETURNS.SR_RETURNED_DATE_SK IS NOT NULL" only suggests that the column is nullable
71+
72+
store_returns.sr_customer_sk:
73+
min: 1 # Typical starting value for surrogate key
74+
max: 12000000 # Matches customer.c_customer_sk max
75+
ndv: 11000000 # Estimated as slightly less than customer table cardinality
76+
null_count: 143500 # Estimated value; the snow_plan filter "STORE_RETURNS.SR_CUSTOMER_SK IS NOT NULL" only suggests that the column is nullable
77+
78+
store_returns.sr_store_sk:
79+
min: 1 # Typical starting value for surrogate key
80+
max: 1002 # Matches store.s_store_sk max
81+
ndv: 1002 # Foreign key to store table
82+
null_count: 143500 # Estimated value; the snow_plan filter "STORE_RETURNS.SR_STORE_SK IS NOT NULL" only suggests that the column is nullable
83+
84+
store_returns.sr_return_amt:
85+
min: 0.01 # Minimum reasonable return amount
86+
max: 10000.00 # Maximum reasonable return amount
87+
ndv: 100000 # Estimated based on typical distribution
88+
null_count: 0 # Return amount is typically not null
89+
90+
# Store columns used in the query
91+
store.s_store_sk:
92+
min: 1 # Typical starting value for surrogate key
93+
max: 1002 # Based on estimated row count
94+
ndv: 1002 # Primary key, so NDV equals row count
95+
null_count: 0 # Primary key cannot be null
96+
97+
store.s_state:
98+
min: "AK" # Alaska (alphabetically first US state postal abbreviation)
99+
max: "WY" # Wyoming (alphabetically last US state)
100+
ndv: 50 # Number of US states
101+
null_count: 0 # State is typically not null
102+
103+
# Customer columns used in the query
104+
customer.c_customer_sk:
105+
min: 1 # Typical starting value for surrogate key
106+
max: 12000000 # Based on estimated row count
107+
ndv: 12000000 # Primary key, so NDV equals row count
108+
null_count: 0 # Primary key cannot be null
109+
110+
customer.c_customer_id:
111+
min: "AAAAAAAAAAAAAA" # Lexicographically smallest possible customer ID
112+
max: "ZZZZZZZZZZZZZZ" # Lexicographically largest possible customer ID
113+
ndv: 12000000 # Same as c_customer_sk (1:1 relationship)
114+
null_count: 0 # Customer ID is typically not null
115+
116+
raw_plan: |
117+
Limit
118+
├── limit: [100]
119+
├── offset: [0]
120+
└── Sort
121+
├── sort keys: [default.customer.c_customer_id (#79) ASC]
122+
├── limit: [NONE]
123+
└── EvalScalar
124+
├── scalars: [customer.c_customer_id (#79) AS (#79)]
125+
└── Filter
126+
├── filters: [gt(ctr1.ctr_total_return (#48), SUBQUERY), eq(store.s_store_sk (#49), ctr1.ctr_store_sk (#7)), eq(store.s_state (#73), 'TN'), eq(ctr1.ctr_customer_sk (#3), customer.c_customer_sk (#78))]
127+
└── Join(Cross)
128+
├── build keys: []
129+
├── probe keys: []
130+
├── other filters: []
131+
├── Join(Cross)
132+
│ ├── build keys: []
133+
│ ├── probe keys: []
134+
│ ├── other filters: []
135+
│ ├── EvalScalar
136+
│ │ ├── scalars: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7), Sum(sr_return_amt) (#48) AS (#48)]
137+
│ │ └── Aggregate(Initial)
138+
│ │ ├── group items: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)]
139+
│ │ ├── aggregate functions: [Sum(sr_return_amt) (#48)]
140+
│ │ └── EvalScalar
141+
│ │ ├── scalars: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7), store_returns.sr_return_amt (#11) AS (#11)]
142+
│ │ └── Filter
143+
│ │ ├── filters: [eq(store_returns.sr_returned_date_sk (#0), date_dim.d_date_sk (#20)), eq(date_dim.d_year (#26), 2001)]
144+
│ │ └── Join(Cross)
145+
│ │ ├── build keys: []
146+
│ │ ├── probe keys: []
147+
│ │ ├── other filters: []
148+
│ │ ├── Scan
149+
│ │ │ ├── table: default.store_returns
150+
│ │ │ ├── filters: []
151+
│ │ │ ├── order by: []
152+
│ │ │ └── limit: NONE
153+
│ │ └── Scan
154+
│ │ ├── table: default.date_dim
155+
│ │ ├── filters: []
156+
│ │ ├── order by: []
157+
│ │ └── limit: NONE
158+
│ └── Scan
159+
│ ├── table: default.store
160+
│ ├── filters: []
161+
│ ├── order by: []
162+
│ └── limit: NONE
163+
└── Scan
164+
├── table: default.customer
165+
├── filters: []
166+
├── order by: []
167+
└── limit: NONE
168+
169+
optimized_plan: |
170+
Limit
171+
├── limit: [100]
172+
├── offset: [0]
173+
└── Sort
174+
├── sort keys: [default.customer.c_customer_id (#79) ASC]
175+
├── limit: [100]
176+
└── EvalScalar
177+
├── scalars: [customer.c_customer_id (#79) AS (#79), ctr1.ctr_total_return (#48) AS (#154), scalar_subquery_147 (#147) AS (#155), store.s_store_sk (#49) AS (#156), ctr1.ctr_store_sk (#7) AS (#157), store.s_state (#73) AS (#158), ctr1.ctr_customer_sk (#3) AS (#159), customer.c_customer_sk (#78) AS (#160)]
178+
└── Join(Inner)
179+
├── build keys: [sr_store_sk (#103)]
180+
├── probe keys: [sr_store_sk (#7)]
181+
├── other filters: [gt(ctr1.ctr_total_return (#48), scalar_subquery_147 (#147))]
182+
├── Join(Inner)
183+
│ ├── build keys: [customer.c_customer_sk (#78)]
184+
│ ├── probe keys: [ctr1.ctr_customer_sk (#3)]
185+
│ ├── other filters: []
186+
│ ├── Aggregate(Final)
187+
│ │ ├── group items: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)]
188+
│ │ ├── aggregate functions: [Sum(sr_return_amt) (#48)]
189+
│ │ └── Aggregate(Partial)
190+
│ │ ├── group items: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)]
191+
│ │ ├── aggregate functions: [Sum(sr_return_amt) (#48)]
192+
│ │ └── EvalScalar
193+
│ │ ├── scalars: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7), store_returns.sr_return_amt (#11) AS (#11), store_returns.sr_returned_date_sk (#0) AS (#148), date_dim.d_date_sk (#20) AS (#149), date_dim.d_year (#26) AS (#150)]
194+
│ │ └── Join(Inner)
195+
│ │ ├── build keys: [date_dim.d_date_sk (#20)]
196+
│ │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)]
197+
│ │ ├── other filters: []
198+
│ │ ├── Scan
199+
│ │ │ ├── table: default.store_returns
200+
│ │ │ ├── filters: []
201+
│ │ │ ├── order by: []
202+
│ │ │ └── limit: NONE
203+
│ │ └── Scan
204+
│ │ ├── table: default.date_dim
205+
│ │ ├── filters: [eq(date_dim.d_year (#26), 2001)]
206+
│ │ ├── order by: []
207+
│ │ └── limit: NONE
208+
│ └── Scan
209+
│ ├── table: default.customer
210+
│ ├── filters: []
211+
│ ├── order by: []
212+
│ └── limit: NONE
213+
└── Join(Inner)
214+
├── build keys: [sr_store_sk (#103)]
215+
├── probe keys: [store.s_store_sk (#49)]
216+
├── other filters: []
217+
├── Scan
218+
│ ├── table: default.store
219+
│ ├── filters: [eq(store.s_state (#73), 'TN')]
220+
│ ├── order by: []
221+
│ └── limit: NONE
222+
└── EvalScalar
223+
├── scalars: [sr_store_sk (#103) AS (#103), multiply(divide(sum(ctr_total_return) (#145), if(eq(count(ctr_total_return) (#146), 0), 1, count(ctr_total_return) (#146))), 1.2) AS (#147)]
224+
└── Aggregate(Final)
225+
├── group items: [subquery_103 (#103)]
226+
├── aggregate functions: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146)]
227+
└── Aggregate(Partial)
228+
├── group items: [subquery_103 (#103)]
229+
├── aggregate functions: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146)]
230+
└── Aggregate(Final)
231+
├── group items: [store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)]
232+
├── aggregate functions: [Sum(sr_return_amt) (#144)]
233+
└── Aggregate(Partial)
234+
├── group items: [store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)]
235+
├── aggregate functions: [Sum(sr_return_amt) (#144)]
236+
└── EvalScalar
237+
├── scalars: [store_returns.sr_customer_sk (#99) AS (#99), store_returns.sr_store_sk (#103) AS (#103), store_returns.sr_return_amt (#107) AS (#107), store_returns.sr_returned_date_sk (#96) AS (#151), date_dim.d_date_sk (#116) AS (#152), date_dim.d_year (#122) AS (#153)]
238+
└── Join(Inner)
239+
├── build keys: [date_dim.d_date_sk (#116)]
240+
├── probe keys: [store_returns.sr_returned_date_sk (#96)]
241+
├── other filters: []
242+
├── Scan
243+
│ ├── table: default.store_returns
244+
│ ├── filters: []
245+
│ ├── order by: []
246+
│ └── limit: NONE
247+
└── Scan
248+
├── table: default.date_dim
249+
├── filters: [eq(date_dim.d_year (#122), 2001)]
250+
├── order by: []
251+
└── limit: NONE
252+
253+
# Converted from tabular format to tree format based on parent-child relationships
254+
snow_plan: |
255+
Result
256+
└── SortWithLimit [sortKey: (CUSTOMER.C_CUSTOMER_ID ASC NULLS LAST), rowCount: 100]
257+
└── InnerJoin [joinKey: (CTR1.CTR_CUSTOMER_SK = CUSTOMER.C_CUSTOMER_SK)]
258+
├── InnerJoin [joinKey: (STORE.S_STORE_SK = CTR1.CTR_STORE_SK)]
259+
│ ├── Filter [STORE.S_STATE = 'TN']
260+
│ │ └── TableScan [SNOWFLAKE_SAMPLE_DATA.TPCDS_SF10TCL.STORE] [S_STORE_SK, S_STATE] [partitions: 1/1, bytes: 135,680]
261+
│ └── InnerJoin [joinKey: (CTR2.CTR_STORE_SK = CTR1.CTR_STORE_SK), joinFilter: (CTR1.CTR_TOTAL_RETURN) > (((SUM(CTR2.CTR_TOTAL_RETURN)) / (NVL(COUNT(CTR2.CTR_TOTAL_RETURN), 0))) * 1.2)]
262+
│ ├── Filter [(SUM(CTR2.CTR_TOTAL_RETURN) IS NOT NULL) AND (COUNT(CTR2.CTR_TOTAL_RETURN) IS NOT NULL)]
263+
│ │ └── Aggregate [aggExprs: [SUM(CTR2.CTR_TOTAL_RETURN), COUNT(CTR2.CTR_TOTAL_RETURN)], groupKeys: [CTR2.CTR_STORE_SK]]
264+
│ │ └── JoinFilter [joinKey: (STORE.S_STORE_SK = CTR1.CTR_STORE_SK)]
265+
│ │ └── WithReference [CTR2]
266+
│ │ └── Filter [STORE_RETURNS.SR_STORE_SK IS NOT NULL]
267+
│ │ └── WithClause [CUSTOMER_TOTAL_RETURN]
268+
│ │ └── Aggregate [aggExprs: [SUM(SUM(SUM(STORE_RETURNS.SR_RETURN_AMT)))], groupKeys: [STORE_RETURNS.SR_CUSTOMER_SK, STORE_RETURNS.SR_STORE_SK]]
269+
│ │ └── Aggregate [aggExprs: [SUM(SUM(STORE_RETURNS.SR_RETURN_AMT))], groupKeys: [STORE_RETURNS.SR_CUSTOMER_SK, STORE_RETURNS.SR_STORE_SK]]
270+
│ │ └── InnerJoin [joinKey: (DATE_DIM.D_DATE_SK = STORE_RETURNS.SR_RETURNED_DATE_SK)]
271+
│ │ ├── Filter [DATE_DIM.D_YEAR = 2001]
272+
│ │ │ └── TableScan [SNOWFLAKE_SAMPLE_DATA.TPCDS_SF10TCL.DATE_DIM] [D_DATE_SK, D_YEAR] [partitions: 1/1, bytes: 2,138,624]
273+
│ │ └── Aggregate [aggExprs: [SUM(STORE_RETURNS.SR_RETURN_AMT)], groupKeys: [STORE_RETURNS.SR_CUSTOMER_SK, STORE_RETURNS.SR_STORE_SK, STORE_RETURNS.SR_RETURNED_DATE_SK]]
274+
│ │ └── Filter [STORE_RETURNS.SR_RETURNED_DATE_SK IS NOT NULL]
275+
│ │ └── JoinFilter [joinKey: (DATE_DIM.D_DATE_SK = STORE_RETURNS.SR_RETURNED_DATE_SK)]
276+
│ │ └── TableScan [SNOWFLAKE_SAMPLE_DATA.TPCDS_SF10TCL.STORE_RETURNS] [SR_RETURNED_DATE_SK, SR_CUSTOMER_SK, SR_STORE_SK, SR_RETURN_AMT] [partitions: 7070/7070, bytes: 124,763,446,272]
277+
│ └── JoinFilter [joinKey: (STORE.S_STORE_SK = CTR1.CTR_STORE_SK)]
278+
│ └── WithReference [CTR1]
279+
│ └── Filter [(STORE_RETURNS.SR_STORE_SK IS NOT NULL) AND (STORE_RETURNS.SR_CUSTOMER_SK IS NOT NULL)]
280+
│ └── WithClause [CUSTOMER_TOTAL_RETURN] (reference to earlier WITH clause)
281+
└── JoinFilter [joinKey: (CTR1.CTR_CUSTOMER_SK = CUSTOMER.C_CUSTOMER_SK)]
282+
└── TableScan [SNOWFLAKE_SAMPLE_DATA.TPCDS_SF10TCL.CUSTOMER] [C_CUSTOMER_SK, C_CUSTOMER_ID] [partitions: 261/261, bytes: 2,328,538,624]

src/query/service/tests/it/sql/planner/optimizer/tpcds_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ async fn test_tpcds_optimizer() -> Result<()> {
286286
let optimized_plan = optimize_plan(&ctx, raw_plan).await?;
287287
let optimized_plan_str = optimized_plan.format_indent(false)?;
288288
println!("Optimized plan:\n{}", optimized_plan_str);
289-
289+
290290
// Verify the optimized plan matches expected output
291291
let actual_optimized = optimized_plan_str.trim();
292292
let expected_optimized = test.expected_plan.trim();

0 commit comments

Comments
 (0)