|
| 1 | +name: "Q1" |
| 2 | +description: "TPC-DS Query 1 optimizer test" |
| 3 | + |
| 4 | +sql: | |
| 5 | + WITH customer_total_return |
| 6 | + AS (SELECT sr_customer_sk AS ctr_customer_sk, |
| 7 | + sr_store_sk AS ctr_store_sk, |
| 8 | + Sum(sr_return_amt) AS ctr_total_return |
| 9 | + FROM store_returns, |
| 10 | + date_dim |
| 11 | + WHERE sr_returned_date_sk = d_date_sk |
| 12 | + AND d_year = 2001 |
| 13 | + GROUP BY sr_customer_sk, |
| 14 | + sr_store_sk) |
| 15 | + SELECT c_customer_id |
| 16 | + FROM customer_total_return ctr1, |
| 17 | + store, |
| 18 | + customer |
| 19 | + WHERE ctr1.ctr_total_return > (SELECT Avg(ctr_total_return) * 1.2 |
| 20 | + FROM customer_total_return ctr2 |
| 21 | + WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk) |
| 22 | + AND s_store_sk = ctr1.ctr_store_sk |
| 23 | + AND s_state = 'TN' |
| 24 | + AND ctr1.ctr_customer_sk = c_customer_sk |
| 25 | + ORDER BY c_customer_id |
| 26 | + LIMIT 100 |
| 27 | +
|
| 28 | +# Table statistics: data_size and number_of_segments are taken from snow_plan's TableScan information; num_rows values are estimates |
| 29 | +table_statistics: |
| 30 | + date_dim: |
| 31 | + num_rows: 73049 # Estimated based on typical date dimension cardinality |
| 32 | + data_size: 2138624 # Directly from snow_plan: "bytes: 2,138,624" |
| 33 | + number_of_segments: 1 # From snow_plan: "partitions: 1/1" |
| 34 | + |
| 35 | + store_returns: |
| 36 | + num_rows: 287000000 # Estimated based on data size and typical row size |
| 37 | + data_size: 124763446272 # Directly from snow_plan: "bytes: 124,763,446,272" |
| 38 | + number_of_segments: 7070 # From snow_plan: "partitions: 7070/7070" |
| 39 | + |
| 40 | + store: |
| 41 | + num_rows: 1002 # Estimated based on typical store dimension cardinality |
| 42 | + data_size: 135680 # Directly from snow_plan: "bytes: 135,680" |
| 43 | + number_of_segments: 1 # From snow_plan: "partitions: 1/1" |
| 44 | + |
| 45 | + customer: |
| 46 | + num_rows: 12000000 # Estimated based on typical customer dimension size |
| 47 | + data_size: 2328538624 # Directly from snow_plan: "bytes: 2,328,538,624" |
| 48 | + number_of_segments: 261 # From snow_plan: "partitions: 261/261" |
| 49 | + |
| 50 | +# Column statistics derived from query predicates and typical TPC-DS data distributions |
| 51 | +column_statistics: |
| 52 | + # Date dimension columns used in the query |
| 53 | + date_dim.d_year: |
| 54 | + min: 1990 # Assumed range, not taken from snow_plan |
| 55 | + max: 2010 # Assumed range; NOTE(review): 73049 date_dim rows at one row per day would span about 200 years - confirm |
| 56 | + ndv: 21 # Based on min/max range (2010-1990+1) |
| 57 | + null_count: 0 # Primary dimension columns typically don't have nulls |
| 58 | + |
| 59 | + date_dim.d_date_sk: |
| 60 | + min: 1 # Typical starting value for surrogate key |
| 61 | + max: 73049 # Based on table row count |
| 62 | + ndv: 73049 # Primary key, so NDV equals row count |
| 63 | + null_count: 0 # Primary key cannot be null |
| 64 | + |
| 65 | + # Store returns columns used in the query |
| 66 | + store_returns.sr_returned_date_sk: |
| 67 | + min: 1 # Matches date_dim.d_date_sk min |
| 68 | + max: 73049 # Matches date_dim.d_date_sk max |
| 69 | + ndv: 73049 # Foreign key to date_dim |
| 70 | + null_count: 287998 # Estimated; the snow_plan filter "STORE_RETURNS.SR_RETURNED_DATE_SK IS NOT NULL" only indicates the column is nullable, it does not give the count |
| 71 | + |
| 72 | + store_returns.sr_customer_sk: |
| 73 | + min: 1 # Typical starting value for surrogate key |
| 74 | + max: 12000000 # Matches customer.c_customer_sk max |
| 75 | + ndv: 11000000 # Estimated as slightly less than customer table cardinality |
| 76 | + null_count: 143500 # Estimated; the snow_plan filter "STORE_RETURNS.SR_CUSTOMER_SK IS NOT NULL" only indicates the column is nullable, it does not give the count |
| 77 | + |
| 78 | + store_returns.sr_store_sk: |
| 79 | + min: 1 # Typical starting value for surrogate key |
| 80 | + max: 1002 # Matches store.s_store_sk max |
| 81 | + ndv: 1002 # Foreign key to store table |
| 82 | + null_count: 143500 # Estimated; the snow_plan filter "STORE_RETURNS.SR_STORE_SK IS NOT NULL" only indicates the column is nullable, it does not give the count |
| 83 | + |
| 84 | + store_returns.sr_return_amt: |
| 85 | + min: 0.01 # Minimum reasonable return amount |
| 86 | + max: 10000.00 # Maximum reasonable return amount |
| 87 | + ndv: 100000 # Estimated based on typical distribution |
| 88 | + null_count: 0 # Return amount is typically not null |
| 89 | + |
| 90 | + # Store columns used in the query |
| 91 | + store.s_store_sk: |
| 92 | + min: 1 # Typical starting value for surrogate key |
| 93 | + max: 1002 # Based on estimated row count |
| 94 | + ndv: 1002 # Primary key, so NDV equals row count |
| 95 | + null_count: 0 # Primary key cannot be null |
| 96 | + |
| 97 | + store.s_state: |
| 98 | + min: "AK" # Alaska (alphabetically first US state abbreviation) |
| 99 | + max: "WY" # Wyoming (alphabetically last US state abbreviation) |
| 100 | + ndv: 50 # Number of US states |
| 101 | + null_count: 0 # State is typically not null |
| 102 | + |
| 103 | + # Customer columns used in the query |
| 104 | + customer.c_customer_sk: |
| 105 | + min: 1 # Typical starting value for surrogate key |
| 106 | + max: 12000000 # Based on estimated row count |
| 107 | + ndv: 12000000 # Primary key, so NDV equals row count |
| 108 | + null_count: 0 # Primary key cannot be null |
| 109 | + |
| 110 | + customer.c_customer_id: |
| 111 | + min: "AAAAAAAAAAAAAA" # Lexicographically smallest possible customer ID |
| 112 | + max: "ZZZZZZZZZZZZZZ" # Lexicographically largest possible customer ID |
| 113 | + ndv: 12000000 # Same as c_customer_sk (1:1 relationship) |
| 114 | + null_count: 0 # Customer ID is typically not null |
| 115 | + |
| 116 | +raw_plan: | |
| 117 | + Limit |
| 118 | + ├── limit: [100] |
| 119 | + ├── offset: [0] |
| 120 | + └── Sort |
| 121 | + ├── sort keys: [default.customer.c_customer_id (#79) ASC] |
| 122 | + ├── limit: [NONE] |
| 123 | + └── EvalScalar |
| 124 | + ├── scalars: [customer.c_customer_id (#79) AS (#79)] |
| 125 | + └── Filter |
| 126 | + ├── filters: [gt(ctr1.ctr_total_return (#48), SUBQUERY), eq(store.s_store_sk (#49), ctr1.ctr_store_sk (#7)), eq(store.s_state (#73), 'TN'), eq(ctr1.ctr_customer_sk (#3), customer.c_customer_sk (#78))] |
| 127 | + └── Join(Cross) |
| 128 | + ├── build keys: [] |
| 129 | + ├── probe keys: [] |
| 130 | + ├── other filters: [] |
| 131 | + ├── Join(Cross) |
| 132 | + │ ├── build keys: [] |
| 133 | + │ ├── probe keys: [] |
| 134 | + │ ├── other filters: [] |
| 135 | + │ ├── EvalScalar |
| 136 | + │ │ ├── scalars: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7), Sum(sr_return_amt) (#48) AS (#48)] |
| 137 | + │ │ └── Aggregate(Initial) |
| 138 | + │ │ ├── group items: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] |
| 139 | + │ │ ├── aggregate functions: [Sum(sr_return_amt) (#48)] |
| 140 | + │ │ └── EvalScalar |
| 141 | + │ │ ├── scalars: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7), store_returns.sr_return_amt (#11) AS (#11)] |
| 142 | + │ │ └── Filter |
| 143 | + │ │ ├── filters: [eq(store_returns.sr_returned_date_sk (#0), date_dim.d_date_sk (#20)), eq(date_dim.d_year (#26), 2001)] |
| 144 | + │ │ └── Join(Cross) |
| 145 | + │ │ ├── build keys: [] |
| 146 | + │ │ ├── probe keys: [] |
| 147 | + │ │ ├── other filters: [] |
| 148 | + │ │ ├── Scan |
| 149 | + │ │ │ ├── table: default.store_returns |
| 150 | + │ │ │ ├── filters: [] |
| 151 | + │ │ │ ├── order by: [] |
| 152 | + │ │ │ └── limit: NONE |
| 153 | + │ │ └── Scan |
| 154 | + │ │ ├── table: default.date_dim |
| 155 | + │ │ ├── filters: [] |
| 156 | + │ │ ├── order by: [] |
| 157 | + │ │ └── limit: NONE |
| 158 | + │ └── Scan |
| 159 | + │ ├── table: default.store |
| 160 | + │ ├── filters: [] |
| 161 | + │ ├── order by: [] |
| 162 | + │ └── limit: NONE |
| 163 | + └── Scan |
| 164 | + ├── table: default.customer |
| 165 | + ├── filters: [] |
| 166 | + ├── order by: [] |
| 167 | + └── limit: NONE |
| 168 | +
|
| 169 | +optimized_plan: | |
| 170 | + Limit |
| 171 | + ├── limit: [100] |
| 172 | + ├── offset: [0] |
| 173 | + └── Sort |
| 174 | + ├── sort keys: [default.customer.c_customer_id (#79) ASC] |
| 175 | + ├── limit: [100] |
| 176 | + └── EvalScalar |
| 177 | + ├── scalars: [customer.c_customer_id (#79) AS (#79), ctr1.ctr_total_return (#48) AS (#154), scalar_subquery_147 (#147) AS (#155), store.s_store_sk (#49) AS (#156), ctr1.ctr_store_sk (#7) AS (#157), store.s_state (#73) AS (#158), ctr1.ctr_customer_sk (#3) AS (#159), customer.c_customer_sk (#78) AS (#160)] |
| 178 | + └── Join(Inner) |
| 179 | + ├── build keys: [sr_store_sk (#103)] |
| 180 | + ├── probe keys: [sr_store_sk (#7)] |
| 181 | + ├── other filters: [gt(ctr1.ctr_total_return (#48), scalar_subquery_147 (#147))] |
| 182 | + ├── Join(Inner) |
| 183 | + │ ├── build keys: [customer.c_customer_sk (#78)] |
| 184 | + │ ├── probe keys: [ctr1.ctr_customer_sk (#3)] |
| 185 | + │ ├── other filters: [] |
| 186 | + │ ├── Aggregate(Final) |
| 187 | + │ │ ├── group items: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] |
| 188 | + │ │ ├── aggregate functions: [Sum(sr_return_amt) (#48)] |
| 189 | + │ │ └── Aggregate(Partial) |
| 190 | + │ │ ├── group items: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] |
| 191 | + │ │ ├── aggregate functions: [Sum(sr_return_amt) (#48)] |
| 192 | + │ │ └── EvalScalar |
| 193 | + │ │ ├── scalars: [store_returns.sr_customer_sk (#3) AS (#3), store_returns.sr_store_sk (#7) AS (#7), store_returns.sr_return_amt (#11) AS (#11), store_returns.sr_returned_date_sk (#0) AS (#148), date_dim.d_date_sk (#20) AS (#149), date_dim.d_year (#26) AS (#150)] |
| 194 | + │ │ └── Join(Inner) |
| 195 | + │ │ ├── build keys: [date_dim.d_date_sk (#20)] |
| 196 | + │ │ ├── probe keys: [store_returns.sr_returned_date_sk (#0)] |
| 197 | + │ │ ├── other filters: [] |
| 198 | + │ │ ├── Scan |
| 199 | + │ │ │ ├── table: default.store_returns |
| 200 | + │ │ │ ├── filters: [] |
| 201 | + │ │ │ ├── order by: [] |
| 202 | + │ │ │ └── limit: NONE |
| 203 | + │ │ └── Scan |
| 204 | + │ │ ├── table: default.date_dim |
| 205 | + │ │ ├── filters: [eq(date_dim.d_year (#26), 2001)] |
| 206 | + │ │ ├── order by: [] |
| 207 | + │ │ └── limit: NONE |
| 208 | + │ └── Scan |
| 209 | + │ ├── table: default.customer |
| 210 | + │ ├── filters: [] |
| 211 | + │ ├── order by: [] |
| 212 | + │ └── limit: NONE |
| 213 | + └── Join(Inner) |
| 214 | + ├── build keys: [sr_store_sk (#103)] |
| 215 | + ├── probe keys: [store.s_store_sk (#49)] |
| 216 | + ├── other filters: [] |
| 217 | + ├── Scan |
| 218 | + │ ├── table: default.store |
| 219 | + │ ├── filters: [eq(store.s_state (#73), 'TN')] |
| 220 | + │ ├── order by: [] |
| 221 | + │ └── limit: NONE |
| 222 | + └── EvalScalar |
| 223 | + ├── scalars: [sr_store_sk (#103) AS (#103), multiply(divide(sum(ctr_total_return) (#145), if(eq(count(ctr_total_return) (#146), 0), 1, count(ctr_total_return) (#146))), 1.2) AS (#147)] |
| 224 | + └── Aggregate(Final) |
| 225 | + ├── group items: [subquery_103 (#103)] |
| 226 | + ├── aggregate functions: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146)] |
| 227 | + └── Aggregate(Partial) |
| 228 | + ├── group items: [subquery_103 (#103)] |
| 229 | + ├── aggregate functions: [sum(ctr_total_return) (#145), count(ctr_total_return) (#146)] |
| 230 | + └── Aggregate(Final) |
| 231 | + ├── group items: [store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)] |
| 232 | + ├── aggregate functions: [Sum(sr_return_amt) (#144)] |
| 233 | + └── Aggregate(Partial) |
| 234 | + ├── group items: [store_returns.sr_customer_sk (#99), store_returns.sr_store_sk (#103)] |
| 235 | + ├── aggregate functions: [Sum(sr_return_amt) (#144)] |
| 236 | + └── EvalScalar |
| 237 | + ├── scalars: [store_returns.sr_customer_sk (#99) AS (#99), store_returns.sr_store_sk (#103) AS (#103), store_returns.sr_return_amt (#107) AS (#107), store_returns.sr_returned_date_sk (#96) AS (#151), date_dim.d_date_sk (#116) AS (#152), date_dim.d_year (#122) AS (#153)] |
| 238 | + └── Join(Inner) |
| 239 | + ├── build keys: [date_dim.d_date_sk (#116)] |
| 240 | + ├── probe keys: [store_returns.sr_returned_date_sk (#96)] |
| 241 | + ├── other filters: [] |
| 242 | + ├── Scan |
| 243 | + │ ├── table: default.store_returns |
| 244 | + │ ├── filters: [] |
| 245 | + │ ├── order by: [] |
| 246 | + │ └── limit: NONE |
| 247 | + └── Scan |
| 248 | + ├── table: default.date_dim |
| 249 | + ├── filters: [eq(date_dim.d_year (#122), 2001)] |
| 250 | + ├── order by: [] |
| 251 | + └── limit: NONE |
| 252 | +
|
| 253 | +# Converted from tabular format to tree format based on parent-child relationships |
| 254 | +snow_plan: | |
| 255 | + Result |
| 256 | + └── SortWithLimit [sortKey: (CUSTOMER.C_CUSTOMER_ID ASC NULLS LAST), rowCount: 100] |
| 257 | + └── InnerJoin [joinKey: (CTR1.CTR_CUSTOMER_SK = CUSTOMER.C_CUSTOMER_SK)] |
| 258 | + ├── InnerJoin [joinKey: (STORE.S_STORE_SK = CTR1.CTR_STORE_SK)] |
| 259 | + │ ├── Filter [STORE.S_STATE = 'TN'] |
| 260 | + │ │ └── TableScan [SNOWFLAKE_SAMPLE_DATA.TPCDS_SF10TCL.STORE] [S_STORE_SK, S_STATE] [partitions: 1/1, bytes: 135,680] |
| 261 | + │ └── InnerJoin [joinKey: (CTR2.CTR_STORE_SK = CTR1.CTR_STORE_SK), joinFilter: (CTR1.CTR_TOTAL_RETURN) > (((SUM(CTR2.CTR_TOTAL_RETURN)) / (NVL(COUNT(CTR2.CTR_TOTAL_RETURN), 0))) * 1.2)] |
| 262 | + │ ├── Filter [(SUM(CTR2.CTR_TOTAL_RETURN) IS NOT NULL) AND (COUNT(CTR2.CTR_TOTAL_RETURN) IS NOT NULL)] |
| 263 | + │ │ └── Aggregate [aggExprs: [SUM(CTR2.CTR_TOTAL_RETURN), COUNT(CTR2.CTR_TOTAL_RETURN)], groupKeys: [CTR2.CTR_STORE_SK]] |
| 264 | + │ │ └── JoinFilter [joinKey: (STORE.S_STORE_SK = CTR1.CTR_STORE_SK)] |
| 265 | + │ │ └── WithReference [CTR2] |
| 266 | + │ │ └── Filter [STORE_RETURNS.SR_STORE_SK IS NOT NULL] |
| 267 | + │ │ └── WithClause [CUSTOMER_TOTAL_RETURN] |
| 268 | + │ │ └── Aggregate [aggExprs: [SUM(SUM(SUM(STORE_RETURNS.SR_RETURN_AMT)))], groupKeys: [STORE_RETURNS.SR_CUSTOMER_SK, STORE_RETURNS.SR_STORE_SK]] |
| 269 | + │ │ └── Aggregate [aggExprs: [SUM(SUM(STORE_RETURNS.SR_RETURN_AMT))], groupKeys: [STORE_RETURNS.SR_CUSTOMER_SK, STORE_RETURNS.SR_STORE_SK]] |
| 270 | + │ │ └── InnerJoin [joinKey: (DATE_DIM.D_DATE_SK = STORE_RETURNS.SR_RETURNED_DATE_SK)] |
| 271 | + │ │ ├── Filter [DATE_DIM.D_YEAR = 2001] |
| 272 | + │ │ │ └── TableScan [SNOWFLAKE_SAMPLE_DATA.TPCDS_SF10TCL.DATE_DIM] [D_DATE_SK, D_YEAR] [partitions: 1/1, bytes: 2,138,624] |
| 273 | + │ │ └── Aggregate [aggExprs: [SUM(STORE_RETURNS.SR_RETURN_AMT)], groupKeys: [STORE_RETURNS.SR_CUSTOMER_SK, STORE_RETURNS.SR_STORE_SK, STORE_RETURNS.SR_RETURNED_DATE_SK]] |
| 274 | + │ │ └── Filter [STORE_RETURNS.SR_RETURNED_DATE_SK IS NOT NULL] |
| 275 | + │ │ └── JoinFilter [joinKey: (DATE_DIM.D_DATE_SK = STORE_RETURNS.SR_RETURNED_DATE_SK)] |
| 276 | + │ │ └── TableScan [SNOWFLAKE_SAMPLE_DATA.TPCDS_SF10TCL.STORE_RETURNS] [SR_RETURNED_DATE_SK, SR_CUSTOMER_SK, SR_STORE_SK, SR_RETURN_AMT] [partitions: 7070/7070, bytes: 124,763,446,272] |
| 277 | + │ └── JoinFilter [joinKey: (STORE.S_STORE_SK = CTR1.CTR_STORE_SK)] |
| 278 | + │ └── WithReference [CTR1] |
| 279 | + │ └── Filter [(STORE_RETURNS.SR_STORE_SK IS NOT NULL) AND (STORE_RETURNS.SR_CUSTOMER_SK IS NOT NULL)] |
| 280 | + │ └── WithClause [CUSTOMER_TOTAL_RETURN] (reference to earlier WITH clause) |
| 281 | + └── JoinFilter [joinKey: (CTR1.CTR_CUSTOMER_SK = CUSTOMER.C_CUSTOMER_SK)] |
| 282 | + └── TableScan [SNOWFLAKE_SAMPLE_DATA.TPCDS_SF10TCL.CUSTOMER] [C_CUSTOMER_SK, C_CUSTOMER_ID] [partitions: 261/261, bytes: 2,328,538,624] |
0 commit comments