Commit 03545ce
[SPARK-24638][SQL] StringStartsWith support push down
## What changes were proposed in this pull request?
`StringStartsWith` support push down. About 50% savings in compute time.
## How was this patch tested?
unit tests, manual tests and performance test:
```scala
cat <<EOF > SPARK-24638.scala
def benchmark(func: () => Unit): Long = {
val start = System.currentTimeMillis()
for(i <- 0 until 100) { func() }
val end = System.currentTimeMillis()
end - start
}
val path = "/tmp/spark/parquet/string/"
spark.range(10000000).selectExpr("concat(id, 'str', id) as id").coalesce(1).write.mode("overwrite").option("parquet.block.size", 1048576).parquet(path)
val df = spark.read.parquet(path)
spark.sql("set spark.sql.parquet.filterPushdown.string.startsWith=true")
val pushdownEnable = benchmark(() => df.where("id like '999998%'").count())
spark.sql("set spark.sql.parquet.filterPushdown.string.startsWith=false")
val pushdownDisable = benchmark(() => df.where("id like '999998%'").count())
val improvements = pushdownDisable - pushdownEnable
println(s"improvements: $improvements")
EOF
bin/spark-shell -i SPARK-24638.scala
```
result:
```scala
Loading SPARK-24638.scala...
benchmark: (func: () => Unit)Long
path: String = /tmp/spark/parquet/string/
df: org.apache.spark.sql.DataFrame = [id: string]
res1: org.apache.spark.sql.DataFrame = [key: string, value: string]
pushdownEnable: Long = 11608
res2: org.apache.spark.sql.DataFrame = [key: string, value: string]
pushdownDisable: Long = 31981
improvements: Long = 20373
```
Author: Yuming Wang <[email protected]>
Closes #21623 from wangyum/SPARK-24638.1 parent f71e8da commit 03545ce
File tree
4 files changed
+130
-4
lines changed- sql
- catalyst/src/main/scala/org/apache/spark/sql/internal
- core/src
- main/scala/org/apache/spark/sql/execution/datasources/parquet
- test/scala/org/apache/spark/sql/execution/datasources/parquet
4 files changed
+130
-4
lines changedLines changed: 11 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
378 | 378 | | |
379 | 379 | | |
380 | 380 | | |
| 381 | + | |
| 382 | + | |
| 383 | + | |
| 384 | + | |
| 385 | + | |
| 386 | + | |
| 387 | + | |
| 388 | + | |
381 | 389 | | |
382 | 390 | | |
383 | 391 | | |
| |||
1459 | 1467 | | |
1460 | 1468 | | |
1461 | 1469 | | |
| 1470 | + | |
| 1471 | + | |
| 1472 | + | |
1462 | 1473 | | |
1463 | 1474 | | |
1464 | 1475 | | |
| |||
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
Lines changed: 3 additions & 1 deletion
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
348 | 348 | | |
349 | 349 | | |
350 | 350 | | |
| 351 | + | |
351 | 352 | | |
352 | 353 | | |
353 | 354 | | |
| |||
358 | 359 | | |
359 | 360 | | |
360 | 361 | | |
361 | | - | |
| 362 | + | |
| 363 | + | |
362 | 364 | | |
363 | 365 | | |
364 | 366 | | |
| |||
Lines changed: 34 additions & 1 deletion
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
22 | 22 | | |
23 | 23 | | |
24 | 24 | | |
| 25 | + | |
25 | 26 | | |
26 | 27 | | |
27 | 28 | | |
28 | 29 | | |
29 | 30 | | |
| 31 | + | |
30 | 32 | | |
31 | 33 | | |
32 | 34 | | |
33 | 35 | | |
34 | | - | |
| 36 | + | |
35 | 37 | | |
36 | 38 | | |
37 | 39 | | |
| |||
270 | 272 | | |
271 | 273 | | |
272 | 274 | | |
| 275 | + | |
| 276 | + | |
| 277 | + | |
| 278 | + | |
| 279 | + | |
| 280 | + | |
| 281 | + | |
| 282 | + | |
| 283 | + | |
| 284 | + | |
| 285 | + | |
| 286 | + | |
| 287 | + | |
| 288 | + | |
| 289 | + | |
| 290 | + | |
| 291 | + | |
| 292 | + | |
| 293 | + | |
| 294 | + | |
| 295 | + | |
| 296 | + | |
| 297 | + | |
| 298 | + | |
| 299 | + | |
| 300 | + | |
| 301 | + | |
| 302 | + | |
| 303 | + | |
| 304 | + | |
| 305 | + | |
273 | 306 | | |
274 | 307 | | |
275 | 308 | | |
| |||
Lines changed: 82 additions & 2 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
55 | 55 | | |
56 | 56 | | |
57 | 57 | | |
58 | | - | |
| 58 | + | |
| 59 | + | |
59 | 60 | | |
60 | 61 | | |
61 | 62 | | |
| |||
82 | 83 | | |
83 | 84 | | |
84 | 85 | | |
| 86 | + | |
85 | 87 | | |
86 | 88 | | |
87 | 89 | | |
| |||
140 | 142 | | |
141 | 143 | | |
142 | 144 | | |
| 145 | + | |
| 146 | + | |
| 147 | + | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
| 152 | + | |
| 153 | + | |
| 154 | + | |
| 155 | + | |
| 156 | + | |
| 157 | + | |
| 158 | + | |
| 159 | + | |
| 160 | + | |
| 161 | + | |
| 162 | + | |
| 163 | + | |
| 164 | + | |
| 165 | + | |
| 166 | + | |
| 167 | + | |
| 168 | + | |
| 169 | + | |
143 | 170 | | |
144 | 171 | | |
145 | 172 | | |
| |||
574 | 601 | | |
575 | 602 | | |
576 | 603 | | |
577 | | - | |
578 | 604 | | |
579 | 605 | | |
580 | 606 | | |
| |||
660 | 686 | | |
661 | 687 | | |
662 | 688 | | |
| 689 | + | |
| 690 | + | |
| 691 | + | |
| 692 | + | |
| 693 | + | |
| 694 | + | |
| 695 | + | |
| 696 | + | |
| 697 | + | |
| 698 | + | |
| 699 | + | |
| 700 | + | |
| 701 | + | |
| 702 | + | |
| 703 | + | |
| 704 | + | |
| 705 | + | |
| 706 | + | |
| 707 | + | |
| 708 | + | |
| 709 | + | |
| 710 | + | |
| 711 | + | |
| 712 | + | |
| 713 | + | |
| 714 | + | |
| 715 | + | |
| 716 | + | |
| 717 | + | |
| 718 | + | |
| 719 | + | |
| 720 | + | |
| 721 | + | |
| 722 | + | |
| 723 | + | |
| 724 | + | |
| 725 | + | |
| 726 | + | |
| 727 | + | |
| 728 | + | |
| 729 | + | |
| 730 | + | |
| 731 | + | |
| 732 | + | |
| 733 | + | |
| 734 | + | |
| 735 | + | |
| 736 | + | |
| 737 | + | |
| 738 | + | |
| 739 | + | |
| 740 | + | |
| 741 | + | |
| 742 | + | |
663 | 743 | | |
664 | 744 | | |
665 | 745 | | |
| |||
0 commit comments