Commit 2b8d89e
[SPARK-2523] [SQL] Hadoop table scan bug fixing
In HiveTableScan.scala, ObjectInspector was created for all of the partition based records, which probably causes ClassCastException if the object inspector is not identical among table & partitions.
This is the follow up with:
#1408
#1390
I've run a micro benchmark in my local with 15000000 records totally, and got the result as below:
With This Patch | Partition-Based Table | Non-Partition-Based Table
------------ | ------------- | -------------
No | 1927 ms | 1885 ms
Yes | 1541 ms | 1524 ms
It showed this patch will also improve the performance.
PS: the benchmark code is also attached. (thanks liancheng )
```
package org.apache.spark.sql.hive
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql._
object HiveTableScanPrepare extends App {
case class Record(key: String, value: String)
val sparkContext = new SparkContext(
new SparkConf()
.setMaster("local")
.setAppName(getClass.getSimpleName.stripSuffix("$")))
val hiveContext = new LocalHiveContext(sparkContext)
val rdd = sparkContext.parallelize((1 to 3000000).map(i => Record(s"$i", s"val_$i")))
import hiveContext._
hql("SHOW TABLES")
hql("DROP TABLE if exists part_scan_test")
hql("DROP TABLE if exists scan_test")
hql("DROP TABLE if exists records")
rdd.registerAsTable("records")
hql("""CREATE TABLE part_scan_test (key STRING, value STRING) PARTITIONED BY (part1 string, part2 STRING)
| ROW FORMAT SERDE
| 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
| STORED AS RCFILE
""".stripMargin)
hql("""CREATE TABLE scan_test (key STRING, value STRING)
| ROW FORMAT SERDE
| 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
| STORED AS RCFILE
""".stripMargin)
for (part1 <- 2000 until 2001) {
for (part2 <- 1 to 5) {
hql(s"""from records
| insert into table part_scan_test PARTITION (part1='$part1', part2='2010-01-$part2')
| select key, value
""".stripMargin)
hql(s"""from records
| insert into table scan_test select key, value
""".stripMargin)
}
}
}
object HiveTableScanTest extends App {
val sparkContext = new SparkContext(
new SparkConf()
.setMaster("local")
.setAppName(getClass.getSimpleName.stripSuffix("$")))
val hiveContext = new LocalHiveContext(sparkContext)
import hiveContext._
hql("SHOW TABLES")
val part_scan_test = hql("select key, value from part_scan_test")
val scan_test = hql("select key, value from scan_test")
val r_part_scan_test = (0 to 5).map(i => benchmark(part_scan_test))
val r_scan_test = (0 to 5).map(i => benchmark(scan_test))
println("Scanning Partition-Based Table")
r_part_scan_test.foreach(printResult)
println("Scanning Non-Partition-Based Table")
r_scan_test.foreach(printResult)
def printResult(result: (Long, Long)) {
println(s"Duration: ${result._1} ms Result: ${result._2}")
}
def benchmark(srdd: SchemaRDD) = {
val begin = System.currentTimeMillis()
val result = srdd.count()
val end = System.currentTimeMillis()
((end - begin), result)
}
}
```
Author: Cheng Hao <[email protected]>
Closes #1439 from chenghao-intel/hadoop_table_scan and squashes the following commits:
888968f [Cheng Hao] Fix issues in code style
27540ba [Cheng Hao] Fix the TableScan Bug while partition serde differs
40a24a7 [Cheng Hao] Add Unit Test1 parent a7d145e commit 2b8d89e
File tree
4 files changed
+138
-115
lines changed- sql/hive/src
- main/scala/org/apache/spark/sql/hive
- execution
- test
- resources/golden
- scala/org/apache/spark/sql/hive/execution
4 files changed
+138
-115
lines changedLines changed: 81 additions & 32 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
24 | 24 | | |
25 | 25 | | |
26 | 26 | | |
| 27 | + | |
| 28 | + | |
27 | 29 | | |
28 | 30 | | |
29 | 31 | | |
30 | 32 | | |
31 | 33 | | |
32 | 34 | | |
33 | 35 | | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
34 | 39 | | |
35 | 40 | | |
36 | 41 | | |
37 | 42 | | |
38 | | - | |
| 43 | + | |
39 | 44 | | |
40 | | - | |
| 45 | + | |
41 | 46 | | |
42 | 47 | | |
43 | 48 | | |
| |||
46 | 51 | | |
47 | 52 | | |
48 | 53 | | |
49 | | - | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
50 | 58 | | |
51 | 59 | | |
52 | 60 | | |
| |||
63 | 71 | | |
64 | 72 | | |
65 | 73 | | |
66 | | - | |
| 74 | + | |
67 | 75 | | |
68 | 76 | | |
69 | | - | |
| 77 | + | |
70 | 78 | | |
71 | 79 | | |
72 | 80 | | |
| |||
81 | 89 | | |
82 | 90 | | |
83 | 91 | | |
84 | | - | |
| 92 | + | |
85 | 93 | | |
86 | 94 | | |
87 | 95 | | |
88 | 96 | | |
89 | 97 | | |
90 | 98 | | |
91 | | - | |
| 99 | + | |
92 | 100 | | |
93 | 101 | | |
94 | 102 | | |
| |||
99 | 107 | | |
100 | 108 | | |
101 | 109 | | |
| 110 | + | |
| 111 | + | |
102 | 112 | | |
103 | 113 | | |
104 | 114 | | |
105 | 115 | | |
106 | 116 | | |
107 | | - | |
108 | | - | |
109 | | - | |
110 | | - | |
111 | | - | |
112 | | - | |
| 117 | + | |
113 | 118 | | |
114 | 119 | | |
115 | 120 | | |
116 | 121 | | |
117 | 122 | | |
118 | | - | |
| 123 | + | |
119 | 124 | | |
120 | 125 | | |
121 | 126 | | |
| |||
132 | 137 | | |
133 | 138 | | |
134 | 139 | | |
135 | | - | |
136 | | - | |
137 | | - | |
| 140 | + | |
| 141 | + | |
| 142 | + | |
138 | 143 | | |
139 | 144 | | |
140 | 145 | | |
| |||
156 | 161 | | |
157 | 162 | | |
158 | 163 | | |
159 | | - | |
| 164 | + | |
160 | 165 | | |
161 | 166 | | |
| 167 | + | |
| 168 | + | |
| 169 | + | |
| 170 | + | |
| 171 | + | |
| 172 | + | |
| 173 | + | |
| 174 | + | |
| 175 | + | |
| 176 | + | |
| 177 | + | |
| 178 | + | |
| 179 | + | |
| 180 | + | |
| 181 | + | |
| 182 | + | |
| 183 | + | |
| 184 | + | |
162 | 185 | | |
163 | 186 | | |
164 | 187 | | |
165 | 188 | | |
166 | | - | |
167 | | - | |
168 | | - | |
169 | | - | |
170 | | - | |
171 | 189 | | |
172 | 190 | | |
173 | 191 | | |
174 | | - | |
175 | | - | |
176 | | - | |
177 | | - | |
178 | | - | |
179 | | - | |
| 192 | + | |
| 193 | + | |
180 | 194 | | |
181 | 195 | | |
182 | 196 | | |
183 | 197 | | |
184 | 198 | | |
185 | | - | |
| 199 | + | |
186 | 200 | | |
187 | 201 | | |
188 | 202 | | |
| |||
225 | 239 | | |
226 | 240 | | |
227 | 241 | | |
228 | | - | |
229 | 242 | | |
230 | 243 | | |
231 | | - | |
| 244 | + | |
232 | 245 | | |
233 | 246 | | |
234 | 247 | | |
| |||
241 | 254 | | |
242 | 255 | | |
243 | 256 | | |
| 257 | + | |
| 258 | + | |
| 259 | + | |
| 260 | + | |
| 261 | + | |
| 262 | + | |
| 263 | + | |
| 264 | + | |
| 265 | + | |
| 266 | + | |
| 267 | + | |
| 268 | + | |
| 269 | + | |
| 270 | + | |
| 271 | + | |
| 272 | + | |
| 273 | + | |
| 274 | + | |
| 275 | + | |
| 276 | + | |
| 277 | + | |
| 278 | + | |
| 279 | + | |
| 280 | + | |
| 281 | + | |
| 282 | + | |
| 283 | + | |
| 284 | + | |
| 285 | + | |
| 286 | + | |
| 287 | + | |
| 288 | + | |
| 289 | + | |
| 290 | + | |
| 291 | + | |
| 292 | + | |
244 | 293 | | |
Lines changed: 7 additions & 83 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
34 | 34 | | |
35 | 35 | | |
36 | 36 | | |
37 | | - | |
38 | 37 | | |
39 | 38 | | |
40 | 39 | | |
| |||
50 | 49 | | |
51 | 50 | | |
52 | 51 | | |
53 | | - | |
54 | | - | |
| 52 | + | |
55 | 53 | | |
56 | 54 | | |
57 | 55 | | |
| |||
67 | 65 | | |
68 | 66 | | |
69 | 67 | | |
70 | | - | |
71 | | - | |
72 | | - | |
73 | | - | |
74 | | - | |
75 | | - | |
76 | | - | |
77 | | - | |
78 | | - | |
79 | | - | |
80 | | - | |
81 | | - | |
82 | | - | |
83 | | - | |
84 | | - | |
85 | | - | |
86 | | - | |
87 | | - | |
88 | | - | |
89 | | - | |
90 | | - | |
91 | | - | |
92 | | - | |
93 | | - | |
94 | | - | |
95 | | - | |
96 | | - | |
97 | | - | |
98 | | - | |
99 | | - | |
100 | | - | |
101 | | - | |
102 | | - | |
103 | | - | |
104 | | - | |
105 | | - | |
| 68 | + | |
106 | 69 | | |
107 | 70 | | |
108 | 71 | | |
| |||
114 | 77 | | |
115 | 78 | | |
116 | 79 | | |
| 80 | + | |
117 | 81 | | |
118 | 82 | | |
119 | 83 | | |
| |||
140 | 104 | | |
141 | 105 | | |
142 | 106 | | |
143 | | - | |
144 | | - | |
145 | | - | |
146 | | - | |
147 | | - | |
148 | | - | |
149 | 107 | | |
150 | 108 | | |
151 | 109 | | |
| |||
169 | 127 | | |
170 | 128 | | |
171 | 129 | | |
172 | | - | |
173 | | - | |
174 | | - | |
175 | | - | |
176 | | - | |
177 | | - | |
178 | | - | |
179 | | - | |
180 | | - | |
181 | | - | |
182 | | - | |
183 | | - | |
184 | | - | |
185 | | - | |
186 | | - | |
187 | | - | |
188 | | - | |
189 | | - | |
190 | | - | |
191 | | - | |
192 | | - | |
193 | | - | |
194 | | - | |
195 | | - | |
196 | | - | |
197 | | - | |
198 | | - | |
199 | | - | |
200 | | - | |
201 | | - | |
202 | | - | |
203 | | - | |
204 | | - | |
205 | | - | |
206 | | - | |
207 | | - | |
208 | | - | |
209 | | - | |
| 130 | + | |
| 131 | + | |
| 132 | + | |
| 133 | + | |
210 | 134 | | |
211 | 135 | | |
212 | 136 | | |
| |||
Lines changed: 2 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
| 2 | + | |
0 commit comments