Skip to content

Commit fea2124

Browse files
committed
Add test case with workaround for reading partitioned avro files.
1 parent c419e4f commit fea2124

File tree

3 files changed

+75
-1
lines changed

3 files changed

+75
-1
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,71 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
269269
|)
270270
""".stripMargin.cmd,
271271
s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/episodes.avro")}' INTO TABLE episodes".cmd
272-
)
272+
),
273+
// THIS TABLE IS NOT THE SAME AS THE HIVE TEST TABLE episodes_partitioned AS DYNAMIC PARTITIONING
274+
// IS NOT YET SUPPORTED
275+
TestTable("episodes_part",
276+
s"""CREATE TABLE episodes_part (title STRING, air_date STRING, doctor INT)
277+
|PARTITIONED BY (doctor_pt INT)
278+
|ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'
279+
|STORED AS
280+
|INPUTFORMAT '${classOf[AvroContainerInputFormat].getCanonicalName}'
281+
|OUTPUTFORMAT '${classOf[AvroContainerOutputFormat].getCanonicalName}'
282+
|TBLPROPERTIES (
283+
| 'avro.schema.literal'='{
284+
| "type": "record",
285+
| "name": "episodes",
286+
| "namespace": "testing.hive.avro.serde",
287+
| "fields": [
288+
| {
289+
| "name": "title",
290+
| "type": "string",
291+
| "doc": "episode title"
292+
| },
293+
| {
294+
| "name": "air_date",
295+
| "type": "string",
296+
| "doc": "initial date"
297+
| },
298+
| {
299+
| "name": "doctor",
300+
| "type": "int",
301+
| "doc": "main actor playing the Doctor in episode"
302+
| }
303+
| ]
304+
| }'
305+
|)
306+
""".stripMargin.cmd,
307+
// WORKAROUND: Required to pass schema to SerDe for partitioned tables.
308+
// TODO: Pass this automatically from the table to partitions.
309+
s"""
310+
|ALTER TABLE episodes_part SET SERDEPROPERTIES (
311+
| 'avro.schema.literal'='{
312+
| "type": "record",
313+
| "name": "episodes",
314+
| "namespace": "testing.hive.avro.serde",
315+
| "fields": [
316+
| {
317+
| "name": "title",
318+
| "type": "string",
319+
| "doc": "episode title"
320+
| },
321+
| {
322+
| "name": "air_date",
323+
| "type": "string",
324+
| "doc": "initial date"
325+
| },
326+
| {
327+
| "name": "doctor",
328+
| "type": "int",
329+
| "doc": "main actor playing the Doctor in episode"
330+
| }
331+
| ]
332+
| }'
333+
|)
334+
""".stripMargin.cmd,
335+
s"INSERT OVERWRITE TABLE episodes_part PARTITION (doctor_pt=1) SELECT title, air_date, doctor FROM episodes".cmd
336+
)
273337
)
274338

275339
hiveQTestUtilTables.foreach(registerTestTable)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
The Eleventh Hour 3 April 2010 11 1
2+
The Doctor's Wife 14 May 2011 11 1
3+
Horror of Fang Rock 3 September 1977 4 1
4+
An Unearthly Child 23 November 1963 1 1
5+
The Mysterious Planet 6 September 1986 6 1
6+
Rose 26 March 2005 9 1
7+
The Power of the Daleks 5 November 1966 2 1
8+
Castrolava 4 January 1982 5 1

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,6 @@ class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {
3737
createQueryTest("Read with RegexSerDe", "SELECT * FROM sales")
3838

3939
createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")
40+
41+
createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
4042
}

0 commit comments

Comments
 (0)