From 9722230e29609102e48e763984c7dfd9c972f1f4 Mon Sep 17 00:00:00 2001
From: Kevin Yu
Date: Tue, 7 Jan 2020 22:58:05 -0800
Subject: [PATCH 1/5] rebase on spark 3.0

---
 .../sql/hive/HiveParquetSourceSuite.scala | 112 +++++++++++++
 .../sql/hive/orc/HiveOrcSourceSuite.scala | 151 ++++++++++++++++++
 2 files changed, 263 insertions(+)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
index 071f0fec9766..40bc17bdd311 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
@@ -18,6 +18,7 @@ package org.apache.spark.sql.hive
 
 import java.io.File
+import java.io.IOException
 
 import org.apache.spark.sql.{Row, SaveMode}
 import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
@@ -222,4 +223,115 @@
       assert(df4.columns === Array("str", "max_int"))
     }
   }
+
+  test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
+    Seq("true", "false").foreach { parquetConversion =>
+      withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) {
+        withTempPath { path =>
+          withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
+            val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).
+              toDF("c1", "c2", "c3").repartition(1)
+            val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")).
+              toDF("c1", "c2", "c3").repartition(1)
+            val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")).
+              toDF("c1", "c2", "c3").repartition(1)
+            someDF1.write.parquet(s"${path.getCanonicalPath}/l1/")
+            someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/")
+            someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/")
+
+            val topDirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl1(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin
+            sql(topDirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("select * from tbl1"), Nil)
+            } else {
+              intercept[IOException](sql("select * from tbl1").show())
+            }
+
+            val l1DirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl2(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin
+            sql(l1DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("select * from tbl2"),
+                (1 to 2).map(i => Row(i, i, s"parq$i")))
+            } else {
+              intercept[IOException](sql("select * from tbl2").show())
+            }
+
+            val l2DirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl3(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin
+            sql(l2DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("select * from tbl3"),
+                (3 to 4).map(i => Row(i, i, s"parq$i")))
+            } else {
+              intercept[IOException](sql("select * from tbl3").show())
+            }
+
+            val wildcardTopDirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl4(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${new File(s"${path}/*").toURI}'""".stripMargin
+            sql(wildcardTopDirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("select * from tbl4"),
+                (1 to 2).map(i => Row(i, i, s"parq$i")))
+            } else {
+              intercept[IOException](sql("select * from tbl4").show())
+            }
+
+            val wildcardL1DirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl5(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${new File(s"${path}/l1/*").toURI}'""".stripMargin
+            sql(wildcardL1DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("select * from tbl5"),
+                (1 to 4).map(i => Row(i, i, s"parq$i")))
+            } else {
+              intercept[IOException](sql("select * from tbl5").show())
+            }
+
+            val wildcardL2DirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl6(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${new File(s"${path}/l1/l2/*").toURI}'""".stripMargin
+            sql(wildcardL2DirStatement)
+            checkAnswer(sql("select * from tbl6"),
+              (3 to 6).map(i => Row(i, i, s"parq$i")))
+          }
+        }
+      }
+    }
+  }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
index 0ea941c8e0d8..00310b62bb5f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
@@ -170,4 +170,155 @@
   test("SPARK-11412 read and merge orc schemas in parallel") {
     testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel)
   }
+
+  test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
+    Seq(true, false).foreach { convertMetastore =>
+      withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") {
+        withTempDir { dir =>
+          try {
+            hiveClient.runSqlHive("USE default")
+            hiveClient.runSqlHive(
+              """
+                |CREATE EXTERNAL TABLE hive_orc(
+                |  C1 INT,
+                |  C2 INT,
+                |  C3 STRING)
+                |STORED AS orc""".stripMargin)
+            // Hive throws an exception if the location is assigned in the CREATE TABLE statement.
+            hiveClient.runSqlHive(
+              s"ALTER TABLE hive_orc SET LOCATION " +
+                s"'${new File(s"${dir.getCanonicalPath}/l1/").toURI}'")
+            hiveClient.runSqlHive(
+              """
+                |INSERT INTO TABLE hive_orc
+                |VALUES (1, 1, 'orc1'), (2, 2, 'orc2')""".stripMargin)
+
+            hiveClient.runSqlHive(
+              s"ALTER TABLE hive_orc SET LOCATION " +
+                s"'${new File(s"${dir.getCanonicalPath}/l1/l2/").toURI}'")
+            hiveClient.runSqlHive(
+              """
+                |INSERT INTO TABLE hive_orc
+                |VALUES (3, 3, 'orc3'), (4, 4, 'orc4')""".stripMargin)
+
+            hiveClient.runSqlHive(
+              s"ALTER TABLE hive_orc SET LOCATION " +
+                s"'${new File(s"${dir.getCanonicalPath}/l1/l2/l3/").toURI}'")
+            hiveClient.runSqlHive(
+              """
+                |INSERT INTO TABLE hive_orc
+                |VALUES (5, 5, 'orc5'), (6, 6, 'orc6')""".stripMargin)
+
+            withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
+              val topDirStatement =
+                s"""
+                   |CREATE EXTERNAL TABLE tbl1(
+                   |  c1 int,
+                   |  c2 int,
+                   |  c3 string)
+                   |STORED AS orc
+                   |LOCATION '${s"${dir.getCanonicalPath}"}'""".stripMargin
+              sql(topDirStatement)
+              val topDirSqlStatement = s"select * from tbl1"
+              if (convertMetastore) {
+                checkAnswer(sql(topDirSqlStatement), Nil)
+              } else {
+                checkAnswer(sql(topDirSqlStatement),
+                  (1 to 6).map(i => Row(i, i, s"orc$i")))
+              }
+
+              val l1DirStatement =
+                s"""
+                   |CREATE EXTERNAL TABLE tbl2(
+                   |  c1 int,
+                   |  c2 int,
+                   |  c3 string)
+                   |STORED AS orc
+                   |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin
+              sql(l1DirStatement)
+              val l1DirSqlStatement = s"select * from tbl2"
+              if (convertMetastore) {
+                checkAnswer(sql(l1DirSqlStatement),
+                  (1 to 2).map(i => Row(i, i, s"orc$i")))
+              } else {
+                checkAnswer(sql(l1DirSqlStatement),
+                  (1 to 6).map(i => Row(i, i, s"orc$i")))
+              }
+
+              val l2DirStatement =
+                s"""
+                   |CREATE EXTERNAL TABLE tbl3(
+                   |  c1 int,
+                   |  c2 int,
+                   |  c3 string)
+                   |STORED AS orc
+                   |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin
+              sql(l2DirStatement)
+              val l2DirSqlStatement = s"select * from tbl3"
+              if (convertMetastore) {
+                checkAnswer(sql(l2DirSqlStatement),
+                  (3 to 4).map(i => Row(i, i, s"orc$i")))
+              } else {
+                checkAnswer(sql(l2DirSqlStatement),
+                  (3 to 6).map(i => Row(i, i, s"orc$i")))
+              }
+
+              val wildcardTopDirStatement =
+                s"""
+                   |CREATE EXTERNAL TABLE tbl4(
+                   |  c1 int,
+                   |  c2 int,
+                   |  c3 string)
+                   |STORED AS orc
+                   |LOCATION '${new File(s"${dir}/*").toURI}'""".stripMargin
+              sql(wildcardTopDirStatement)
+              val wildcardTopDirSqlStatement = s"select * from tbl4"
+              if (convertMetastore) {
+                checkAnswer(sql(wildcardTopDirSqlStatement),
+                  (1 to 2).map(i => Row(i, i, s"orc$i")))
+              } else {
+                checkAnswer(sql(wildcardTopDirSqlStatement), Nil)
+              }
+
+              val wildcardL1DirStatement =
+                s"""
+                   |CREATE EXTERNAL TABLE tbl5(
+                   |  c1 int,
+                   |  c2 int,
+                   |  c3 string)
+                   |STORED AS orc
+                   |LOCATION '${new File(s"${dir}/l1/*").toURI}'""".stripMargin
+              sql(wildcardL1DirStatement)
+              val wildcardL1DirSqlStatement = s"select * from tbl5"
+              if (convertMetastore) {
+                checkAnswer(sql(wildcardL1DirSqlStatement),
+                  (1 to 4).map(i => Row(i, i, s"orc$i")))
+              } else {
+                checkAnswer(sql(wildcardL1DirSqlStatement), Nil)
+              }
+
+              val wildcardL2Statement =
+                s"""
+                   |CREATE EXTERNAL TABLE tbl6(
+                   |  c1 int,
+                   |  c2 int,
+                   |  c3 string)
+                   |STORED AS orc
+                   |LOCATION '${new File(s"${dir}/l1/l2/*").toURI}'""".stripMargin
+              sql(wildcardL2Statement)
+              val wildcardL2SqlStatement = s"select * from tbl6"
+              if (convertMetastore) {
+                checkAnswer(sql(wildcardL2SqlStatement),
+                  (3 to 6).map(i => Row(i, i, s"orc$i")))
+              } else {
+                checkAnswer(sql(wildcardL2SqlStatement), Nil)
+              }
+            }
+          } finally {
+            hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc")
+          }
+        }
+      }
+    }
+  }
 }
From 9bc32ab1679633255a7129630a62d32f351c7d20 Mon Sep 17 00:00:00 2001
From: Kevin Yu
Date: Thu, 9 Jan 2020 14:45:18 -0800
Subject: [PATCH 2/5] address comments

---
 .../sql/hive/HiveParquetSourceSuite.scala | 34 +++++++++++++------
 .../sql/hive/orc/HiveOrcSourceSuite.scala | 27 ++++++++-------
 2 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
index 40bc17bdd311..7da0eaa9d89e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
@@ -249,9 +249,11 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
                  |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin
             sql(topDirStatement)
             if (parquetConversion == "true") {
-              checkAnswer(sql("select * from tbl1"), Nil)
+              checkAnswer(sql("SELECT * FROM tbl1"), Nil)
             } else {
-              intercept[IOException](sql("select * from tbl1").show())
+              val msg = intercept[IOException] {sql("SELECT * FROM tbl1").show()
+              }.getMessage
+                assert(msg.contains("Not a file:"))
             }
 
             val l1DirStatement =
@@ -264,10 +266,13 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
                  |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin
             sql(l1DirStatement)
             if (parquetConversion == "true") {
-              checkAnswer(sql("select * from tbl2"),
+              checkAnswer(sql("SELECT * FROM tbl2"),
                 (1 to 2).map(i => Row(i, i, s"parq$i")))
             } else {
-              intercept[IOException](sql("select * from tbl2").show())
+              val msg = intercept[IOException] {
+                sql("SELECT * FROM tbl2").show()
+              }.getMessage
+              assert(msg.contains("Not a file:"))
             }
 
             val l2DirStatement =
@@ -280,10 +285,12 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
                  |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin
             sql(l2DirStatement)
             if (parquetConversion == "true") {
-              checkAnswer(sql("select * from tbl3"),
+              checkAnswer(sql("SELECT * FROM tbl3"),
                 (3 to 4).map(i => Row(i, i, s"parq$i")))
             } else {
-              intercept[IOException](sql("select * from tbl3").show())
+              val msg = intercept[IOException] {sql("SELECT * FROM tbl3").show()
+              }.getMessage
+              assert(msg.contains("Not a file:"))
             }
 
             val wildcardTopDirStatement =
@@ -296,10 +303,13 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
                  |LOCATION '${new File(s"${path}/*").toURI}'""".stripMargin
             sql(wildcardTopDirStatement)
             if (parquetConversion == "true") {
-              checkAnswer(sql("select * from tbl4"),
+              checkAnswer(sql("SELECT * FROM tbl4"),
                 (1 to 2).map(i => Row(i, i, s"parq$i")))
             } else {
-              intercept[IOException](sql("select * from tbl4").show())
+              val msg = intercept[IOException] {
+                sql("SELECT * FROM tbl4").show()
+              }.getMessage
+              assert(msg.contains("Not a file:"))
             }
 
             val wildcardL1DirStatement =
@@ -312,10 +322,12 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
                  |LOCATION '${new File(s"${path}/l1/*").toURI}'""".stripMargin
             sql(wildcardL1DirStatement)
             if (parquetConversion == "true") {
-              checkAnswer(sql("select * from tbl5"),
+              checkAnswer(sql("SELECT * FROM tbl5"),
                 (1 to 4).map(i => Row(i, i, s"parq$i")))
             } else {
-              intercept[IOException](sql("select * from tbl5").show())
+              val msg = intercept[IOException] {sql("SELECT * FROM tbl5").show()
+              }.getMessage
+              assert(msg.contains("Not a file:"))
             }
 
             val wildcardL2DirStatement =
@@ -327,7 +339,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
                  |STORED AS parquet
                  |LOCATION '${new File(s"${path}/l1/l2/*").toURI}'""".stripMargin
             sql(wildcardL2DirStatement)
-            checkAnswer(sql("select * from tbl6"),
+            checkAnswer(sql("SELECT * FROM tbl6"),
               (3 to 6).map(i => Row(i, i, s"parq$i")))
           }
         }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
index 00310b62bb5f..319c324ff797 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
@@ -176,24 +176,25 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
       withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") {
         withTempDir { dir =>
           try {
-            hiveClient.runSqlHive("USE default")
-            hiveClient.runSqlHive(
-              """
+            sql("USE default")
+            sql(
+              s"""
                 |CREATE EXTERNAL TABLE hive_orc(
                 |  C1 INT,
                 |  C2 INT,
                 |  C3 STRING)
-                |STORED AS orc""".stripMargin)
+                |STORED AS orc
+                |LOCATION '${new File(s"${dir.getCanonicalPath}").toURI}'""".stripMargin)
+
             // Hive throws an exception if the location is assigned in the CREATE TABLE statement.
-            hiveClient.runSqlHive(
-              s"ALTER TABLE hive_orc SET LOCATION " +
+            sql(s"ALTER TABLE hive_orc SET LOCATION " +
                 s"'${new File(s"${dir.getCanonicalPath}/l1/").toURI}'")
             hiveClient.runSqlHive(
               """
                 |INSERT INTO TABLE hive_orc
                 |VALUES (1, 1, 'orc1'), (2, 2, 'orc2')""".stripMargin)
 
-            hiveClient.runSqlHive(
+            sql(
               s"ALTER TABLE hive_orc SET LOCATION " +
                 s"'${new File(s"${dir.getCanonicalPath}/l1/l2/").toURI}'")
             hiveClient.runSqlHive(
               """
@@ -201,7 +202,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
                 |INSERT INTO TABLE hive_orc
                 |VALUES (3, 3, 'orc3'), (4, 4, 'orc4')""".stripMargin)
 
-            hiveClient.runSqlHive(
+            sql(
               s"ALTER TABLE hive_orc SET LOCATION " +
                 s"'${new File(s"${dir.getCanonicalPath}/l1/l2/l3/").toURI}'")
             hiveClient.runSqlHive(
@@ -219,7 +220,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
                    |STORED AS orc
                    |LOCATION '${s"${dir.getCanonicalPath}"}'""".stripMargin
               sql(topDirStatement)
-              val topDirSqlStatement = s"select * from tbl1"
+              val topDirSqlStatement = s"SELECT * FROM tbl1"
               if (convertMetastore) {
                 checkAnswer(sql(topDirSqlStatement), Nil)
               } else {
@@ -236,7 +237,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
                    |STORED AS orc
                    |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin
               sql(l1DirStatement)
-              val l1DirSqlStatement = s"select * from tbl2"
+              val l1DirSqlStatement = s"SELECT * FROM tbl2"
               if (convertMetastore) {
                 checkAnswer(sql(l1DirSqlStatement),
                   (1 to 2).map(i => Row(i, i, s"orc$i")))
@@ -254,7 +255,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
                    |STORED AS orc
                    |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin
               sql(l2DirStatement)
-              val l2DirSqlStatement = s"select * from tbl3"
+              val l2DirSqlStatement = s"SELECT * FROM tbl3"
               if (convertMetastore) {
                 checkAnswer(sql(l2DirSqlStatement),
                   (3 to 4).map(i => Row(i, i, s"orc$i")))
@@ -272,7 +273,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
                    |STORED AS orc
                    |LOCATION '${new File(s"${dir}/*").toURI}'""".stripMargin
               sql(wildcardTopDirStatement)
-              val wildcardTopDirSqlStatement = s"select * from tbl4"
+              val wildcardTopDirSqlStatement = s"SELECT * FROM tbl4"
               if (convertMetastore) {
                 checkAnswer(sql(wildcardTopDirSqlStatement),
                   (1 to 2).map(i => Row(i, i, s"orc$i")))
@@ -289,7 +290,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
                    |STORED AS orc
                    |LOCATION '${new File(s"${dir}/l1/*").toURI}'""".stripMargin
               sql(wildcardL1DirStatement)
-              val wildcardL1DirSqlStatement = s"select * from tbl5"
+              val wildcardL1DirSqlStatement = s"SELECT * FROM tbl5"
               if (convertMetastore) {
                 checkAnswer(sql(wildcardL1DirSqlStatement),
                   (1 to 4).map(i => Row(i, i, s"orc$i")))
@@ -306,7 +307,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
                    |STORED AS orc
                    |LOCATION '${new File(s"${dir}/l1/l2/*").toURI}'""".stripMargin
               sql(wildcardL2Statement)
-              val wildcardL2SqlStatement = s"select * from tbl6"
+              val wildcardL2SqlStatement = s"SELECT * FROM tbl6"
               if (convertMetastore) {
                 checkAnswer(sql(wildcardL2SqlStatement),
                   (3 to 6).map(i => Row(i, i, s"orc$i")))
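Note: a sketch of the assertion pattern the review asked for above, independent of this suite. ScalaTest's intercept returns the thrown exception, so the test can pin down the failure message instead of accepting any IOException:

    import java.io.IOException
    import org.scalatest.Assertions._

    val msg = intercept[IOException] {
      throw new IOException("Not a file: /tmp/t/sub")  // stand-in for the failing table scan
    }.getMessage
    assert(msg.contains("Not a file:"))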
From 2cf3e26b410f94a78be6e12021c9dc7418548cc7 Mon Sep 17 00:00:00 2001
From: Kevin Yu
Date: Sat, 11 Jan 2020 22:56:17 -0800
Subject: [PATCH 3/5] address comments

---
 .../sql/hive/HiveParquetSourceSuite.scala | 26 +++----
 .../sql/hive/orc/HiveOrcSourceSuite.scala | 78 +++++++++++--------
 2 files changed, 56 insertions(+), 48 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
index 7da0eaa9d89e..72931fec7445 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
@@ -251,9 +251,10 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
             if (parquetConversion == "true") {
               checkAnswer(sql("SELECT * FROM tbl1"), Nil)
             } else {
-              val msg = intercept[IOException] {sql("SELECT * FROM tbl1").show()
+              val msg = intercept[IOException] {
+                sql("SELECT * FROM tbl1").show()
               }.getMessage
-                assert(msg.contains("Not a file:"))
+              assert(msg.contains("Not a file:"))
             }
 
             val l1DirStatement =
@@ -266,8 +267,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
                  |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin
             sql(l1DirStatement)
             if (parquetConversion == "true") {
-              checkAnswer(sql("SELECT * FROM tbl2"),
-                (1 to 2).map(i => Row(i, i, s"parq$i")))
+              checkAnswer(sql("SELECT * FROM tbl2"), (1 to 2).map(i => Row(i, i, s"parq$i")))
             } else {
               val msg = intercept[IOException] {
                 sql("SELECT * FROM tbl2").show()
@@ -285,10 +285,10 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
                  |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin
             sql(l2DirStatement)
             if (parquetConversion == "true") {
-              checkAnswer(sql("SELECT * FROM tbl3"),
-                (3 to 4).map(i => Row(i, i, s"parq$i")))
+              checkAnswer(sql("SELECT * FROM tbl3"), (3 to 4).map(i => Row(i, i, s"parq$i")))
             } else {
-              val msg = intercept[IOException] {sql("SELECT * FROM tbl3").show()
+              val msg = intercept[IOException] {
+                sql("SELECT * FROM tbl3").show()
               }.getMessage
               assert(msg.contains("Not a file:"))
             }
@@ -303,8 +303,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
                  |LOCATION '${new File(s"${path}/*").toURI}'""".stripMargin
             sql(wildcardTopDirStatement)
             if (parquetConversion == "true") {
-              checkAnswer(sql("SELECT * FROM tbl4"),
-                (1 to 2).map(i => Row(i, i, s"parq$i")))
+              checkAnswer(sql("SELECT * FROM tbl4"), (1 to 2).map(i => Row(i, i, s"parq$i")))
             } else {
               val msg = intercept[IOException] {
                 sql("SELECT * FROM tbl4").show()
@@ -321,10 +321,10 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
                  |LOCATION '${new File(s"${path}/l1/*").toURI}'""".stripMargin
             sql(wildcardL1DirStatement)
             if (parquetConversion == "true") {
-              checkAnswer(sql("SELECT * FROM tbl5"),
-                (1 to 4).map(i => Row(i, i, s"parq$i")))
+              checkAnswer(sql("SELECT * FROM tbl5"), (1 to 4).map(i => Row(i, i, s"parq$i")))
             } else {
-              val msg = intercept[IOException] {sql("SELECT * FROM tbl5").show()
+              val msg = intercept[IOException] {
+                sql("SELECT * FROM tbl5").show()
               }.getMessage
               assert(msg.contains("Not a file:"))
             }
@@ -339,8 +338,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
                  |STORED AS parquet
                  |LOCATION '${new File(s"${path}/l1/l2/*").toURI}'""".stripMargin
             sql(wildcardL2DirStatement)
-            checkAnswer(sql("SELECT * FROM tbl6"),
-              (3 to 6).map(i => Row(i, i, s"parq$i")))
+            checkAnswer(sql("SELECT * FROM tbl6"), (3 to 6).map(i => Row(i, i, s"parq$i")))
           }
         }
       }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
index 319c324ff797..455c9c85c5a8 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
@@ -176,39 +176,47 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
       withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") {
         withTempDir { dir =>
           try {
-            sql("USE default")
-            sql(
-              s"""
-                |CREATE EXTERNAL TABLE hive_orc(
-                |  C1 INT,
-                |  C2 INT,
-                |  C3 STRING)
-                |STORED AS orc
-                |LOCATION '${new File(s"${dir.getCanonicalPath}").toURI}'""".stripMargin)
-
-            // Hive throws an exception if the location is assigned in the CREATE TABLE statement.
-            sql(s"ALTER TABLE hive_orc SET LOCATION " +
-                s"'${new File(s"${dir.getCanonicalPath}/l1/").toURI}'")
-            hiveClient.runSqlHive(
-              """
-                |INSERT INTO TABLE hive_orc
-                |VALUES (1, 1, 'orc1'), (2, 2, 'orc2')""".stripMargin)
-
-            sql(
-              s"ALTER TABLE hive_orc SET LOCATION " +
-                s"'${new File(s"${dir.getCanonicalPath}/l1/l2/").toURI}'")
-            hiveClient.runSqlHive(
-              """
-                |INSERT INTO TABLE hive_orc
-                |VALUES (3, 3, 'orc3'), (4, 4, 'orc4')""".stripMargin)
-
-            sql(
-              s"ALTER TABLE hive_orc SET LOCATION " +
-                s"'${new File(s"${dir.getCanonicalPath}/l1/l2/l3/").toURI}'")
-            hiveClient.runSqlHive(
-              """
-                |INSERT INTO TABLE hive_orc
-                |VALUES (5, 5, 'orc5'), (6, 6, 'orc6')""".stripMargin)
+            val orcTblStatement1 =
+              s"""
+                 |CREATE EXTERNAL TABLE orc_tbl1(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS orc
+                 |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin
+            sql(orcTblStatement1)
+
+            val orcTblInsertL1 =
+              s"INSERT INTO TABLE orc_tbl1 VALUES (1, 1, 'orc1'), (2, 2, 'orc2')".stripMargin
+            sql(orcTblInsertL1)
+
+            val orcTblStatement2 =
+              s"""
+                 |CREATE EXTERNAL TABLE orc_tbl2(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS orc
+                 |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin
+            sql(orcTblStatement2)
+
+            val orcTblInsertL2 =
+              s"INSERT INTO TABLE orc_tbl2 VALUES (3, 3, 'orc3'), (4, 4, 'orc4')".stripMargin
+            sql(orcTblInsertL2)
+
+            val orcTblStatement3 =
+              s"""
+                 |CREATE EXTERNAL TABLE orc_tbl3(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS orc
+                 |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/l3/"}'""".stripMargin
+            sql(orcTblStatement3)
+
+            val orcTblInsertL3 =
+              s"INSERT INTO TABLE orc_tbl3 VALUES (5, 5, 'orc5'), (6, 6, 'orc6')".stripMargin
+            sql(orcTblInsertL3)
 
             withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
               val topDirStatement =
@@ -316,7 +324,9 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
               }
             }
           } finally {
-            hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc")
+            sql("DROP TABLE IF EXISTS orc_tbl1")
+            sql("DROP TABLE IF EXISTS orc_tbl2")
+            sql("DROP TABLE IF EXISTS orc_tbl3")
           }
         }
       }
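Note: patch 3 switches the fixture from repeatedly relocating a single Hive-managed table to one external table per directory level, created through the suite's own sql() helper. A condensed sketch of the same layout-building idea (assumes this test infrastructure's sql helper and a java.io.File temp directory dir; table names are illustrative):

    Seq("l1", "l1/l2", "l1/l2/l3").zipWithIndex.foreach { case (sub, i) =>
      sql(
        s"""
           |CREATE EXTERNAL TABLE demo_tbl$i(
           |  c1 int,
           |  c2 int,
           |  c3 string)
           |STORED AS orc
           |LOCATION '${dir.getCanonicalPath}/$sub/'""".stripMargin)
      // each insert lands in its own level of the nested directory tree
      sql(s"INSERT INTO TABLE demo_tbl$i VALUES ($i, $i, 'orc$i')")
    }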
From 0bb628f93399c2331155508eec60730277e5426c Mon Sep 17 00:00:00 2001
From: Kevin Yu
Date: Thu, 16 Jan 2020 21:01:36 -0800
Subject: [PATCH 4/5] address comments

---
 .../sql/hive/HiveParquetSourceSuite.scala | 53 +++++++++++++++----
 1 file changed, 43 insertions(+), 10 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
index 72931fec7445..b557fe73f115 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
@@ -228,16 +228,49 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
     Seq("true", "false").foreach { parquetConversion =>
       withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) {
         withTempPath { path =>
-          withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
-            val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).
-              toDF("c1", "c2", "c3").repartition(1)
-            val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")).
-              toDF("c1", "c2", "c3").repartition(1)
-            val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")).
-              toDF("c1", "c2", "c3").repartition(1)
-            someDF1.write.parquet(s"${path.getCanonicalPath}/l1/")
-            someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/")
-            someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/")
+          withTable("parq_tbl1", "parq_tbl2", "parq_tbl3",
+            "tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
+            val parquetTblStatement1 =
+              s"""
+                 |CREATE EXTERNAL TABLE parq_tbl1(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin
+            sql(parquetTblStatement1)
+
+            val parquetTblInsertL1 =
+              s"INSERT INTO TABLE parq_tbl1 VALUES (1, 1, 'parq1'), (2, 2, 'parq2')".stripMargin
+            sql(parquetTblInsertL1)
+
+            val parquetTblStatement2 =
+              s"""
+                 |CREATE EXTERNAL TABLE parq_tbl2(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin
+            sql(parquetTblStatement2)
+
+            val parquetTblInsertL2 =
+              s"INSERT INTO TABLE parq_tbl2 VALUES (3, 3, 'parq3'), (4, 4, 'parq4')".stripMargin
+            sql(parquetTblInsertL2)
+
+            val parquetTblStatement3 =
+              s"""
+                 |CREATE EXTERNAL TABLE parq_tbl3(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}/l1/l2/l3/"}'""".stripMargin
+            sql(parquetTblStatement3)
+
+            val parquetTblInsertL3 =
+              s"INSERT INTO TABLE parq_tbl3 VALUES (5, 5, 'parq5'), (6, 6, 'parq6')".stripMargin
+            sql(parquetTblInsertL3)
 
             val topDirStatement =
               s"""
From 39f271f23278c334a8230408703201276e7292ac Mon Sep 17 00:00:00 2001
From: Kevin Yu
Date: Fri, 17 Jan 2020 14:43:59 -0800
Subject: [PATCH 5/5] address comments

---
 .../sql/hive/orc/HiveOrcSourceSuite.scala | 30 ++++++-------------
 1 file changed, 9 insertions(+), 21 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
index 455c9c85c5a8..f3e712d6c0a4 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
@@ -175,7 +175,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
     Seq(true, false).foreach { convertMetastore =>
       withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") {
         withTempDir { dir =>
-          try {
+          withTable("orc_tbl1", "orc_tbl2", "orc_tbl3") {
             val orcTblStatement1 =
               s"""
                 |CREATE EXTERNAL TABLE orc_tbl1(
@@ -232,8 +232,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
               sql(topDirStatement)
               val topDirSqlStatement = s"SELECT * FROM tbl1"
               if (convertMetastore) {
                 checkAnswer(sql(topDirSqlStatement), Nil)
               } else {
-                checkAnswer(sql(topDirSqlStatement),
-                  (1 to 6).map(i => Row(i, i, s"orc$i")))
+                checkAnswer(sql(topDirSqlStatement), (1 to 6).map(i => Row(i, i, s"orc$i")))
               }
@@ -247,11 +246,9 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
               sql(l1DirStatement)
               val l1DirSqlStatement = s"SELECT * FROM tbl2"
               if (convertMetastore) {
-                checkAnswer(sql(l1DirSqlStatement),
-                  (1 to 2).map(i => Row(i, i, s"orc$i")))
+                checkAnswer(sql(l1DirSqlStatement), (1 to 2).map(i => Row(i, i, s"orc$i")))
               } else {
-                checkAnswer(sql(l1DirSqlStatement),
-                  (1 to 6).map(i => Row(i, i, s"orc$i")))
+                checkAnswer(sql(l1DirSqlStatement), (1 to 6).map(i => Row(i, i, s"orc$i")))
               }
@@ -265,11 +262,9 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
               sql(l2DirStatement)
               val l2DirSqlStatement = s"SELECT * FROM tbl3"
               if (convertMetastore) {
-                checkAnswer(sql(l2DirSqlStatement),
-                  (3 to 4).map(i => Row(i, i, s"orc$i")))
+                checkAnswer(sql(l2DirSqlStatement), (3 to 4).map(i => Row(i, i, s"orc$i")))
               } else {
-                checkAnswer(sql(l2DirSqlStatement),
-                  (3 to 6).map(i => Row(i, i, s"orc$i")))
+                checkAnswer(sql(l2DirSqlStatement), (3 to 6).map(i => Row(i, i, s"orc$i")))
               }
@@ -283,8 +278,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
               sql(wildcardTopDirStatement)
               val wildcardTopDirSqlStatement = s"SELECT * FROM tbl4"
               if (convertMetastore) {
-                checkAnswer(sql(wildcardTopDirSqlStatement),
-                  (1 to 2).map(i => Row(i, i, s"orc$i")))
+                checkAnswer(sql(wildcardTopDirSqlStatement), (1 to 2).map(i => Row(i, i, s"orc$i")))
               } else {
                 checkAnswer(sql(wildcardTopDirSqlStatement), Nil)
               }
@@ -300,8 +294,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
               sql(wildcardL1DirStatement)
               val wildcardL1DirSqlStatement = s"SELECT * FROM tbl5"
               if (convertMetastore) {
-                checkAnswer(sql(wildcardL1DirSqlStatement),
-                  (1 to 4).map(i => Row(i, i, s"orc$i")))
+                checkAnswer(sql(wildcardL1DirSqlStatement), (1 to 4).map(i => Row(i, i, s"orc$i")))
               } else {
                 checkAnswer(sql(wildcardL1DirSqlStatement), Nil)
               }
@@ -317,16 +310,11 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
               sql(wildcardL2Statement)
               val wildcardL2SqlStatement = s"SELECT * FROM tbl6"
               if (convertMetastore) {
-                checkAnswer(sql(wildcardL2SqlStatement),
-                  (3 to 6).map(i => Row(i, i, s"orc$i")))
+                checkAnswer(sql(wildcardL2SqlStatement), (3 to 6).map(i => Row(i, i, s"orc$i")))
               } else {
                 checkAnswer(sql(wildcardL2SqlStatement), Nil)
               }
             }
-          } finally {
-            sql("DROP TABLE IF EXISTS orc_tbl1")
-            sql("DROP TABLE IF EXISTS orc_tbl2")
-            sql("DROP TABLE IF EXISTS orc_tbl3")
           }
         }
       }
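Note: the series applies in order with git am, and the touched suites can then be run through the sbt hive project (for example, build/sbt "hive/testOnly *HiveParquetSourceSuite *HiveOrcSourceSuite"), assuming a standard Spark development checkout.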