Commits (20)
5721b88  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 8, 2016)
4ae92d8  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 9, 2016)
75bca1b  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 9, 2016)
97d21f6  Merge branch 'master' of https://github.com/apache/spark into SPARK-1…  (rbalamohan, Aug 22, 2016)
4004c0a  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 22, 2016)
9a8838a  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 23, 2016)
eb8a955  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 23, 2016)
70cf84d  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 24, 2016)
c9d677b  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 24, 2016)
7385a06  Revert "[SPARK-16948][SQL] Querying empty partitioned orc tables thro…  (rbalamohan, Aug 24, 2016)
1746853  Revert "Revert "[SPARK-16948][SQL] Querying empty partitioned orc tab…  (rbalamohan, Aug 24, 2016)
5f7e5ad  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 24, 2016)
e8e2d70  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 25, 2016)
0f901aa  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 25, 2016)
0772a96  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 25, 2016)
6ff7e5d  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 25, 2016)
fc14e2d  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 25, 2016)
9ecb2ed  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Aug 26, 2016)
fa71370  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Sep 22, 2016)
e39715e  [SPARK-16948][SQL] Querying empty partitioned orc tables throws excep…  (rbalamohan, Sep 22, 2016)
@@ -237,21 +237,24 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
         new Path(metastoreRelation.catalogTable.storage.locationUri.get),
         partitionSpec)
 
-      val inferredSchema = if (fileType.equals("parquet")) {
-        val inferredSchema =
-          defaultSource.inferSchema(sparkSession, options, fileCatalog.allFiles())
-        inferredSchema.map { inferred =>
-          ParquetFileFormat.mergeMetastoreParquetSchema(metastoreSchema, inferred)
+      val inferredSchema =
+        defaultSource.inferSchema(sparkSession, options, fileCatalog.allFiles())

[Contributor] can we be more specific here? e.g. doing it only if it is parquet or orc.

+      val schema = if (fileType == "parquet") {
+        // For Parquet, get correct schema by merging Metastore schema data types
+        // and Parquet schema field names.
+        inferredSchema.map { schema =>
+          ParquetFileFormat.mergeMetastoreParquetSchema(metastoreSchema, schema)
         }.getOrElse(metastoreSchema)
       } else {
-        defaultSource.inferSchema(sparkSession, options, fileCatalog.allFiles()).get
+        // For others (e.g orc), fall back to metastore schema if needed.

[Member] If the fileType is not parquet, the only possible type is orc. To ensure this is always true, we should check fileType == "orc".
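
A minimal sketch of the stricter dispatch being suggested, with an explicit orc case that fails fast on any other file type. Names mirror the diff above, but resolveSchema and the mergeParquet parameter are hypothetical stand-ins, not the committed code:

    import org.apache.spark.sql.types.StructType

    def resolveSchema(
        fileType: String,
        inferredSchema: Option[StructType],
        metastoreSchema: StructType,
        mergeParquet: (StructType, StructType) => StructType): StructType = fileType match {
      case "parquet" =>
        // Merge metastore data types with Parquet field names.
        inferredSchema.map(mergeParquet(metastoreSchema, _)).getOrElse(metastoreSchema)
      case "orc" =>
        // Fall back to the metastore schema when nothing could be inferred.
        inferredSchema.getOrElse(metastoreSchema)
      case other =>
        // Fail fast instead of silently treating unknown formats like ORC.
        throw new IllegalArgumentException(s"Unexpected file type: $other")
    }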

+        inferredSchema.getOrElse(metastoreSchema)
       }

       val relation = HadoopFsRelation(
         sparkSession = sparkSession,
         location = fileCatalog,
         partitionSchema = partitionSchema,
-        dataSchema = inferredSchema,
+        dataSchema = schema,
         bucketSpec = bucketSpec,
         fileFormat = defaultSource,
         options = options)
@@ -17,9 +17,12 @@

 package org.apache.spark.sql.hive.orc
 
+import java.io.FileNotFoundException
 import java.net.URI
 import java.util.Properties
 
+import scala.util.Try
+
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars
@@ -54,10 +57,12 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
       sparkSession: SparkSession,
       options: Map[String, String],
       files: Seq[FileStatus]): Option[StructType] = {
-    OrcFileOperator.readSchema(
-      files.map(_.getPath.toUri.toString),
-      Some(sparkSession.sessionState.newHadoopConf())
-    )
+    // Safe to ignore FileNotFoundException in case no files are found.
+    val schema = Try(OrcFileOperator.readSchema(

[Member] Do we still need this?

[Author] Yes, in case this is referred to anytime later.

[Member] You know, the changes could impact the behaviors of the callers of inferSchema. Could you write a test case to cover this scenario?

[Contributor] @rajeshbalamohan is this change unnecessary for this PR? If so, I'd like to revert it to make the PR as small as possible.

+      files.map(_.getPath.toUri.toString),
+      Some(sparkSession.sessionState.newHadoopConf())))
+      .recover { case _: FileNotFoundException => None }

[Contributor] why are we ignoring file not found exception here?
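
A minimal, self-contained sketch of the Try(...).recover pattern used above: a FileNotFoundException raised while reading (e.g. an empty partitioned table whose directories hold no ORC files yet) becomes None instead of failing the query. readSchemaOrThrow is a hypothetical stand-in for OrcFileOperator.readSchema, not a real API:

    import java.io.FileNotFoundException
    import scala.util.Try

    // Stand-in for the schema reader: throws when there is nothing to read.
    def readSchemaOrThrow(paths: Seq[String]): Option[String] =
      if (paths.isEmpty) throw new FileNotFoundException("no ORC files to read")
      else Some("struct<key:int,value:string>")

    // Try captures the exception; recover maps the FileNotFoundException case to None.
    val schema: Option[String] =
      Try(readSchemaOrThrow(Seq.empty))
        .recover { case _: FileNotFoundException => None }
        .get

    assert(schema.isEmpty) // empty table: no inferred schema, caller falls back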

+    schema.get
   }
 
   override def prepareWrite(
@@ -372,6 +372,48 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
     }
   }

test("SPARK-16948. Check empty orc partitioned tables in ORC") {
withSQLConf((HiveUtils.CONVERT_METASTORE_ORC.key, "true")) {
withTempPath { dir =>

[Member] Could you remove this line?

[Member] Do we still need this?

+        withTable("empty_orc_partitioned") {
+          spark.sql(
+            s"""CREATE TABLE empty_orc_partitioned(key INT, value STRING)
+               | PARTITIONED BY (p INT) STORED AS ORC
+             """.stripMargin)

[Member] A comment about the style:

          sql(
            """
              |CREATE TABLE empty_orc_partitioned(key INT, value STRING)
              |PARTITIONED BY (p INT) STORED AS ORC
            """.stripMargin)

[Member] Nit:

          sql(
            """
              |CREATE TABLE empty_orc_partitioned(key INT, value STRING)
              |PARTITIONED BY (p INT) STORED AS ORC
            """.stripMargin)

+          val emptyDF = Seq.empty[(Int, String)].toDF("key", "value").coalesce(1)
+          emptyDF.createOrReplaceTempView("empty")

[Member] Could you remove this line?

+          // Query empty table
+          val df = spark.sql(
+            s"""SELECT key, value FROM empty_orc_partitioned
+               | WHERE key > 10
+             """.stripMargin)
+          checkAnswer(df, emptyDF)

[Member] A comment about the style:

          checkAnswer(
            sql("SELECT key, value FROM empty_orc_partitioned WHERE key > 10"),
            emptyDF)

+        }
+      }
+
+      withTempPath { dir =>
+        withTable("empty_text_partitioned") {
+          spark.sql(
+            s"""CREATE TABLE empty_text_partitioned(key INT, value STRING)
+               | PARTITIONED BY (p INT) STORED AS TEXTFILE

[Member] Testing the textfile format sounds useless. We do not convert it to LogicalRelation.
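
For context on that remark: only Parquet and ORC metastore tables are converted to a LogicalRelation, so a TEXTFILE table never reaches the inferSchema path this PR changes. A rough sketch of that gating, assuming conversion is keyed off the table's serde and the convertMetastoreParquet/convertMetastoreOrc flags; shouldConvert is illustrative, not the actual HiveMetastoreCatalog check:

    // Illustrative only: mirrors the spirit of the conversion check, not its exact code.
    def shouldConvert(serde: String, convertParquet: Boolean, convertOrc: Boolean): Boolean =
      (serde.toLowerCase.contains("parquet") && convertParquet) ||
        (serde.toLowerCase.contains("orc") && convertOrc)

    // A TEXTFILE table uses LazySimpleSerDe, so it fails both checks and stays on
    // the plain Hive read path.
    assert(!shouldConvert("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", true, true))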

""".stripMargin)

[Member @gatorsmile, Aug 25, 2016] Nit:

          sql(
            """
              |CREATE TABLE empty_orc(key INT, value STRING)
              |STORED AS ORC
            """.stripMargin)

+          val emptyDF = Seq.empty[(Int, String)].toDF("key", "value").coalesce(1)
+          emptyDF.createOrReplaceTempView("empty_text")
+
+          // Query empty table
+          val df = spark.sql(
+            s"""SELECT key, value FROM empty_text_partitioned
+               | WHERE key > 10
+             """.stripMargin)
+          checkAnswer(df, emptyDF)
+        }
+      }
+    }
+  }

test("SPARK-10623 Enable ORC PPD") {
withTempPath { dir =>
withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> "true") {