@@ -112,20 +112,23 @@ case class AnalyzeCreateTable(sparkSession: SparkSession) extends Rule[LogicalPlan] {
throw new AnalysisException("Saving data into a view is not allowed.")
}

if (DDLUtils.isHiveTable(existingTable)) {
throw new AnalysisException(s"Saving data in the Hive serde table $tableName is " +
"not supported yet. Please use the insertInto() API as an alternative.")
val (isProviderMatch, existingProvider, specifiedProvider) =
DDLUtils.isHiveTable(existingTable) match {
case false =>
val existing = DataSource.lookupDataSource(existingTable.provider.get)
val specified = DataSource.lookupDataSource(tableDesc.provider.get)
(existing == specified, existing.getSimpleName, specified.getSimpleName)
Contributor: why remove this line?

+          case true =>
Contributor: We have HiveFileFormat, and we can make it implement DataSourceRegister, then DataSource.lookupDataSource("hive") can work.
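A rough sketch of that suggestion (not code from this PR; the class name `HiveRegister` is illustrative):

```scala
import org.apache.spark.sql.sources.DataSourceRegister

// If a hive source registers the short name "hive" (and is listed in
// META-INF/services/org.apache.spark.sql.sources.DataSourceRegister),
// DataSource.lookupDataSource("hive") can resolve it by that name.
class HiveRegister extends DataSourceRegister {
  override def shortName(): String = "hive"
}
```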

+            val existing = existingTable.provider.get
+            val specified = tableDesc.provider.get
+            (existing == specified, existing, specified)
+        }

-      // Check if the specified data source match the data source of the existing table.
-      val existingProvider = DataSource.lookupDataSource(existingTable.provider.get)
-      val specifiedProvider = DataSource.lookupDataSource(tableDesc.provider.get)
       // TODO: Check that options from the resolved relation match the relation that we are
       // inserting into (i.e. using the same compression).
-      if (existingProvider != specifiedProvider) {
+      if (!isProviderMatch) {
         throw new AnalysisException(s"The format of the existing table $tableName is " +
-          s"`${existingProvider.getSimpleName}`. It doesn't match the specified format " +
-          s"`${specifiedProvider.getSimpleName}`.")
+          s"`$existingProvider`. It doesn't match the specified format `$specifiedProvider`.")
       }
 
       if (analyzedQuery.schema.length != existingTable.schema.length) {
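For reference, a hedged sketch of the user-facing behavior this hunk implements (run in a spark-shell session; the table name `src` and the exact message rendering are illustrative):

```scala
// Appending with the same provider as the existing table is allowed.
spark.range(3).write.format("parquet").saveAsTable("src")
spark.range(3).write.format("parquet").mode("append").saveAsTable("src")

// Appending with a different provider fails the isProviderMatch check above with
// an AnalysisException along the lines of: The format of the existing table ... is
// `ParquetFileFormat`. It doesn't match the specified format `JsonFileFormat`.
spark.range(3).write.format("json").mode("append").saveAsTable("src")
```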

@@ -93,18 +93,11 @@ class HiveAnalysis(session: SparkSession) extends Rule[LogicalPlan] {
       InsertIntoHiveTable(table, partSpec, query, overwrite, ifNotExists)
 
     case CreateTable(tableDesc, mode, Some(query)) if DDLUtils.isHiveTable(tableDesc) =>
-      // Currently `DataFrameWriter.saveAsTable` doesn't support the Append mode of hive serde
-      // tables yet.
-      if (mode == SaveMode.Append) {
-        throw new AnalysisException(
-          "CTAS for hive serde tables does not support append semantics.")
-      }
-
       val dbName = tableDesc.identifier.database.getOrElse(session.catalog.currentDatabase)
       CreateHiveTableAsSelectCommand(
         tableDesc.copy(identifier = tableDesc.identifier.copy(database = Some(dbName))),
         query,
-        mode == SaveMode.Ignore)
+        mode)
   }
 
   /**
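In user terms, this lifts the earlier Append restriction on hive-serde CTAS. A hedged sketch (it mirrors the new HiveDDLSuite test below; assumes `import spark.implicits._`; the table name `ctas_t` is illustrative):

```scala
// First call creates the hive serde table via CTAS; second call appends to it,
// which previously threw "CTAS for hive serde tables does not support append semantics."
Seq(1 -> "a").toDF("i", "j").write.format("hive").saveAsTable("ctas_t")
Seq(2 -> "b").toDF("i", "j").write.format("hive").mode("append").saveAsTable("ctas_t")
```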

@@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.execution

 import scala.util.control.NonFatal
 
-import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
+import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession}
 import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SimpleCatalogRelation}
 import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan, SubqueryAlias}
 import org.apache.spark.sql.execution.command.RunnableCommand
@@ -31,13 +31,12 @@ import org.apache.spark.sql.hive.MetastoreRelation
  *
  * @param tableDesc the Table Describe, which may contains serde, storage handler etc.
  * @param query the query whose result will be insert into the new relation
- * @param ignoreIfExists allow continue working if it's already exists, otherwise
- *                       raise exception
+ * @param mode SaveMode
  */
 case class CreateHiveTableAsSelectCommand(
     tableDesc: CatalogTable,
     query: LogicalPlan,
-    ignoreIfExists: Boolean)
+    mode: SaveMode)
   extends RunnableCommand {
 
   private val tableIdentifier = tableDesc.identifier

@@ -69,7 +68,7 @@ case class CreateHiveTableAsSelectCommand(
       withFormat
     }
 
-    sparkSession.sessionState.catalog.createTable(withSchema, ignoreIfExists = false)
+    sparkSession.sessionState.catalog.createTable(withSchema, ignoreIfExists = true)
Contributor: looks like we don't need to build withSchema anymore, the schema will be set in AnalyzeCreateTable

     // Get the Metastore Relation
     sparkSession.sessionState.catalog.lookupRelation(tableIdentifier) match {
@@ -82,11 +81,18 @@
     // add the relation into catalog, just in case of failure occurs while data
     // processing.
     if (sparkSession.sessionState.catalog.tableExists(tableIdentifier)) {
-      if (ignoreIfExists) {
-        // table already exists, will do nothing, to keep consistent with Hive
-      } else {
+      assert(mode != SaveMode.Overwrite,
+        s"Expect the table $tableIdentifier has been dropped when the save mode is Overwrite")
+
+      if (mode == SaveMode.ErrorIfExists) {
         throw new AnalysisException(s"$tableIdentifier already exists.")
       }
+      if (mode == SaveMode.Ignore) {
+        // Since the table already exists and the save mode is Ignore, we will just return.
+        return Seq.empty
+      }
+      sparkSession.sessionState.executePlan(InsertIntoTable(
+        metastoreRelation, Map(), query, overwrite = false, ifNotExists = false)).toRdd
     } else {
       try {
         sparkSession.sessionState.executePlan(InsertIntoTable(
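The mode dispatch for a pre-existing target table, restated as a standalone sketch (not the PR's code; `insert` stands in for the InsertIntoTable plan above):

```scala
import org.apache.spark.sql.SaveMode

def onExistingTable(mode: SaveMode)(insert: () => Unit): Unit = mode match {
  case SaveMode.Overwrite =>
    sys.error("unreachable: an Overwrite target was dropped before this point")
  case SaveMode.ErrorIfExists =>
    throw new IllegalStateException("table already exists") // AnalysisException in the real code
  case SaveMode.Ignore => () // keep the existing data, skip the write
  case SaveMode.Append => insert() // append via InsertIntoTable with overwrite = false
}
```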

@@ -419,12 +419,6 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
sql(s"CREATE TABLE $tableName STORED AS SEQUENCEFILE AS SELECT 1 AS key, 'abc' AS value")

val df = sql(s"SELECT key, value FROM $tableName")
val e = intercept[AnalysisException] {
df.write.mode(SaveMode.Append).saveAsTable(tableName)
}.getMessage
assert(e.contains("Saving data in the Hive serde table default.tab1 is not supported " +
"yet. Please use the insertInto() API as an alternative."))

df.write.insertInto(tableName)
checkAnswer(
sql(s"SELECT * FROM $tableName"),

@@ -1353,6 +1353,15 @@ class HiveDDLSuite
sql("INSERT INTO t SELECT 2, 'b'")
checkAnswer(spark.table("t"), Row(9, "x") :: Row(2, "b") :: Nil)

Seq(10 -> "y").toDF("i", "j")
Contributor: please add a new test, to append to a hive table, also test append to a data source table with hive provider and check the error message
.write.format("hive").mode("append").saveAsTable("t")
checkAnswer(spark.table("t"), Row(9, "x") :: Row(2, "b") :: Row(10, "y") :: Nil)

Seq("y" -> 10).toDF("i", "j")
.write.format("hive").mode("append").saveAsTable("t")
checkAnswer(spark.table("t"), Row(9, "x") :: Row(2, "b")
:: Row(10, "y") :: Row(null, "10") :: Nil)

val e = intercept[AnalysisException] {
Seq(1 -> "a").toDF("i", "j").write.format("hive").partitionBy("i").saveAsTable("t2")
}
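Note on the second append above: the incoming frame's column types are (String, Int) while the table has schema (Int, String), so the values are coerced to the table schema by position. "y" cannot be cast to int and becomes null, while 10 is cast to the string "10", hence the expected Row(null, "10").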