
Commit d07d94b

Add tests, support for creating parquet files and hive tables.
1 parent fa3fe81 commit d07d94b

13 files changed: 392 additions, 121 deletions

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 29 additions & 1 deletion
@@ -20,17 +20,25 @@ package org.apache.spark.sql
 import scala.language.implicitConversions
 import scala.reflect.runtime.universe.TypeTag
 
+import org.apache.hadoop.conf.Configuration
+
 import org.apache.spark.SparkContext
 import org.apache.spark.annotation.{AlphaComponent, Experimental}
 import org.apache.spark.rdd.RDD
+
 import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.dsl
+import org.apache.spark.sql.catalyst.{ScalaReflection, dsl}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.optimizer.Optimizer
 import org.apache.spark.sql.catalyst.plans.logical.{Subquery, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
+
 import org.apache.spark.sql.columnar.InMemoryColumnarTableScan
+
 import org.apache.spark.sql.execution._
+import org.apache.spark.sql.execution.SparkStrategies
+
+import org.apache.spark.sql.parquet.ParquetRelation
 
 /**
  * :: AlphaComponent ::
@@ -88,6 +96,26 @@ class SQLContext(@transient val sparkContext: SparkContext)
   def parquetFile(path: String): SchemaRDD =
     new SchemaRDD(this, parquet.ParquetRelation(path))
 
+  /**
+   * :: Experimental ::
+   *
+   * Creates an empty parquet file with the schema of class `A`, which can be registered as a table.
+   * This registered table can be used as the target of future `insertInto` operations.
+   *
+   * @param path
+   * @param allowExisting
+   * @param conf
+   * @tparam A
+   */
+  @Experimental
+  def createParquetFile[A <: Product : TypeTag](
+      path: String,
+      allowExisting: Boolean = true,
+      conf: Configuration = new Configuration()): SchemaRDD = {
+    new SchemaRDD(
+      this,
+      ParquetRelation.createEmpty(path, ScalaReflection.attributesFor[A], allowExisting, conf))
+  }
 
   /**
    * Registers the given RDD as a temporary table in the catalog. Temporary tables exist only
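
For orientation, a rough usage sketch of the new API (the `Record` case class, the path, and the input RDD are hypothetical illustrations, not part of this commit):

  // Hypothetical usage of createParquetFile; assumes a SQLContext `sqlContext`
  // and a SparkContext `sc` are in scope.
  case class Record(key: Int, value: String)

  import sqlContext._

  // Create an empty parquet file with Record's schema and register it as a table.
  createParquetFile[Record]("/tmp/records.parquet").registerAsTable("records")

  // The registered table can now serve as the target of insertInto operations.
  val records: SchemaRDD = createSchemaRDD(sc.parallelize(Seq(Record(1, "a"))))
  records.insertInto("records")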

sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala

Lines changed: 2 additions & 17 deletions
@@ -272,8 +272,8 @@ class SchemaRDD(
    * an `OUTER JOIN` in SQL. When no output rows are produced by the generator for a
    * given row, a single row will be output, with `NULL` values for each of the
    * generated columns.
-   * @param alias an optional alias that can be used as qualif for the attributes that are produced
-   *              by this generate operation.
+   * @param alias an optional alias that can be used as qualifier for the attributes that are
+   *              produced by this generate operation.
    *
    * @group Query
    */
@@ -285,21 +285,6 @@ class SchemaRDD(
       alias: Option[String] = None) =
     new SchemaRDD(sqlContext, Generate(generator, join, outer, None, logicalPlan))
 
-  /**
-   * :: Experimental ::
-   * Adds the rows from this RDD to the specified table. Note in a standard [[SQLContext]] there is
-   * no notion of persistent tables, and thus queries that contain this operator will fail to
-   * optimize. When working with an extension of a SQLContext that has a persistent catalog, such
-   * as a `HiveContext`, this operation will result in insertions to the table specified.
-   *
-   * @group schema
-   */
-  @Experimental
-  def insertInto(tableName: String, overwrite: Boolean = false) =
-    new SchemaRDD(
-      sqlContext,
-      InsertIntoTable(UnresolvedRelation(None, tableName), Map.empty, logicalPlan, overwrite))
-
   /**
    * Returns this RDD as a SchemaRDD.
    * @group schema

sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala

Lines changed: 9 additions & 6 deletions
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql
 
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
 import org.apache.spark.sql.catalyst.plans.logical._
 
@@ -66,27 +67,29 @@ trait SchemaRDDLike {
   }
 
   /**
-   * <span class="badge badge-red" style="float: right;">EXPERIMENTAL</span>
+   * :: Experimental ::
    *
    * Adds the rows from this RDD to the specified table, optionally overwriting the existing data.
    *
    * @group schema
    */
+  @Experimental
   def insertInto(tableName: String, overwrite: Boolean): Unit =
     sqlContext.executePlan(
      InsertIntoTable(UnresolvedRelation(None, tableName), Map.empty, logicalPlan, overwrite)).toRdd
 
   /**
-   * <span class="badge badge-red" style="float: right;">EXPERIMENTAL</span>
+   * :: Experimental ::
    *
    * Appends the rows from this RDD to the specified table.
    *
    * @group schema
    */
+  @Experimental
   def insertInto(tableName: String): Unit = insertInto(tableName, false)
 
   /**
-   * <span class="badge badge-red" style="float: right;">EXPERIMENTAL</span>
+   * :: Experimental ::
    *
    * Creates a table from the the contents of this SchemaRDD. This will fail if the table already
    * exists.
@@ -98,7 +101,7 @@ trait SchemaRDDLike {
    *
    * @param tableName
    */
-  def createTableAs(tableName: String) =
-    sqlContext.executePlan(
-      InsertIntoCreatedTable(None, tableName, logicalPlan))
+  @Experimental
+  def createTableAs(tableName: String): Unit =
+    sqlContext.executePlan(InsertIntoCreatedTable(None, tableName, logicalPlan)).toRdd
 }
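
As a sketch of how the methods above compose (table names are hypothetical; note that `createTableAs` needs a catalog with persistent tables, such as a `HiveContext`, to be useful):

  // Hypothetical usage of the insertInto/createTableAs family on a SchemaRDD.
  val results = sql("SELECT key, value FROM records WHERE key < 10")

  results.insertInto("existingTable")                    // append rows
  results.insertInto("existingTable", overwrite = true)  // replace existing data
  results.createTableAs("newTable")                      // create the table, then populate it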

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala

Lines changed: 8 additions & 3 deletions
@@ -119,7 +119,7 @@ private[sql] object ParquetRelation {
         child,
         "Attempt to create Parquet table from unresolved child (when schema is not available)")
     }
-    createEmpty(pathString, child.output, conf)
+    createEmpty(pathString, child.output, false, conf)
   }
 
   /**
@@ -133,8 +133,9 @@
    */
   def createEmpty(pathString: String,
                   attributes: Seq[Attribute],
+                  allowExisting: Boolean,
                   conf: Configuration): ParquetRelation = {
-    val path = checkPath(pathString, conf)
+    val path = checkPath(pathString, allowExisting, conf)
     if (conf.get(ParquetOutputFormat.COMPRESSION) == null) {
       conf.set(ParquetOutputFormat.COMPRESSION, ParquetRelation.defaultCompression.name())
     }
@@ -143,7 +144,7 @@
     new ParquetRelation(path.toString)
   }
 
-  private def checkPath(pathStr: String, conf: Configuration): Path = {
+  private def checkPath(pathStr: String, allowExisting: Boolean, conf: Configuration): Path = {
     if (pathStr == null) {
       throw new IllegalArgumentException("Unable to create ParquetRelation: path is null")
     }
@@ -154,6 +155,10 @@
         s"Unable to create ParquetRelation: incorrectly formatted path $pathStr")
     }
     val path = origPath.makeQualified(fs)
+    if (!allowExisting && fs.exists(path)) {
+      sys.error(s"File $pathStr already exists.")
+    }
+
     if (fs.exists(path) &&
         !fs.getFileStatus(path)
         .getPermission
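
A minimal standalone sketch of the guard semantics added here, using Hadoop's FileSystem API (the helper name is ours, not the commit's):

  import org.apache.hadoop.conf.Configuration
  import org.apache.hadoop.fs.Path

  // Sketch: qualify the path and fail fast when it already exists and
  // allowExisting is false, mirroring the checkPath change above.
  def ensureCreatable(pathStr: String, allowExisting: Boolean, conf: Configuration): Path = {
    val origPath = new Path(pathStr)
    val fs = origPath.getFileSystem(conf)
    val path = origPath.makeQualified(fs)
    if (!allowExisting && fs.exists(path)) {
      sys.error(s"File $pathStr already exists.")
    }
    path
  }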

sql/core/src/test/scala/org/apache/spark/sql/InsertIntoSuite.scala

Lines changed: 148 additions & 0 deletions
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import java.io.File
+
+/* Implicits */
+import org.apache.spark.sql.test.TestSQLContext._
+
+class InsertIntoSuite extends QueryTest {
+  TestData // Initialize TestData
+  import TestData._
+
+  test("insertInto() created parquet file") {
+    val testFilePath = File.createTempFile("sparkSql", "pqt")
+    testFilePath.delete()
+    val testFile = createParquetFile[TestData](testFilePath.getCanonicalPath)
+    testFile.registerAsTable("createAndInsertTest")
+
+    // Add some data.
+    testData.insertInto("createAndInsertTest")
+
+    // Make sure its there for a new instance of parquet file.
+    checkAnswer(
+      parquetFile(testFilePath.getCanonicalPath),
+      testData.collect().toSeq
+    )
+
+    // Make sure the registered table has also been updated.
+    checkAnswer(
+      sql("SELECT * FROM createAndInsertTest"),
+      testData.collect().toSeq
+    )
+
+    // Add more data.
+    testData.insertInto("createAndInsertTest")
+
+    // Make sure all data is there for a new instance of parquet file.
+    checkAnswer(
+      parquetFile(testFilePath.getCanonicalPath),
+      testData.collect().toSeq ++ testData.collect().toSeq
+    )
+
+    // Make sure the registered table has also been updated.
+    checkAnswer(
+      sql("SELECT * FROM createAndInsertTest"),
+      testData.collect().toSeq ++ testData.collect().toSeq
+    )
+
+    // Now overwrite.
+    testData.insertInto("createAndInsertTest", overwrite = true)
+
+    // Make sure its there for a new instance of parquet file.
+    checkAnswer(
+      parquetFile(testFilePath.getCanonicalPath),
+      testData.collect().toSeq
+    )
+
+    // Make sure the registered table has also been updated.
+    checkAnswer(
+      sql("SELECT * FROM createAndInsertTest"),
+      testData.collect().toSeq
+    )
+  }
+
+  test("INSERT INTO parquet table") {
+    val testFilePath = File.createTempFile("sparkSql", "pqt")
+    testFilePath.delete()
+    val testFile = createParquetFile[TestData](testFilePath.getCanonicalPath)
+    testFile.registerAsTable("createAndInsertSQLTest")
+
+    sql("INSERT INTO createAndInsertSQLTest SELECT * FROM testData")
+
+    // Make sure its there for a new instance of parquet file.
+    checkAnswer(
+      parquetFile(testFilePath.getCanonicalPath),
+      testData.collect().toSeq
+    )
+
+    // Make sure the registered table has also been updated.
+    checkAnswer(
+      sql("SELECT * FROM createAndInsertSQLTest"),
+      testData.collect().toSeq
+    )
+
+    // Append more data.
+    sql("INSERT INTO createAndInsertSQLTest SELECT * FROM testData")
+
+    // Make sure all data is there for a new instance of parquet file.
+    checkAnswer(
+      parquetFile(testFilePath.getCanonicalPath),
+      testData.collect().toSeq ++ testData.collect().toSeq
+    )
+
+    // Make sure the registered table has also been updated.
+    checkAnswer(
+      sql("SELECT * FROM createAndInsertSQLTest"),
+      testData.collect().toSeq ++ testData.collect().toSeq
+    )
+
+    sql("INSERT OVERWRITE INTO createAndInsertSQLTest SELECT * FROM testData")
+
+    // Make sure its there for a new instance of parquet file.
+    checkAnswer(
+      parquetFile(testFilePath.getCanonicalPath),
+      testData.collect().toSeq
+    )
+
+    // Make sure the registered table has also been updated.
+    checkAnswer(
+      sql("SELECT * FROM createAndInsertSQLTest"),
+      testData.collect().toSeq
+    )
+  }
+
+  test("Double create fails when allowExisting = false") {
+    val testFilePath = File.createTempFile("sparkSql", "pqt")
+    testFilePath.delete()
+    val testFile = createParquetFile[TestData](testFilePath.getCanonicalPath)
+
+    intercept[RuntimeException] {
+      createParquetFile[TestData](testFilePath.getCanonicalPath, allowExisting = false)
+    }
+  }
+
+  test("Double create does not fail when allowExisting = true") {
+    val testFilePath = File.createTempFile("sparkSql", "pqt")
+    testFilePath.delete()
+    val testFile = createParquetFile[TestData](testFilePath.getCanonicalPath)
+
+    createParquetFile[TestData](testFilePath.getCanonicalPath, allowExisting = true)
+  }
+}

sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala

Lines changed: 7 additions & 4 deletions
@@ -49,18 +49,21 @@ class QueryTest extends FunSuite {
           |$e
         """.stripMargin)
     }
+
     if(prepareAnswer(convertedAnswer) != prepareAnswer(sparkAnswer)) {
       fail(s"""
         |Results do not match for query:
        |${rdd.logicalPlan}
        |== Analyzed Plan ==
        |${rdd.queryExecution.analyzed}
-       |== RDD ==
-       |$rdd
+       |== Physical Plan ==
+       |${rdd.queryExecution.executedPlan}
        |== Results ==
        |${sideBySide(
-        prepareAnswer(convertedAnswer).map(_.toString),
-        prepareAnswer(sparkAnswer).map(_.toString)).mkString("\n")}
+        s"== Correct Answer - ${convertedAnswer.size} ==" +:
+          prepareAnswer(convertedAnswer).map(_.toString),
+        s"== Spark Answer - ${sparkAnswer.size} ==" +:
+          prepareAnswer(sparkAnswer).map(_.toString)).mkString("\n")}
       """.stripMargin)
     }
   }

sql/core/src/test/scala/org/apache/spark/sql/TestData.scala

Lines changed: 2 additions & 1 deletion
@@ -23,8 +23,9 @@ import org.apache.spark.sql.test._
 /* Implicits */
 import TestSQLContext._
 
+case class TestData(key: Int, value: String)
+
 object TestData {
-  case class TestData(key: Int, value: String)
   val testData: SchemaRDD = TestSQLContext.sparkContext.parallelize(
     (1 to 100).map(i => TestData(i, i.toString)))
   testData.registerAsTable("testData")
