
Commit ec5452f

Apply Xiao's review

1 parent fb61535, commit ec5452f

5 files changed (+19, -26 lines)

sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala

Lines changed: 5 additions & 1 deletion

@@ -104,7 +104,11 @@ object DataType {
 
   def fromJson(json: String): DataType = parseDataType(parse(json))
 
-  def fromDdl(ddl: String): DataType = CatalystSqlParser.parseTableSchema(ddl)
+  /**
+   * Creates DataType for a given SQL DDL string, which is a comma separated list of field
+   * definitions, e.g., a INT, b STRING.
+   */
+  def fromDDL(ddl: String): DataType = CatalystSqlParser.parseTableSchema(ddl)
 
   private val nonDecimalNameToType = {
     Seq(NullType, DateType, TimestampType, BinaryType, IntegerType, BooleanType, LongType,
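
The scaladoc above documents the renamed DataType.fromDDL. A minimal usage sketch, assuming a build that contains this commit; note that DataType lives in the internal catalyst module, so this illustrates how Spark itself calls the method rather than a guaranteed public API, and the field names a and b are examples only:

import org.apache.spark.sql.types._

// Parse a comma-separated list of field definitions into a schema.
val schema = DataType.fromDDL("a INT, b STRING")

// The result is a StructType with nullable fields a: int and b: string.
assert(schema === new StructType().add("a", IntegerType).add("b", StringType))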

sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala

Lines changed: 9 additions & 7 deletions

@@ -169,22 +169,24 @@ class DataTypeSuite extends SparkFunSuite {
     assert(!arrayType.existsRecursively(_.isInstanceOf[IntegerType]))
   }
 
-  // Test json formats only for the types that DDL formats do not support
   def checkDataTypeFromJson(dataType: DataType): Unit = {
-    test(s"from json - $dataType") {
+    test(s"from Json - $dataType") {
       assert(DataType.fromJson(dataType.json) === dataType)
     }
   }
 
+  def checkDataTypeFromDDL(dataType: DataType): Unit = {
+    test(s"from DDL - $dataType") {
+      assert(DataType.fromDDL(s"a ${dataType.sql}") === new StructType().add("a", dataType))
+    }
+  }
+
   def checkDataTypeFromText(dataType: DataType): Unit = {
     checkDataTypeFromJson(dataType)
-
-    // Test DDL formats
-    test(s"from ddl - $dataType") {
-      assert(DataType.fromDdl(s"a ${dataType.sql}") === new StructType().add("a", dataType))
-    }
+    checkDataTypeFromDDL(dataType)
   }
 
+  // In some types, check json formats only because the types do not support DDL formats.
   checkDataTypeFromJson(NullType)
 
   checkDataTypeFromText(BooleanType)
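
As a concrete instance of what the new checkDataTypeFromDDL helper asserts, take DecimalType(10, 5) as an illustrative type (a sketch, not part of the commit): dataType.sql renders as DECIMAL(10,5), so the generated test reduces to:

// DDL round-trip for a single illustrative type.
assert(DataType.fromDDL("a DECIMAL(10,5)") === new StructType().add("a", DecimalType(10, 5)))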

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 4 additions & 5 deletions

@@ -3056,19 +3056,18 @@
    * with the specified schema. Returns `null`, in the case of an unparseable string.
    *
    * @param e a string column containing JSON data.
-   * @param schema the schema to use when parsing the json string as a json string
+   * @param schema the schema to use when parsing the json string as a json string. In Spark 2.1,
+   *               the user-provided schema has to be in JSON format. Since Spark 2.2, the DDL
+   *               format is also supported for the schema.
    *
    * @group collection_funcs
    * @since 2.1.0
    */
   def from_json(e: Column, schema: String, options: java.util.Map[String, String]): Column = {
-    // Until Spark-2.1, we use json strings for defining schemas here. Since we add an user-friendly
-    // API in the DDL parser, we employ DDL formats for the case. To keep back-compatibility,
-    // we use `fromJson` first, and then try the new API.
     val dataType = try {
       DataType.fromJson(schema)
     } catch {
-      case NonFatal(_) => DataType.fromDdl(schema)
+      case NonFatal(_) => DataType.fromDDL(schema)
     }
     from_json(e, dataType, options)
   }
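
A usage sketch of what the updated scaladoc describes: since Spark 2.2 the schema string passed to this from_json overload may be DDL-formatted, while the JSON-formatted form remains accepted for backward compatibility. The column name and sample row below are made up for illustration, and a SparkSession named spark is assumed to be in scope (e.g., in spark-shell):

import org.apache.spark.sql.functions.from_json
import spark.implicits._

val df = Seq("""{"a": 1, "b": "x"}""").toDF("json")

// DDL-formatted schema string; in Spark 2.1 this had to be a JSON-formatted schema instead.
val parsed = df.select(from_json($"json", "a INT, b STRING", new java.util.HashMap[String, String]()))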

sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala

Lines changed: 0 additions & 4 deletions

@@ -60,10 +60,6 @@ class SimpleTextHadoopFsRelationSuite extends HadoopFsRelationTest with Predicat
         spark.read.format(dataSourceName)
           .option("dataSchema", dataSchemaWithPartition.json)
           .load(file.getCanonicalPath))
-      checkQueries(
-        spark.read.format(dataSourceName)
-          .option("dataSchema", "a INT, b STRING, p1 INT")
-          .load(file.getCanonicalPath))
     }
   }
 

sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala

Lines changed: 1 addition & 9 deletions

@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.sources
 
-import scala.util.control.NonFatal
-
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
@@ -38,13 +36,7 @@ class SimpleTextSource extends TextBasedFileFormat with DataSourceRegister {
       sparkSession: SparkSession,
       options: Map[String, String],
       files: Seq[FileStatus]): Option[StructType] = {
-    val schemaAsString = options("dataSchema")
-    val schema = try {
-      DataType.fromJson(schemaAsString)
-    } catch {
-      case NonFatal(_) => DataType.fromDdl(schemaAsString)
-    }
-    Some(schema.asInstanceOf[StructType])
+    Some(DataType.fromJson(options("dataSchema")).asInstanceOf[StructType])
   }
 
   override def prepareWrite(
