
Commit ec5452f

Apply Xiao's review

1 parent fb61535, commit ec5452f

5 files changed (+19, -26 lines)

sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala

Lines changed: 5 additions & 1 deletion

@@ -104,7 +104,11 @@ object DataType {
 
   def fromJson(json: String): DataType = parseDataType(parse(json))
 
-  def fromDdl(ddl: String): DataType = CatalystSqlParser.parseTableSchema(ddl)
+  /**
+   * Creates DataType for a given SQL DDL string, which is a comma separated list of field
+   * definitions, e.g., a INT, b STRING.
+   */
+  def fromDDL(ddl: String): DataType = CatalystSqlParser.parseTableSchema(ddl)
 
   private val nonDecimalNameToType = {
     Seq(NullType, DateType, TimestampType, BinaryType, IntegerType, BooleanType, LongType,
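
The scaladoc above documents the renamed DataType.fromDDL. A minimal usage sketch, assuming a build that contains this commit; note that DataType lives in the internal catalyst module, so this illustrates how Spark itself calls the method rather than a guaranteed public API, and the field names a and b are examples only:

import org.apache.spark.sql.types._

// Parse a comma-separated list of field definitions into a schema.
val schema = DataType.fromDDL("a INT, b STRING")

// The result is a StructType with nullable fields a: int and b: string.
assert(schema === new StructType().add("a", IntegerType).add("b", StringType))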

sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala

Lines changed: 9 additions & 7 deletions

@@ -169,22 +169,24 @@ class DataTypeSuite extends SparkFunSuite {
     assert(!arrayType.existsRecursively(_.isInstanceOf[IntegerType]))
   }
 
-  // Test json formats only for the types that DDL formats do not support
   def checkDataTypeFromJson(dataType: DataType): Unit = {
-    test(s"from json - $dataType") {
+    test(s"from Json - $dataType") {
       assert(DataType.fromJson(dataType.json) === dataType)
     }
   }
 
+  def checkDataTypeFromDDL(dataType: DataType): Unit = {
+    test(s"from DDL - $dataType") {
+      assert(DataType.fromDDL(s"a ${dataType.sql}") === new StructType().add("a", dataType))
+    }
+  }
+
   def checkDataTypeFromText(dataType: DataType): Unit = {
     checkDataTypeFromJson(dataType)
-
-    // Test DDL formats
-    test(s"from ddl - $dataType") {
-      assert(DataType.fromDdl(s"a ${dataType.sql}") === new StructType().add("a", dataType))
-    }
+    checkDataTypeFromDDL(dataType)
   }
 
+  // In some types, check json formats only because the types do not support DDL formats.
   checkDataTypeFromJson(NullType)
 
   checkDataTypeFromText(BooleanType)
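
As a concrete instance of what the new checkDataTypeFromDDL helper asserts, take DecimalType(10, 5) as an illustrative type (a sketch, not part of the commit): dataType.sql renders as DECIMAL(10,5), so the generated test reduces to:

// DDL round-trip for a single illustrative type.
assert(DataType.fromDDL("a DECIMAL(10,5)") === new StructType().add("a", DecimalType(10, 5)))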

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 4 additions & 5 deletions

@@ -3056,19 +3056,18 @@
    * with the specified schema. Returns `null`, in the case of an unparseable string.
    *
    * @param e a string column containing JSON data.
-   * @param schema the schema to use when parsing the json string as a json string
+   * @param schema the schema to use when parsing the json string as a json string. In Spark 2.1,
+   *               the user-provided schema has to be in JSON format. Since Spark 2.2, the DDL
+   *               format is also supported for the schema.
    *
    * @group collection_funcs
    * @since 2.1.0
    */
   def from_json(e: Column, schema: String, options: java.util.Map[String, String]): Column = {
-    // Until Spark-2.1, we use json strings for defining schemas here. Since we add an user-friendly
-    // API in the DDL parser, we employ DDL formats for the case. To keep back-compatibility,
-    // we use `fromJson` first, and then try the new API.
     val dataType = try {
       DataType.fromJson(schema)
     } catch {
-      case NonFatal(_) => DataType.fromDdl(schema)
+      case NonFatal(_) => DataType.fromDDL(schema)
     }
     from_json(e, dataType, options)
   }
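
A usage sketch of what the updated scaladoc describes: since Spark 2.2 the schema string passed to this from_json overload may be DDL-formatted, while the JSON-formatted form remains accepted for backward compatibility. The column name and sample row below are made up for illustration, and a SparkSession named spark is assumed to be in scope (e.g., in spark-shell):

import org.apache.spark.sql.functions.from_json
import spark.implicits._

val df = Seq("""{"a": 1, "b": "x"}""").toDF("json")

// DDL-formatted schema string; in Spark 2.1 this had to be a JSON-formatted schema instead.
val parsed = df.select(from_json($"json", "a INT, b STRING", new java.util.HashMap[String, String]()))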

sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala

Lines changed: 0 additions & 4 deletions

@@ -60,10 +60,6 @@ class SimpleTextHadoopFsRelationSuite extends HadoopFsRelationTest with Predicat
         spark.read.format(dataSourceName)
           .option("dataSchema", dataSchemaWithPartition.json)
           .load(file.getCanonicalPath))
-      checkQueries(
-        spark.read.format(dataSourceName)
-          .option("dataSchema", "a INT, b STRING, p1 INT")
-          .load(file.getCanonicalPath))
     }
   }
 

sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala

Lines changed: 1 addition & 9 deletions

@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.sources
 
-import scala.util.control.NonFatal
-
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
@@ -38,13 +36,7 @@ class SimpleTextSource extends TextBasedFileFormat with DataSourceRegister {
       sparkSession: SparkSession,
       options: Map[String, String],
       files: Seq[FileStatus]): Option[StructType] = {
-    val schemaAsString = options("dataSchema")
-    val schema = try {
-      DataType.fromJson(schemaAsString)
-    } catch {
-      case NonFatal(_) => DataType.fromDdl(schemaAsString)
-    }
-    Some(schema.asInstanceOf[StructType])
+    Some(DataType.fromJson(options("dataSchema")).asInstanceOf[StructType])
   }
 
   override def prepareWrite(
