diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index e530b4c9407a6..16e19141c3bb6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -505,7 +505,8 @@ class SparkSqlAstBuilder extends AstBuilder {
         } else {
           None
         }
-        (Seq.empty, Option(name), props.toSeq, recordHandler)
+        val finalProps = props ++ Seq("field.delim" -> props.getOrElse("field.delim", "\t"))
+        (Seq.empty, Option(name), finalProps.toSeq, recordHandler)
 
       case null =>
         // Use default (serde) format.
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala
index 0876709c31899..266c526b1a24b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala
@@ -23,6 +23,7 @@ import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
 import org.scalatest.exceptions.TestFailedException
 
 import org.apache.spark.{SparkException, TestUtils}
+import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression}
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.functions._
@@ -438,4 +439,66 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T
       assert(e2.contains("array cannot be converted to Hive TypeInfo"))
     }
   }
+
+  test("SPARK-32685: When using specified serde, field.delim's default value is '\t'") {
+    val query1 = sql(
+      """
+        |SELECT split(value, "\t") FROM (
+        |SELECT TRANSFORM(a, b, c)
+        |USING 'cat'
+        |FROM (SELECT 1 AS a, 2 AS b, 3 AS c) t
+        |) temp;
+      """.stripMargin)
+    checkAnswer(query1, identity, Row(Seq("2", "3")) :: Nil)
+
+    val query2 = sql(
+      """
+        |SELECT split(value, "\t") FROM (
+        |SELECT TRANSFORM(a, b, c)
+        |  ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+        |USING 'cat'
+        |  ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+        |  WITH SERDEPROPERTIES (
+        |    'serialization.last.column.takes.rest' = 'true'
+        |  )
+        |FROM (SELECT 1 AS a, 2 AS b, 3 AS c) t
+        |) temp;
+      """.stripMargin)
+    checkAnswer(query2, identity, Row(Seq("2", "3")) :: Nil)
+
+    val query3 = sql(
+      """
+        |SELECT split(value, "&") FROM (
+        |SELECT TRANSFORM(a, b, c)
+        |  ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+        |  WITH SERDEPROPERTIES (
+        |    'field.delim' = '&'
+        |  )
+        |USING 'cat'
+        |  ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+        |  WITH SERDEPROPERTIES (
+        |    'serialization.last.column.takes.rest' = 'true',
+        |    'field.delim' = '&'
+        |  )
+        |FROM (SELECT 1 AS a, 2 AS b, 3 AS c) t
+        |) temp;
+      """.stripMargin)
+    checkAnswer(query3, identity, Row(Seq("2", "3")) :: Nil)
+
+    val query4 = sql(
+      """
+        |SELECT split(value, "&") FROM (
+        |SELECT TRANSFORM(a, b, c)
+        |  ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+        |USING 'cat'
+        |  ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+        |  WITH SERDEPROPERTIES (
+        |    'serialization.last.column.takes.rest' = 'true',
+        |    'field.delim' = '&'
+        |  )
+        |FROM (SELECT 1 AS a, 2 AS b, 3 AS c) t
+        |) temp;
+      """.stripMargin)
+    checkAnswer(query4, identity, Row(null) :: Nil)
+  }
 }