diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index a36423e67d750..dac6abb6ef919 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2225,7 +2225,7 @@ def json_tuple(col, *fields): >>> data = [("1", '''{"f1": "value1", "f2": "value2"}'''), ("2", '''{"f1": "value12"}''')] >>> df = spark.createDataFrame(data, ("key", "jstring")) >>> df.select(df.key, json_tuple(df.jstring, 'f1', 'f2')).collect() - [Row(key=u'1', c0=u'value1', c1=u'value2'), Row(key=u'2', c0=u'value12', c1=None)] + [Row(key=u'1', col1=u'value1', col2=u'value2'), Row(key=u'2', col1=u'value12', col2=None)] """ sc = SparkContext._active_spark_context jc = sc._jvm.functions.json_tuple(_to_java_column(col), _to_seq(sc, fields)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index 6b6da1c8b4142..2028b3ee5b297 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -179,7 +179,7 @@ case class Stack(children: Seq[Expression]) extends Generator { override def elementSchema: StructType = StructType(children.tail.take(numFields).zipWithIndex.map { - case (e, index) => StructField(s"col$index", e.dataType) + case (e, index) => StructField(s"col${index + 1}", e.dataType) }) override def eval(input: InternalRow): TraversableOnce[InternalRow] = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 655e44e4e4919..71d8f45e98f23 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -372,7 +372,7 @@ case class JsonTuple(children: Seq[Expression]) @transient private lazy val constantFields: Int = foldableFieldNames.count(_ != null) override def elementSchema: StructType = StructType(fieldExpressions.zipWithIndex.map { - case (_, idx) => StructField(s"c$idx", StringType, nullable = true) + case (_, idx) => StructField(s"col${idx + 1}", StringType, nullable = true) }) override def prettyName: String = "json_tuple" diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index ca0cd90d94fa7..cfa0eb2496d7b 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -163,7 +163,7 @@ Invalid number of arguments for function from_json. Expected: one of 2 and 3; Fo -- !query 18 SELECT json_tuple('{"a" : 1, "b" : 2}', CAST(NULL AS STRING), 'b', CAST(NULL AS STRING), 'a') -- !query 18 schema -struct +struct -- !query 18 output NULL 2 NULL 1 @@ -179,7 +179,7 @@ struct<> -- !query 20 SELECT json_tuple(jsonField, 'b', CAST(NULL AS STRING), a) FROM jsonTable -- !query 20 schema -struct +struct -- !query 20 output 2 NULL 1 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out index 31ee700a8db95..9009a1b6e4c65 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out @@ -248,7 +248,7 @@ NULL -- !query 30 select json_tuple(a, a) from t -- !query 30 schema -struct +struct -- !query 30 output NULL diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 24e7564259c83..f3290364e7fd5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -91,8 +91,8 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext { val df: DataFrame = tuples.toDF("key", "jstring") val expr = df .select(functions.json_tuple($"jstring", "f1", "f2")) - .where($"c0".isNotNull) - .groupBy($"c1") + .where($"col1".isNotNull) + .groupBy($"col2") .count() val expected = Row(null, 1) :: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b8c4d73f1b2b4..16c640294299f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -141,6 +141,13 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil) } + test("SPARK-26879 Standardize One-Based column indexing for stack and json_tuple function") { + val dfstack = sql("SELECT stack(2, 1, 2, 3)") + assert(dfstack.columns(0) == "col1" && dfstack.columns(1) == "col2") + val dfjson_tuple = sql("SELECT json_tuple('{\"a\":1, \"b\":2}', 'a', 'b')") + assert(dfjson_tuple.columns(0) == "col1" && dfjson_tuple.columns(1) == "col2") + } + test("support table.star") { checkAnswer( sql( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index ce7661a914559..b436b8c8797ba 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1554,7 +1554,7 @@ class SQLQuerySuite 
extends QueryTest with SQLTestUtils with TestHiveSingleton { - // we should use `c0`, `c1`... as the name of fields if no alias is provided, to follow hive. + // we should use `col1`, `col2`... as the name of fields if no alias is provided. checkAnswer(sql( """ - |SELECT c0, c1 + |SELECT col1, col2 |FROM (SELECT '{"f1": "value1", "f2": 12}' json) test |LATERAL VIEW json_tuple(json, 'f1', 'f2') jt """.stripMargin), Row("value1", "12"))