diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index fe56bcb99117e..baf46b3c54c55 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -934,14 +934,14 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("SPARK-37646: lit") {
     assert(lit($"foo") == $"foo")
-    assert(lit('foo) == $"foo")
+    assert(lit(Symbol("foo")) == $"foo")
     assert(lit(1) == Column(Literal(1)))
     assert(lit(null) == Column(Literal(null, NullType)))
   }

   test("typedLit") {
     assert(typedLit($"foo") == $"foo")
-    assert(typedLit('foo) == $"foo")
+    assert(typedLit(Symbol("foo")) == $"foo")
     assert(typedLit(1) == Column(Literal(1)))
     assert(typedLit[String](null) == Column(Literal(null, StringType)))

@@ -1029,17 +1029,17 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should throw an exception if any intermediate structs don't exist") {
     intercept[AnalysisException] {
-      structLevel2.withColumn("a", 'a.withField("x.b", lit(2)))
+      structLevel2.withColumn("a", Symbol("a").withField("x.b", lit(2)))
     }.getMessage should include("No such struct field x in a")

     intercept[AnalysisException] {
-      structLevel3.withColumn("a", 'a.withField("a.x.b", lit(2)))
+      structLevel3.withColumn("a", Symbol("a").withField("a.x.b", lit(2)))
     }.getMessage should include("No such struct field x in a")
   }

   test("withField should throw an exception if intermediate field is not a struct") {
     intercept[AnalysisException] {
-      structLevel1.withColumn("a", 'a.withField("b.a", lit(2)))
+      structLevel1.withColumn("a", Symbol("a").withField("b.a", lit(2)))
     }.getMessage should include("struct argument should be struct type, got: int")
   }

@@ -1053,7 +1053,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
           StructField("a", structType, nullable = false))),
         nullable = false))))

-      structLevel2.withColumn("a", 'a.withField("a.b", lit(2)))
+      structLevel2.withColumn("a", Symbol("a").withField("a.b", lit(2)))
     }.getMessage should include("Ambiguous reference to fields")
   }

@@ -1072,7 +1072,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should add field to struct") {
     checkAnswer(
-      structLevel1.withColumn("a", 'a.withField("d", lit(4))),
+      structLevel1.withColumn("a", Symbol("a").withField("d", lit(4))),
       Row(Row(1, null, 3, 4)) :: Nil,
       StructType(Seq(
         StructField("a", StructType(Seq(
@@ -1113,7 +1113,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should add null field to struct") {
     checkAnswer(
-      structLevel1.withColumn("a", 'a.withField("d", lit(null).cast(IntegerType))),
+      structLevel1.withColumn("a", Symbol("a").withField("d", lit(null).cast(IntegerType))),
       Row(Row(1, null, 3, null)) :: Nil,
       StructType(Seq(
         StructField("a", StructType(Seq(
@@ -1126,7 +1126,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should add multiple fields to struct") {
     checkAnswer(
-      structLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("e", lit(5))),
+      structLevel1.withColumn("a", Symbol("a").withField("d", lit(4)).withField("e", lit(5))),
       Row(Row(1, null, 3, 4, 5)) :: Nil,
       StructType(Seq(
         StructField("a", StructType(Seq(
@@ -1140,7 +1140,8 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should add multiple fields to nullable struct") {
     checkAnswer(
-      nullableStructLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("e", lit(5))),
+      nullableStructLevel1.withColumn("a", Symbol("a")
+        .withField("d", lit(4)).withField("e", lit(5))),
       Row(null) :: Row(Row(1, null, 3, 4, 5)) :: Nil,
       StructType(Seq(
         StructField("a", StructType(Seq(
@@ -1154,8 +1155,8 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should add field to nested struct") {
     Seq(
-      structLevel2.withColumn("a", 'a.withField("a.d", lit(4))),
-      structLevel2.withColumn("a", 'a.withField("a", $"a.a".withField("d", lit(4))))
+      structLevel2.withColumn("a", Symbol("a").withField("a.d", lit(4))),
+      structLevel2.withColumn("a", Symbol("a").withField("a", $"a.a".withField("d", lit(4))))
     ).foreach { df =>
       checkAnswer(
         df,
@@ -1216,7 +1217,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should add field to deeply nested struct") {
     checkAnswer(
-      structLevel3.withColumn("a", 'a.withField("a.a.d", lit(4))),
+      structLevel3.withColumn("a", Symbol("a").withField("a.a.d", lit(4))),
       Row(Row(Row(Row(1, null, 3, 4)))) :: Nil,
       StructType(Seq(
         StructField("a", StructType(Seq(
@@ -1233,7 +1234,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should replace field in struct") {
     checkAnswer(
-      structLevel1.withColumn("a", 'a.withField("b", lit(2))),
+      structLevel1.withColumn("a", Symbol("a").withField("b", lit(2))),
       Row(Row(1, 2, 3)) :: Nil,
       StructType(Seq(
         StructField("a", StructType(Seq(
@@ -1245,7 +1246,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should replace field in nullable struct") {
     checkAnswer(
-      nullableStructLevel1.withColumn("a", 'a.withField("b", lit("foo"))),
+      nullableStructLevel1.withColumn("a", Symbol("a").withField("b", lit("foo"))),
       Row(null) :: Row(Row(1, "foo", 3)) :: Nil,
       StructType(Seq(
         StructField("a", StructType(Seq(
@@ -1271,7 +1272,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should replace field with null value in struct") {
     checkAnswer(
-      structLevel1.withColumn("a", 'a.withField("c", lit(null).cast(IntegerType))),
+      structLevel1.withColumn("a", Symbol("a").withField("c", lit(null).cast(IntegerType))),
       Row(Row(1, null, null)) :: Nil,
       StructType(Seq(
         StructField("a", StructType(Seq(
@@ -1283,7 +1284,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should replace multiple fields in struct") {
     checkAnswer(
-      structLevel1.withColumn("a", 'a.withField("a", lit(10)).withField("b", lit(20))),
+      structLevel1.withColumn("a", Symbol("a").withField("a", lit(10)).withField("b", lit(20))),
       Row(Row(10, 20, 3)) :: Nil,
       StructType(Seq(
         StructField("a", StructType(Seq(
@@ -1295,7 +1296,8 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should replace multiple fields in nullable struct") {
     checkAnswer(
-      nullableStructLevel1.withColumn("a", 'a.withField("a", lit(10)).withField("b", lit(20))),
+      nullableStructLevel1.withColumn("a", Symbol("a").withField("a", lit(10))
+        .withField("b", lit(20))),
       Row(null) :: Row(Row(10, 20, 3)) :: Nil,
       StructType(Seq(
         StructField("a", StructType(Seq(
@@ -1308,7 +1310,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   test("withField should replace field in nested struct") {
     Seq(
       structLevel2.withColumn("a", $"a".withField("a.b", lit(2))),
-      structLevel2.withColumn("a",
'a.withField("a", $"a.a".withField("b", lit(2)))) + structLevel2.withColumn("a", Symbol("a").withField("a", $"a.a".withField("b", lit(2)))) ).foreach { df => checkAnswer( df, @@ -1389,7 +1391,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - structLevel1.withColumn("a", 'a.withField("b", lit(100))), + structLevel1.withColumn("a", Symbol("a").withField("b", lit(100))), Row(Row(1, 100, 100)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1401,7 +1403,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace fields in struct in given order") { checkAnswer( - structLevel1.withColumn("a", 'a.withField("b", lit(2)).withField("b", lit(20))), + structLevel1.withColumn("a", Symbol("a").withField("b", lit(2)).withField("b", lit(20))), Row(Row(1, 20, 3)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1413,7 +1415,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add field and then replace same field in struct") { checkAnswer( - structLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("d", lit(5))), + structLevel1.withColumn("a", Symbol("a").withField("d", lit(4)).withField("d", lit(5))), Row(Row(1, null, 3, 5)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1437,7 +1439,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - df.withColumn("a", 'a.withField("`a.b`.`e.f`", lit(2))), + df.withColumn("a", Symbol("a").withField("`a.b`.`e.f`", lit(2))), Row(Row(Row(1, 2, 3))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1449,7 +1451,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) intercept[AnalysisException] { - df.withColumn("a", 'a.withField("a.b.e.f", lit(2))) + df.withColumn("a", Symbol("a").withField("a.b.e.f", lit(2))) }.getMessage should include("No such struct field a in a.b") } @@ -1464,7 +1466,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace field in struct even if casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.withField("A", lit(2))), + mixedCaseStructLevel1.withColumn("a", Symbol("a").withField("A", lit(2))), Row(Row(2, 1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1473,7 +1475,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.withField("b", lit(2))), + mixedCaseStructLevel1.withColumn("a", Symbol("a").withField("b", lit(2))), Row(Row(1, 2)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1486,7 +1488,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add field to struct because casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.withField("A", lit(2))), + mixedCaseStructLevel1.withColumn("a", Symbol("a").withField("A", lit(2))), Row(Row(1, 1, 2)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1496,7 +1498,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.withField("b", lit(2))), + mixedCaseStructLevel1.withColumn("a", Symbol("a").withField("b", 
lit(2))), Row(Row(1, 1, 2)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1524,7 +1526,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace nested field in struct even if casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { checkAnswer( - mixedCaseStructLevel2.withColumn("a", 'a.withField("A.a", lit(2))), + mixedCaseStructLevel2.withColumn("a", Symbol("a").withField("A.a", lit(2))), Row(Row(Row(2, 1), Row(1, 1))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1539,7 +1541,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel2.withColumn("a", 'a.withField("b.a", lit(2))), + mixedCaseStructLevel2.withColumn("a", Symbol("a").withField("b.a", lit(2))), Row(Row(Row(1, 1), Row(2, 1))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1558,11 +1560,11 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should throw an exception because casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { intercept[AnalysisException] { - mixedCaseStructLevel2.withColumn("a", 'a.withField("A.a", lit(2))) + mixedCaseStructLevel2.withColumn("a", Symbol("a").withField("A.a", lit(2))) }.getMessage should include("No such struct field A in a, B") intercept[AnalysisException] { - mixedCaseStructLevel2.withColumn("a", 'a.withField("b.a", lit(2))) + mixedCaseStructLevel2.withColumn("a", Symbol("a").withField("b.a", lit(2))) }.getMessage should include("No such struct field b in a, B") } } @@ -1769,17 +1771,17 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should throw an exception if any intermediate structs don't exist") { intercept[AnalysisException] { - structLevel2.withColumn("a", 'a.dropFields("x.b")) + structLevel2.withColumn("a", Symbol("a").dropFields("x.b")) }.getMessage should include("No such struct field x in a") intercept[AnalysisException] { - structLevel3.withColumn("a", 'a.dropFields("a.x.b")) + structLevel3.withColumn("a", Symbol("a").dropFields("a.x.b")) }.getMessage should include("No such struct field x in a") } test("dropFields should throw an exception if intermediate field is not a struct") { intercept[AnalysisException] { - structLevel1.withColumn("a", 'a.dropFields("b.a")) + structLevel1.withColumn("a", Symbol("a").dropFields("b.a")) }.getMessage should include("struct argument should be struct type, got: int") } @@ -1793,13 +1795,13 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("a", structType, nullable = false))), nullable = false)))) - structLevel2.withColumn("a", 'a.dropFields("a.b")) + structLevel2.withColumn("a", Symbol("a").dropFields("a.b")) }.getMessage should include("Ambiguous reference to fields") } test("dropFields should drop field in struct") { checkAnswer( - structLevel1.withColumn("a", 'a.dropFields("b")), + structLevel1.withColumn("a", Symbol("a").dropFields("b")), Row(Row(1, 3)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1822,7 +1824,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop multiple fields in struct") { Seq( structLevel1.withColumn("a", $"a".dropFields("b", "c")), - structLevel1.withColumn("a", 'a.dropFields("b").dropFields("c")) + structLevel1.withColumn("a", Symbol("a").dropFields("b").dropFields("c")) ).foreach { df => checkAnswer( df, @@ -1836,7 +1838,7 
@@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should throw an exception if no fields will be left in struct") { intercept[AnalysisException] { - structLevel1.withColumn("a", 'a.dropFields("a", "b", "c")) + structLevel1.withColumn("a", Symbol("a").dropFields("a", "b", "c")) }.getMessage should include("cannot drop all fields in struct") } @@ -1860,7 +1862,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop field in nested struct") { checkAnswer( - structLevel2.withColumn("a", 'a.dropFields("a.b")), + structLevel2.withColumn("a", Symbol("a").dropFields("a.b")), Row(Row(Row(1, 3))) :: Nil, StructType( Seq(StructField("a", StructType(Seq( @@ -1873,7 +1875,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop multiple fields in nested struct") { checkAnswer( - structLevel2.withColumn("a", 'a.dropFields("a.b", "a.c")), + structLevel2.withColumn("a", Symbol("a").dropFields("a.b", "a.c")), Row(Row(Row(1))) :: Nil, StructType( Seq(StructField("a", StructType(Seq( @@ -1910,7 +1912,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop field in deeply nested struct") { checkAnswer( - structLevel3.withColumn("a", 'a.dropFields("a.a.b")), + structLevel3.withColumn("a", Symbol("a").dropFields("a.a.b")), Row(Row(Row(Row(1, 3)))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1934,7 +1936,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - structLevel1.withColumn("a", 'a.dropFields("b")), + structLevel1.withColumn("a", Symbol("a").dropFields("b")), Row(Row(1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1945,7 +1947,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop field in struct even if casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.dropFields("A")), + mixedCaseStructLevel1.withColumn("a", Symbol("a").dropFields("A")), Row(Row(1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1953,7 +1955,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.dropFields("b")), + mixedCaseStructLevel1.withColumn("a", Symbol("a").dropFields("b")), Row(Row(1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1965,7 +1967,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should not drop field in struct because casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.dropFields("A")), + mixedCaseStructLevel1.withColumn("a", Symbol("a").dropFields("A")), Row(Row(1, 1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1974,7 +1976,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.dropFields("b")), + mixedCaseStructLevel1.withColumn("a", Symbol("a").dropFields("b")), Row(Row(1, 1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1987,7 +1989,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop nested field in struct even if casing is different") { 
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { checkAnswer( - mixedCaseStructLevel2.withColumn("a", 'a.dropFields("A.a")), + mixedCaseStructLevel2.withColumn("a", Symbol("a").dropFields("A.a")), Row(Row(Row(1), Row(1, 1))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -2001,7 +2003,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel2.withColumn("a", 'a.dropFields("b.a")), + mixedCaseStructLevel2.withColumn("a", Symbol("a").dropFields("b.a")), Row(Row(Row(1, 1), Row(1))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -2019,18 +2021,18 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should throw an exception because casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { intercept[AnalysisException] { - mixedCaseStructLevel2.withColumn("a", 'a.dropFields("A.a")) + mixedCaseStructLevel2.withColumn("a", Symbol("a").dropFields("A.a")) }.getMessage should include("No such struct field A in a, B") intercept[AnalysisException] { - mixedCaseStructLevel2.withColumn("a", 'a.dropFields("b.a")) + mixedCaseStructLevel2.withColumn("a", Symbol("a").dropFields("b.a")) }.getMessage should include("No such struct field b in a, B") } } test("dropFields should drop only fields that exist") { checkAnswer( - structLevel1.withColumn("a", 'a.dropFields("d")), + structLevel1.withColumn("a", Symbol("a").dropFields("d")), Row(Row(1, null, 3)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -2040,7 +2042,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - structLevel1.withColumn("a", 'a.dropFields("b", "d")), + structLevel1.withColumn("a", Symbol("a").dropFields("b", "d")), Row(Row(1, 3)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala index 076b64cde8c66..376fa2e95a8e2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala @@ -83,7 +83,7 @@ class DataFrameSessionWindowingSuite extends QueryTest with SharedSparkSession // key "b" => (19:39:27 ~ 19:39:37) checkAnswer( - df.groupBy(session_window($"time", "10 seconds"), 'id) + df.groupBy(session_window($"time", "10 seconds"), Symbol("id")) .agg(count("*").as("counts"), sum("value").as("sum")) .orderBy($"session_window.start".asc) .selectExpr("CAST(session_window.start AS STRING)", "CAST(session_window.end AS STRING)", @@ -113,7 +113,7 @@ class DataFrameSessionWindowingSuite extends QueryTest with SharedSparkSession // key "b" => (19:39:27 ~ 19:39:37) checkAnswer( - df.groupBy(session_window($"time", "10 seconds"), 'id) + df.groupBy(session_window($"time", "10 seconds"), Symbol("id")) .agg(count("*").as("counts"), sum_distinct(col("value")).as("sum")) .orderBy($"session_window.start".asc) .selectExpr("CAST(session_window.start AS STRING)", "CAST(session_window.end AS STRING)", @@ -142,7 +142,7 @@ class DataFrameSessionWindowingSuite extends QueryTest with SharedSparkSession // key "b" => (19:39:27 ~ 19:39:37) checkAnswer( - df.groupBy(session_window($"time", "10 seconds"), 'id) + df.groupBy(session_window($"time", "10 seconds"), Symbol("id")) .agg(sum_distinct(col("value")).as("sum"), 
sum_distinct(col("value2")).as("sum2")) .orderBy($"session_window.start".asc) .selectExpr("CAST(session_window.start AS STRING)", "CAST(session_window.end AS STRING)", @@ -171,7 +171,7 @@ class DataFrameSessionWindowingSuite extends QueryTest with SharedSparkSession // b => (19:39:27 ~ 19:39:37), (19:39:39 ~ 19:39:55) checkAnswer( - df.groupBy(session_window($"time", "10 seconds"), 'id) + df.groupBy(session_window($"time", "10 seconds"), Symbol("id")) .agg(count("*").as("counts"), sum("value").as("sum")) .orderBy($"session_window.start".asc) .selectExpr("CAST(session_window.start AS STRING)", "CAST(session_window.end AS STRING)", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index a5403ec548d7e..3659f20fb6ec2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -593,7 +593,7 @@ class ExplainSuiteAE extends ExplainSuiteHelper with EnableAdaptiveExecutionSuit } test("SPARK-35884: Explain should only display one plan before AQE takes effect") { - val df = (0 to 10).toDF("id").where('id > 5) + val df = (0 to 10).toDF("id").where(Symbol("id") > 5) val modes = Seq(SimpleMode, ExtendedMode, CostMode, FormattedMode) modes.foreach { mode => checkKeywordsExistsInExplain(df, mode, "AdaptiveSparkPlan") @@ -608,7 +608,8 @@ class ExplainSuiteAE extends ExplainSuiteHelper with EnableAdaptiveExecutionSuit test("SPARK-35884: Explain formatted with subquery") { withTempView("t1", "t2") { - spark.range(100).select('id % 10 as "key", 'id as "value").createOrReplaceTempView("t1") + spark.range(100).select(Symbol("id") % 10 as "key", Symbol("id") as "value") + .createOrReplaceTempView("t1") spark.range(10).createOrReplaceTempView("t2") val query = """ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 8024f24e2eb13..11886f80f9455 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -967,52 +967,57 @@ class FileBasedDataSourceSuite extends QueryTest // cases when value == MAX var v = Short.MaxValue - checkPushedFilters(format, df.where('id > v.toInt), Array(), noScan = true) - checkPushedFilters(format, df.where('id >= v.toInt), Array(sources.IsNotNull("id"), - sources.EqualTo("id", v))) - checkPushedFilters(format, df.where('id === v.toInt), Array(sources.IsNotNull("id"), - sources.EqualTo("id", v))) - checkPushedFilters(format, df.where('id <=> v.toInt), + checkPushedFilters(format, df.where(Symbol("id") > v.toInt), Array(), noScan = true) + checkPushedFilters(format, df.where(Symbol("id") >= v.toInt), + Array(sources.IsNotNull("id"), sources.EqualTo("id", v))) + checkPushedFilters(format, df.where(Symbol("id") === v.toInt), + Array(sources.IsNotNull("id"), sources.EqualTo("id", v))) + checkPushedFilters(format, df.where(Symbol("id") <=> v.toInt), Array(sources.EqualNullSafe("id", v))) - checkPushedFilters(format, df.where('id <= v.toInt), Array(sources.IsNotNull("id"))) - checkPushedFilters(format, df.where('id < v.toInt), Array(sources.IsNotNull("id"), - sources.Not(sources.EqualTo("id", v)))) + checkPushedFilters(format, df.where(Symbol("id") <= v.toInt), + Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(Symbol("id") < v.toInt), + 
Array(sources.IsNotNull("id"), sources.Not(sources.EqualTo("id", v)))) // cases when value > MAX var v1: Int = positiveInt - checkPushedFilters(format, df.where('id > v1), Array(), noScan = true) - checkPushedFilters(format, df.where('id >= v1), Array(), noScan = true) - checkPushedFilters(format, df.where('id === v1), Array(), noScan = true) - checkPushedFilters(format, df.where('id <=> v1), Array(), noScan = true) - checkPushedFilters(format, df.where('id <= v1), Array(sources.IsNotNull("id"))) - checkPushedFilters(format, df.where('id < v1), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(Symbol("id") > v1), Array(), noScan = true) + checkPushedFilters(format, df.where(Symbol("id") >= v1), Array(), noScan = true) + checkPushedFilters(format, df.where(Symbol("id") === v1), Array(), noScan = true) + checkPushedFilters(format, df.where(Symbol("id") <=> v1), Array(), noScan = true) + checkPushedFilters(format, df.where(Symbol("id") <= v1), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(Symbol("id") < v1), Array(sources.IsNotNull("id"))) // cases when value = MIN v = Short.MinValue - checkPushedFilters(format, df.where(lit(v.toInt) < 'id), Array(sources.IsNotNull("id"), - sources.Not(sources.EqualTo("id", v)))) - checkPushedFilters(format, df.where(lit(v.toInt) <= 'id), Array(sources.IsNotNull("id"))) - checkPushedFilters(format, df.where(lit(v.toInt) === 'id), Array(sources.IsNotNull("id"), + checkPushedFilters(format, df.where(lit(v.toInt) < Symbol("id")), + Array(sources.IsNotNull("id"), sources.Not(sources.EqualTo("id", v)))) + checkPushedFilters(format, df.where(lit(v.toInt) <= Symbol("id")), + Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v.toInt) === Symbol("id")), + Array(sources.IsNotNull("id"), sources.EqualTo("id", v))) - checkPushedFilters(format, df.where(lit(v.toInt) <=> 'id), + checkPushedFilters(format, df.where(lit(v.toInt) <=> Symbol("id")), Array(sources.EqualNullSafe("id", v))) - checkPushedFilters(format, df.where(lit(v.toInt) >= 'id), Array(sources.IsNotNull("id"), - sources.EqualTo("id", v))) - checkPushedFilters(format, df.where(lit(v.toInt) > 'id), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v.toInt) >= Symbol("id")), + Array(sources.IsNotNull("id"), sources.EqualTo("id", v))) + checkPushedFilters(format, df.where(lit(v.toInt) > Symbol("id")), Array(), noScan = true) // cases when value < MIN v1 = negativeInt - checkPushedFilters(format, df.where(lit(v1) < 'id), Array(sources.IsNotNull("id"))) - checkPushedFilters(format, df.where(lit(v1) <= 'id), Array(sources.IsNotNull("id"))) - checkPushedFilters(format, df.where(lit(v1) === 'id), Array(), noScan = true) - checkPushedFilters(format, df.where(lit(v1) >= 'id), Array(), noScan = true) - checkPushedFilters(format, df.where(lit(v1) > 'id), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v1) < Symbol("id")), + Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v1) <= Symbol("id")), + Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v1) === Symbol("id")), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v1) >= Symbol("id")), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v1) > Symbol("id")), Array(), noScan = true) // cases when value is within range (MIN, MAX) - checkPushedFilters(format, df.where('id > 30), Array(sources.IsNotNull("id"), + checkPushedFilters(format, df.where(Symbol("id") > 30), Array(sources.IsNotNull("id"), 
sources.GreaterThan("id", 30))) - checkPushedFilters(format, df.where(lit(100) >= 'id), Array(sources.IsNotNull("id"), - sources.LessThanOrEqual("id", 100))) + checkPushedFilters(format, df.where(lit(100) >= Symbol("id")), + Array(sources.IsNotNull("id"), sources.LessThanOrEqual("id", 100))) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileScanSuite.scala index 14b59ba23d09f..ce98fd27350a8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileScanSuite.scala @@ -85,10 +85,11 @@ trait FileScanSuiteBase extends SharedSparkSession { val options = new CaseInsensitiveStringMap(ImmutableMap.copyOf(optionsMap)) val optionsNotEqual = new CaseInsensitiveStringMap(ImmutableMap.copyOf(ImmutableMap.of("key2", "value2"))) - val partitionFilters = Seq(And(IsNull('data.int), LessThan('data.int, 0))) - val partitionFiltersNotEqual = Seq(And(IsNull('data.int), LessThan('data.int, 1))) - val dataFilters = Seq(And(IsNull('data.int), LessThan('data.int, 0))) - val dataFiltersNotEqual = Seq(And(IsNull('data.int), LessThan('data.int, 1))) + val partitionFilters = Seq(And(IsNull(Symbol("data").int), LessThan(Symbol("data").int, 0))) + val partitionFiltersNotEqual = Seq(And(IsNull(Symbol("data").int), + LessThan(Symbol("data").int, 1))) + val dataFilters = Seq(And(IsNull(Symbol("data").int), LessThan(Symbol("data").int, 0))) + val dataFiltersNotEqual = Seq(And(IsNull(Symbol("data").int), LessThan(Symbol("data").int, 1))) scanBuilders.foreach { case (name, scanBuilder, exclusions) => test(s"SPARK-33482: Test $name equals") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 77493afe43145..ec6c863b8183f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -183,7 +183,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan test("inner join where, one match per row") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { checkAnswer( - upperCaseData.join(lowerCaseData).where('n === 'N), + upperCaseData.join(lowerCaseData).where(Symbol("n") === 'N), Seq( Row(1, "A", 1, "a"), Row(2, "B", 2, "b"), @@ -404,8 +404,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan test("full outer join") { withTempView("`left`", "`right`") { - upperCaseData.where('N <= 4).createOrReplaceTempView("`left`") - upperCaseData.where('N >= 3).createOrReplaceTempView("`right`") + upperCaseData.where(Symbol("N") <= 4).createOrReplaceTempView("`left`") + upperCaseData.where(Symbol("N") >= 3).createOrReplaceTempView("`right`") val left = UnresolvedRelation(TableIdentifier("left")) val right = UnresolvedRelation(TableIdentifier("right")) @@ -623,7 +623,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan testData.createOrReplaceTempView("B") testData2.createOrReplaceTempView("C") testData3.createOrReplaceTempView("D") - upperCaseData.where('N >= 3).createOrReplaceTempView("`right`") + upperCaseData.where(Symbol("N") >= 3).createOrReplaceTempView("`right`") val cartesianQueries = Seq( /** The following should error out since there is no explicit cross join */ "SELECT * FROM testData inner join testData2", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 6661b58b8f522..e18c087a26279 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -403,7 +403,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { test("SPARK-24709: infers schemas of json strings and pass them to from_json") { val in = Seq("""{"a": [1, 2, 3]}""").toDS() - val out = in.select(from_json('value, schema_of_json("""{"a": [1]}""")) as "parsed") + val out = in.select(from_json(Symbol("value"), schema_of_json("""{"a": [1]}""")) as "parsed") val expected = StructType(StructField( "parsed", StructType(StructField( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index f3bff7389ee74..ab52cb98208f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -47,12 +47,12 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { c: Column => Column, f: T => U): Unit = { checkAnswer( - doubleData.select(c('a)), + doubleData.select(c(Symbol("a"))), (1 to 10).map(n => Row(f((n * 0.2 - 1).asInstanceOf[T]))) ) checkAnswer( - doubleData.select(c('b)), + doubleData.select(c(Symbol("b"))), (1 to 10).map(n => Row(f((-n * 0.2 + 1).asInstanceOf[T]))) ) @@ -65,13 +65,13 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { private def testOneToOneNonNegativeMathFunction(c: Column => Column, f: Double => Double): Unit = { checkAnswer( - nnDoubleData.select(c('a)), + nnDoubleData.select(c(Symbol("a"))), (1 to 10).map(n => Row(f(n * 0.1))) ) if (f(-1) === StrictMath.log1p(-1)) { checkAnswer( - nnDoubleData.select(c('b)), + nnDoubleData.select(c(Symbol("b"))), (1 to 9).map(n => Row(f(n * -0.1))) :+ Row(null) ) } @@ -87,12 +87,12 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { d: (Column, Double) => Column, f: (Double, Double) => Double): Unit = { checkAnswer( - nnDoubleData.select(c('a, 'a)), + nnDoubleData.select(c('a, Symbol("a"))), nnDoubleData.collect().toSeq.map(r => Row(f(r.getDouble(0), r.getDouble(0)))) ) checkAnswer( - nnDoubleData.select(c('a, 'b)), + nnDoubleData.select(c('a, Symbol("b"))), nnDoubleData.collect().toSeq.map(r => Row(f(r.getDouble(0), r.getDouble(1)))) ) @@ -109,7 +109,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { val nonNull = nullDoubles.collect().toSeq.filter(r => r.get(0) != null) checkAnswer( - nullDoubles.select(c('a, 'a)).orderBy('a.asc), + nullDoubles.select(c('a, Symbol("a"))).orderBy(Symbol("a").asc), Row(null) +: nonNull.map(r => Row(f(r.getDouble(0), r.getDouble(0)))) ) } @@ -255,7 +255,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("factorial") { val df = (0 to 5).map(i => (i, i)).toDF("a", "b") checkAnswer( - df.select(factorial('a)), + df.select(factorial(Symbol("a"))), Seq(Row(1), Row(1), Row(2), Row(6), Row(24), Row(120)) ) checkAnswer( @@ -271,11 +271,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("round/bround/ceil/floor") { val df = Seq(5, 55, 555).map(Tuple1(_)).toDF("a") checkAnswer( - df.select(round('a), round('a, -1), round('a, -2)), + df.select(round(Symbol("a")), round('a, -1), round('a, -2)), Seq(Row(5, 10, 0), Row(55, 60, 100), Row(555, 560, 600)) ) checkAnswer( - df.select(bround('a), 
bround('a, -1), bround('a, -2)), + df.select(bround(Symbol("a")), bround('a, -1), bround('a, -2)), Seq(Row(5, 0, 0), Row(55, 60, 100), Row(555, 560, 600)) ) checkAnswer( @@ -343,11 +343,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("round/bround/ceil/floor with data frame from a local Seq of Product") { val df = spark.createDataFrame(Seq(Tuple1(BigDecimal("5.9")))).toDF("value") checkAnswer( - df.withColumn("value_rounded", round('value)), + df.withColumn("value_rounded", round(Symbol("value"))), Seq(Row(BigDecimal("5.9"), BigDecimal("6"))) ) checkAnswer( - df.withColumn("value_brounded", bround('value)), + df.withColumn("value_brounded", bround(Symbol("value"))), Seq(Row(BigDecimal("5.9"), BigDecimal("6"))) ) checkAnswer( @@ -423,10 +423,10 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("hex") { val data = Seq((28, -28, 100800200404L, "hello")).toDF("a", "b", "c", "d") - checkAnswer(data.select(hex('a)), Seq(Row("1C"))) - checkAnswer(data.select(hex('b)), Seq(Row("FFFFFFFFFFFFFFE4"))) - checkAnswer(data.select(hex('c)), Seq(Row("177828FED4"))) - checkAnswer(data.select(hex('d)), Seq(Row("68656C6C6F"))) + checkAnswer(data.select(hex(Symbol("a"))), Seq(Row("1C"))) + checkAnswer(data.select(hex(Symbol("b"))), Seq(Row("FFFFFFFFFFFFFFE4"))) + checkAnswer(data.select(hex(Symbol("c"))), Seq(Row("177828FED4"))) + checkAnswer(data.select(hex(Symbol("d"))), Seq(Row("68656C6C6F"))) checkAnswer(data.selectExpr("hex(a)"), Seq(Row("1C"))) checkAnswer(data.selectExpr("hex(b)"), Seq(Row("FFFFFFFFFFFFFFE4"))) checkAnswer(data.selectExpr("hex(c)"), Seq(Row("177828FED4"))) @@ -436,8 +436,8 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("unhex") { val data = Seq(("1C", "737472696E67")).toDF("a", "b") - checkAnswer(data.select(unhex('a)), Row(Array[Byte](28.toByte))) - checkAnswer(data.select(unhex('b)), Row("string".getBytes(StandardCharsets.UTF_8))) + checkAnswer(data.select(unhex(Symbol("a"))), Row(Array[Byte](28.toByte))) + checkAnswer(data.select(unhex(Symbol("b"))), Row("string".getBytes(StandardCharsets.UTF_8))) checkAnswer(data.selectExpr("unhex(a)"), Row(Array[Byte](28.toByte))) checkAnswer(data.selectExpr("unhex(b)"), Row("string".getBytes(StandardCharsets.UTF_8))) checkAnswer(data.selectExpr("""unhex("##")"""), Row(null)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 326ea314ec68e..c28dde9cea09a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3066,15 +3066,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark val df = spark.read.format(format).load(dir.getCanonicalPath) checkPushedFilters( format, - df.where(('id < 2 and 's.contains("foo")) or ('id > 10 and 's.contains("bar"))), + df.where((Symbol("id") < 2 and Symbol("s").contains("foo")) or + (Symbol("id") > 10 and Symbol("s").contains("bar"))), Array(sources.Or(sources.LessThan("id", 2), sources.GreaterThan("id", 10)))) checkPushedFilters( format, - df.where('s.contains("foo") or ('id > 10 and 's.contains("bar"))), + df.where(Symbol("s").contains("foo") or + (Symbol("id") > 10 and Symbol("s").contains("bar"))), Array.empty) checkPushedFilters( format, - df.where('id < 2 and not('id > 10 and 's.contains("bar"))), + df.where(Symbol("id") < 2 and not(Symbol("id") > 10 and Symbol("s").contains("bar"))), 
Array(sources.IsNotNull("id"), sources.LessThan("id", 2))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 57fc49ddc8131..c37309d97acae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -407,9 +407,9 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared withTable("TBL1", "TBL") { import org.apache.spark.sql.functions._ val df = spark.range(1000L).select('id, - 'id * 2 as "FLD1", - 'id * 12 as "FLD2", - lit(null).cast(DoubleType) + 'id as "fld3") + Symbol("id") * 2 as "FLD1", + Symbol("id") * 12 as "FLD2", + lit(null).cast(DoubleType) + Symbol("id") as "fld3") df.write .mode(SaveMode.Overwrite) .bucketBy(10, "id", "FLD1", "FLD2") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index d100cad89fcc1..e651459394fd9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -424,7 +424,7 @@ class UDFSuite extends QueryTest with SharedSparkSession { ("N", Integer.valueOf(3), null)).toDF("a", "b", "c") val udf1 = udf((a: String, b: Int, c: Any) => a + b + c) - val df = input.select(udf1('a, 'b, 'c)) + val df = input.select(udf1(Symbol("a"), 'b, 'c)) checkAnswer(df, Seq(Row("null1x"), Row(null), Row("N3null"))) // test Java UDF. Java UDF can't have primitive inputs, as it's generic typed. @@ -554,7 +554,7 @@ class UDFSuite extends QueryTest with SharedSparkSession { spark.udf.register("buildLocalDateInstantType", udf((d: LocalDate, i: Instant) => LocalDateInstantType(d, i))) checkAnswer(df.selectExpr(s"buildLocalDateInstantType(d, i) as di") - .select('di.cast(StringType)), + .select(Symbol("di").cast(StringType)), Row(s"{$expectedDate, $expectedInstant}") :: Nil) // test null cases @@ -584,7 +584,7 @@ class UDFSuite extends QueryTest with SharedSparkSession { spark.udf.register("buildTimestampInstantType", udf((t: Timestamp, i: Instant) => TimestampInstantType(t, i))) checkAnswer(df.selectExpr("buildTimestampInstantType(t, i) as ti") - .select('ti.cast(StringType)), + .select(Symbol("ti").cast(StringType)), Row(s"{$expectedTimestamp, $expectedInstant}")) // test null cases diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala index cc52b6d8a14a7..729312c3e5912 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala @@ -82,14 +82,14 @@ class UserDefinedTypeSuite extends QueryTest with SharedSparkSession with Parque } test("register user type: MyDenseVector for MyLabeledPoint") { - val labels: RDD[Double] = pointsRDD.select('label).rdd.map { case Row(v: Double) => v } + val labels: RDD[Double] = pointsRDD.select(Symbol("label")).rdd.map { case Row(v: Double) => v } val labelsArrays: Array[Double] = labels.collect() assert(labelsArrays.size === 2) assert(labelsArrays.contains(1.0)) assert(labelsArrays.contains(0.0)) val features: RDD[TestUDT.MyDenseVector] = - pointsRDD.select('features).rdd.map { case Row(v: TestUDT.MyDenseVector) => v } + pointsRDD.select(Symbol("features")).rdd.map { case Row(v: TestUDT.MyDenseVector) => v } val 
featuresArrays: Array[TestUDT.MyDenseVector] = features.collect() assert(featuresArrays.size === 2) assert(featuresArrays.contains(new TestUDT.MyDenseVector(Array(0.1, 1.0)))) @@ -137,8 +137,9 @@ class UserDefinedTypeSuite extends QueryTest with SharedSparkSession with Parque val df = Seq((1, vec)).toDF("int", "vec") assert(vec === df.collect()(0).getAs[TestUDT.MyDenseVector](1)) assert(vec === df.take(1)(0).getAs[TestUDT.MyDenseVector](1)) - checkAnswer(df.limit(1).groupBy('int).agg(first('vec)), Row(1, vec)) - checkAnswer(df.orderBy('int).limit(1).groupBy('int).agg(first('vec)), Row(1, vec)) + checkAnswer(df.limit(1).groupBy(Symbol("int")).agg(first(Symbol("vec"))), Row(1, vec)) + checkAnswer(df.orderBy(Symbol("int")).limit(1).groupBy(Symbol("int")) + .agg(first(Symbol("vec"))), Row(1, vec)) } test("UDTs with JSON") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala index 3edc4b9502064..05aafceb36ec7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala @@ -210,7 +210,7 @@ private [connector] trait SessionCatalogTest[T <: Table, Catalog <: TestV2Sessio verifyTable(t1, df) // Check that appends are by name - df.select('data, 'id).write.format(v2Format).mode("append").saveAsTable(t1) + df.select(Symbol("data"), Symbol("id")).write.format(v2Format).mode("append").saveAsTable(t1) verifyTable(t1, df.union(df)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala index dd810a70d1585..03dcfcf7ddc7d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala @@ -93,7 +93,7 @@ class DataSourceV2DataFrameSuite assert(spark.table(t1).count() === 0) // appends are by name not by position - df.select('data, 'id).write.mode("append").saveAsTable(t1) + df.select(Symbol("data"), Symbol("id")).write.mode("append").saveAsTable(t1) checkAnswer(spark.table(t1), df) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index fd3c69eff5652..23164edddaeed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -80,8 +80,8 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS withClue(cls.getName) { val df = spark.read.format(cls.getName).load() checkAnswer(df, (0 until 10).map(i => Row(i, -i))) - checkAnswer(df.select('j), (0 until 10).map(i => Row(-i))) - checkAnswer(df.filter('i > 5), (6 until 10).map(i => Row(i, -i))) + checkAnswer(df.select(Symbol("j")), (0 until 10).map(i => Row(-i))) + checkAnswer(df.filter(Symbol("i") > 5), (6 until 10).map(i => Row(i, -i))) } } } @@ -92,7 +92,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS val df = spark.read.format(cls.getName).load() checkAnswer(df, (0 until 10).map(i => Row(i, -i))) - val q1 = df.select('j) + val q1 = df.select(Symbol("j")) 
checkAnswer(q1, (0 until 10).map(i => Row(-i))) if (cls == classOf[AdvancedDataSourceV2]) { val batch = getBatch(q1) @@ -104,7 +104,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("j")) } - val q2 = df.filter('i > 3) + val q2 = df.filter(Symbol("i") > 3) checkAnswer(q2, (4 until 10).map(i => Row(i, -i))) if (cls == classOf[AdvancedDataSourceV2]) { val batch = getBatch(q2) @@ -116,7 +116,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("i", "j")) } - val q3 = df.select('i).filter('i > 6) + val q3 = df.select(Symbol("i")).filter(Symbol("i") > 6) checkAnswer(q3, (7 until 10).map(i => Row(i))) if (cls == classOf[AdvancedDataSourceV2]) { val batch = getBatch(q3) @@ -128,16 +128,16 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("i")) } - val q4 = df.select('j).filter('j < -10) + val q4 = df.select(Symbol("j")).filter(Symbol("j") < -10) checkAnswer(q4, Nil) if (cls == classOf[AdvancedDataSourceV2]) { val batch = getBatch(q4) - // 'j < 10 is not supported by the testing data source. + // Symbol("j") < 10 is not supported by the testing data source. assert(batch.filters.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } else { val batch = getJavaBatch(q4) - // 'j < 10 is not supported by the testing data source. + // Symbol("j") < 10 is not supported by the testing data source. assert(batch.filters.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } @@ -152,7 +152,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS val df = spark.read.format(cls.getName).load() checkAnswer(df, (0 until 10).map(i => Row(i, -i))) - val q1 = df.select('j) + val q1 = df.select(Symbol("j")) checkAnswer(q1, (0 until 10).map(i => Row(-i))) if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q1) @@ -164,7 +164,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("j")) } - val q2 = df.filter('i > 3) + val q2 = df.filter(Symbol("i") > 3) checkAnswer(q2, (4 until 10).map(i => Row(i, -i))) if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q2) @@ -176,7 +176,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("i", "j")) } - val q3 = df.select('i).filter('i > 6) + val q3 = df.select(Symbol("i")).filter(Symbol("i") > 6) checkAnswer(q3, (7 until 10).map(i => Row(i))) if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q3) @@ -188,16 +188,16 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("i")) } - val q4 = df.select('j).filter('j < -10) + val q4 = df.select(Symbol("j")).filter(Symbol("j") < -10) checkAnswer(q4, Nil) if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q4) - // 'j < 10 is not supported by the testing data source. + // Symbol("j") < 10 is not supported by the testing data source. assert(batch.filters.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } else { val batch = getJavaBatchWithV2Filter(q4) - // 'j < 10 is not supported by the testing data source. 
+ // Symbol("j") < 10 is not supported by the testing data source. assert(batch.filters.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } @@ -210,8 +210,8 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS withClue(cls.getName) { val df = spark.read.format(cls.getName).load() checkAnswer(df, (0 until 90).map(i => Row(i, -i))) - checkAnswer(df.select('j), (0 until 90).map(i => Row(-i))) - checkAnswer(df.filter('i > 50), (51 until 90).map(i => Row(i, -i))) + checkAnswer(df.select(Symbol("j")), (0 until 90).map(i => Row(-i))) + checkAnswer(df.filter(Symbol("i") > 50), (51 until 90).map(i => Row(i, -i))) } } } @@ -235,12 +235,12 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS "supports external metadata") { withTempDir { dir => val cls = classOf[SupportsExternalMetadataWritableDataSource].getName - spark.range(10).select('id as 'i, -'id as 'j).write.format(cls) - .option("path", dir.getCanonicalPath).mode("append").save() + spark.range(10).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls).option("path", dir.getCanonicalPath).mode("append").save() val schema = new StructType().add("i", "long").add("j", "long") checkAnswer( spark.read.format(cls).option("path", dir.getCanonicalPath).schema(schema).load(), - spark.range(10).select('id, -'id)) + spark.range(10).select(Symbol("id"), -Symbol("id"))) } } @@ -251,25 +251,25 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS val df = spark.read.format(cls.getName).load() checkAnswer(df, Seq(Row(1, 4), Row(1, 4), Row(3, 6), Row(2, 6), Row(4, 2), Row(4, 2))) - val groupByColA = df.groupBy('i).agg(sum('j)) + val groupByColA = df.groupBy(Symbol("i")).agg(sum(Symbol("j"))) checkAnswer(groupByColA, Seq(Row(1, 8), Row(2, 6), Row(3, 6), Row(4, 4))) assert(collectFirst(groupByColA.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e }.isEmpty) - val groupByColAB = df.groupBy('i, 'j).agg(count("*")) + val groupByColAB = df.groupBy(Symbol("i"), Symbol("j")).agg(count("*")) checkAnswer(groupByColAB, Seq(Row(1, 4, 2), Row(2, 6, 1), Row(3, 6, 1), Row(4, 2, 2))) assert(collectFirst(groupByColAB.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e }.isEmpty) - val groupByColB = df.groupBy('j).agg(sum('i)) + val groupByColB = df.groupBy(Symbol("j")).agg(sum(Symbol("i"))) checkAnswer(groupByColB, Seq(Row(2, 8), Row(4, 2), Row(6, 5))) assert(collectFirst(groupByColB.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e }.isDefined) - val groupByAPlusB = df.groupBy('i + 'j).agg(count("*")) + val groupByAPlusB = df.groupBy(Symbol("i") + Symbol("j")).agg(count("*")) checkAnswer(groupByAPlusB, Seq(Row(5, 2), Row(6, 2), Row(8, 1), Row(9, 1))) assert(collectFirst(groupByAPlusB.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e @@ -307,37 +307,43 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS val path = file.getCanonicalPath assert(spark.read.format(cls.getName).option("path", path).load().collect().isEmpty) - spark.range(10).select('id as 'i, -'id as 'j).write.format(cls.getName) + spark.range(10).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .option("path", path).mode("append").save() checkAnswer( spark.read.format(cls.getName).option("path", path).load(), - spark.range(10).select('id, -'id)) + spark.range(10).select(Symbol("id"), -Symbol("id"))) // default save mode is ErrorIfExists 
intercept[AnalysisException] { - spark.range(10).select('id as 'i, -'id as 'j).write.format(cls.getName) + spark.range(10).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .option("path", path).save() } - spark.range(10).select('id as 'i, -'id as 'j).write.mode("append").format(cls.getName) + spark.range(10).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.mode("append").format(cls.getName) .option("path", path).save() checkAnswer( spark.read.format(cls.getName).option("path", path).load(), - spark.range(10).union(spark.range(10)).select('id, -'id)) + spark.range(10).union(spark.range(10)).select(Symbol("id"), -Symbol("id"))) - spark.range(5).select('id as 'i, -'id as 'j).write.format(cls.getName) + spark.range(5).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .option("path", path).mode("overwrite").save() checkAnswer( spark.read.format(cls.getName).option("path", path).load(), - spark.range(5).select('id, -'id)) + spark.range(5).select(Symbol("id"), -Symbol("id"))) val e = intercept[AnalysisException] { - spark.range(5).select('id as 'i, -'id as 'j).write.format(cls.getName) + spark.range(5).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .option("path", path).mode("ignore").save() } assert(e.message.contains("please use Append or Overwrite modes instead")) val e2 = intercept[AnalysisException] { - spark.range(5).select('id as 'i, -'id as 'j).write.format(cls.getName) + spark.range(5).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .option("path", path).mode("error").save() } assert(e2.getMessage.contains("please use Append or Overwrite modes instead")) @@ -354,7 +360,8 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS } } // this input data will fail to read middle way. 
- val input = spark.range(15).select(failingUdf('id).as('i)).select('i, -'i as 'j) + val input = spark.range(15).select(failingUdf(Symbol("id")).as(Symbol("i"))) + .select(Symbol("i"), -Symbol("i") as Symbol("j")) val e3 = intercept[SparkException] { input.write.format(cls.getName).option("path", path).mode("overwrite").save() } @@ -374,11 +381,13 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(spark.read.format(cls.getName).option("path", path).load().collect().isEmpty) val numPartition = 6 - spark.range(0, 10, 1, numPartition).select('id as 'i, -'id as 'j).write.format(cls.getName) + spark.range(0, 10, 1, numPartition) + .select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .mode("append").option("path", path).save() checkAnswer( spark.read.format(cls.getName).option("path", path).load(), - spark.range(10).select('id, -'id)) + spark.range(10).select(Symbol("id"), -Symbol("id"))) assert(SimpleCounter.getCounter == numPartition, "method onDataWriterCommit should be called as many as the number of partitions") @@ -395,7 +404,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS test("SPARK-23301: column pruning with arbitrary expressions") { val df = spark.read.format(classOf[AdvancedDataSourceV2].getName).load() - val q1 = df.select('i + 1) + val q1 = df.select(Symbol("i") + 1) checkAnswer(q1, (1 until 11).map(i => Row(i))) val batch1 = getBatch(q1) assert(batch1.requiredSchema.fieldNames === Seq("i")) @@ -406,14 +415,14 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch2.requiredSchema.isEmpty) // 'j === 1 can't be pushed down, but we should still be able do column pruning - val q3 = df.filter('j === -1).select('j * 2) + val q3 = df.filter(Symbol("j") === -1).select(Symbol("j") * 2) checkAnswer(q3, Row(-2)) val batch3 = getBatch(q3) assert(batch3.filters.isEmpty) assert(batch3.requiredSchema.fieldNames === Seq("j")) // column pruning should work with other operators. 
- val q4 = df.sort('i).limit(1).select('i + 1) + val q4 = df.sort(Symbol("i")).limit(1).select(Symbol("i") + 1) checkAnswer(q4, Row(1)) val batch4 = getBatch(q4) assert(batch4.requiredSchema.fieldNames === Seq("i")) @@ -435,7 +444,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS val df = spark.read.format(classOf[AdvancedDataSourceV2].getName).load() checkCanonicalizedOutput(df, 2, 2) - checkCanonicalizedOutput(df.select('i), 2, 1) + checkCanonicalizedOutput(df.select(Symbol("i")), 2, 1) } test("SPARK-25425: extra options should override sessions options during reading") { @@ -474,7 +483,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS withTempView("t1") { val t2 = spark.read.format(classOf[SimpleDataSourceV2].getName).load() Seq(2, 3).toDF("a").createTempView("t1") - val df = t2.where("i < (select max(a) from t1)").select('i) + val df = t2.where("i < (select max(a) from t1)").select(Symbol("i")) val subqueries = stripAQEPlan(df.queryExecution.executedPlan).collect { case p => p.subqueries }.flatten @@ -493,8 +502,8 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS Seq(classOf[AdvancedDataSourceV2], classOf[JavaAdvancedDataSourceV2]).foreach { cls => withClue(cls.getName) { val df = spark.read.format(cls.getName).load() - val q1 = df.select('i).filter('i > 6) - val q2 = df.select('i).filter('i > 5) + val q1 = df.select(Symbol("i")).filter(Symbol("i") > 6) + val q2 = df.select(Symbol("i")).filter(Symbol("i") > 5) val scan1 = getScanExec(q1) val scan2 = getScanExec(q2) assert(!scan1.equals(scan2)) @@ -507,7 +516,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS withClue(cls.getName) { val df = spark.read.format(cls.getName).load() // before SPARK-33267 below query just threw NPE - df.select('i).where("i in (1, null)").collect() + df.select(Symbol("i")).where("i in (1, null)").collect() } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala index 9cb524c2c3822..473f679b4b99d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala @@ -75,7 +75,7 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with saveMode: SaveMode, withCatalogOption: Option[String], partitionBy: Seq[String]): Unit = { - val df = spark.range(10).withColumn("part", 'id % 5) + val df = spark.range(10).withColumn("part", Symbol("id") % 5) val dfw = df.write.format(format).mode(saveMode).option("name", "t1") withCatalogOption.foreach(cName => dfw.option("catalog", cName)) dfw.partitionBy(partitionBy: _*).save() @@ -140,7 +140,7 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with test("Ignore mode if table exists - session catalog") { sql(s"create table t1 (id bigint) using $format") - val df = spark.range(10).withColumn("part", 'id % 5) + val df = spark.range(10).withColumn("part", Symbol("id") % 5) val dfw = df.write.format(format).mode(SaveMode.Ignore).option("name", "t1") dfw.save() @@ -152,7 +152,7 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with test("Ignore mode if table exists - testcat catalog") { sql(s"create table $catalogName.t1 (id bigint) using $format") - val df = spark.range(10).withColumn("part", 
'id % 5) + val df = spark.range(10).withColumn("part", Symbol("id") % 5) val dfw = df.write.format(format).mode(SaveMode.Ignore).option("name", "t1") dfw.option("catalog", catalogName).save() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala index a33b9fad7ff4f..06fc2022c01ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala @@ -35,9 +35,9 @@ class AggregatingAccumulatorSuite extends SparkFunSuite with SharedSparkSession with ExpressionEvalHelper { - private val a = 'a.long - private val b = 'b.string - private val c = 'c.double + private val a = Symbol("a").long + private val b = Symbol("b").string + private val c = Symbol("c").double private val inputAttributes = Seq(a, b, c) private def str(s: String): UTF8String = UTF8String.fromString(s) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index f774c4504bb43..09a880a706b0f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -133,8 +133,8 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU """.stripMargin) checkAnswer(query, identity, df.select( - 'a.cast("string"), - 'b.cast("string"), + Symbol("a").cast("string"), + Symbol("b").cast("string"), 'c.cast("string"), 'd.cast("string"), 'e.cast("string")).collect()) @@ -164,7 +164,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU 'b.cast("string").as("value")).collect()) checkAnswer( - df.select('a, 'b), + df.select(Symbol("a"), Symbol("b")), (child: SparkPlan) => createScriptTransformationExec( script = "cat", output = Seq( @@ -178,7 +178,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU 'b.cast("string").as("value")).collect()) checkAnswer( - df.select('a), + df.select(Symbol("a")), (child: SparkPlan) => createScriptTransformationExec( script = "cat", output = Seq( @@ -242,7 +242,8 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU child = child, ioschema = serde ), - df.select('a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j).collect()) + df.select(Symbol("a"), Symbol("b"), Symbol("c"), Symbol("d"), Symbol("e"), + Symbol("f"), Symbol("g"), Symbol("h"), Symbol("i"), Symbol("j")).collect()) } } } @@ -282,7 +283,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU child = child, ioschema = defaultIOSchema ), - df.select('a, 'b, 'c, 'd, 'e).collect()) + df.select(Symbol("a"), Symbol("b"), Symbol("c"), Symbol("d"), Symbol("e")).collect()) } } @@ -304,7 +305,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU |USING 'cat' AS (a timestamp, b date) |FROM v """.stripMargin) - checkAnswer(query, identity, df.select('a, 'b).collect()) + checkAnswer(query, identity, df.select(Symbol("a"), Symbol("b")).collect()) } } } @@ -379,7 +380,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) checkAnswer( - df.select('a, 'b), + 
df.select(Symbol("a"), Symbol("b")), (child: SparkPlan) => createScriptTransformationExec( script = "cat", output = Seq( @@ -452,10 +453,10 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU (Array(6, 7, 8), Array(Array(6, 7), Array(8)), Map("c" -> 3), Map("d" -> Array("e", "f"))) ).toDF("a", "b", "c", "d") - .select('a, 'b, 'c, 'd, - struct('a, 'b).as("e"), - struct('a, 'd).as("f"), - struct(struct('a, 'b), struct('a, 'd)).as("g") + .select(Symbol("a"), Symbol("b"), Symbol("c"), Symbol("d"), + struct(Symbol("a"), Symbol("b")).as("e"), + struct(Symbol("a"), Symbol("d")).as("f"), + struct(struct(Symbol("a"), Symbol("b")), struct(Symbol("a"), Symbol("d"))).as("g") ) checkAnswer( @@ -483,7 +484,8 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU child = child, ioschema = defaultIOSchema ), - df.select('a, 'b, 'c, 'd, 'e, 'f, 'g).collect()) + df.select(Symbol("a"), Symbol("b"), Symbol("c"), Symbol("d"), Symbol("e"), + Symbol("f"), Symbol("g")).collect()) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala index 4ff96e6574cac..e4f17eb60108d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala @@ -26,9 +26,11 @@ class CoGroupedIteratorSuite extends SparkFunSuite with ExpressionEvalHelper { test("basic") { val leftInput = Seq(create_row(1, "a"), create_row(1, "b"), create_row(2, "c")).iterator val rightInput = Seq(create_row(1, 2L), create_row(2, 3L), create_row(3, 4L)).iterator - val leftGrouped = GroupedIterator(leftInput, Seq('i.int.at(0)), Seq('i.int, 's.string)) - val rightGrouped = GroupedIterator(rightInput, Seq('i.int.at(0)), Seq('i.int, 'l.long)) - val cogrouped = new CoGroupedIterator(leftGrouped, rightGrouped, Seq('i.int)) + val leftGrouped = GroupedIterator(leftInput, Seq(Symbol("i").int.at(0)), + Seq(Symbol("i").int, Symbol("s").string)) + val rightGrouped = GroupedIterator(rightInput, Seq(Symbol("i").int.at(0)), + Seq(Symbol("i").int, Symbol("l").long)) + val cogrouped = new CoGroupedIterator(leftGrouped, rightGrouped, Seq(Symbol("i").int)) val result = cogrouped.map { case (key, leftData, rightData) => @@ -52,7 +54,8 @@ class CoGroupedIteratorSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-11393: respect the fact that GroupedIterator.hasNext is not idempotent") { val leftInput = Seq(create_row(2, "a")).iterator val rightInput = Seq(create_row(1, 2L)).iterator - val leftGrouped = GroupedIterator(leftInput, Seq('i.int.at(0)), Seq('i.int, 's.string)) + val leftGrouped = GroupedIterator(leftInput, Seq(Symbol("i").int.at(0)), + Seq(Symbol("i").int, Symbol("s").string)) val rightGrouped = GroupedIterator(rightInput, Seq('i.int.at(0)), Seq('i.int, 'l.long)) val cogrouped = new CoGroupedIterator(leftGrouped, rightGrouped, Seq('i.int)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/GroupedIteratorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/GroupedIteratorSuite.scala index 4b2a2b439c89e..06c51cee02019 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/GroupedIteratorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/GroupedIteratorSuite.scala @@ -32,7 +32,7 @@ class GroupedIteratorSuite extends SparkFunSuite { val fromRow = encoder.createDeserializer() val 
input = Seq(Row(1, "a"), Row(1, "b"), Row(2, "c")) val grouped = GroupedIterator(input.iterator.map(toRow), - Seq('i.int.at(0)), schema.toAttributes) + Seq(Symbol("i").int.at(0)), schema.toAttributes) val result = grouped.map { case (key, data) => @@ -59,7 +59,7 @@ class GroupedIteratorSuite extends SparkFunSuite { Row(3, 2L, "e")) val grouped = GroupedIterator(input.iterator.map(toRow), - Seq('i.int.at(0), 'l.long.at(1)), schema.toAttributes) + Seq(Symbol("i").int.at(0), Symbol("l").long.at(1)), schema.toAttributes) val result = grouped.map { case (key, data) => @@ -80,7 +80,7 @@ class GroupedIteratorSuite extends SparkFunSuite { val toRow = encoder.createSerializer() val input = Seq(Row(1, "a"), Row(1, "b"), Row(2, "c")) val grouped = GroupedIterator(input.iterator.map(toRow), - Seq('i.int.at(0)), schema.toAttributes) + Seq(Symbol("i").int.at(0)), schema.toAttributes) assert(grouped.length == 2) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 2ab1b6d4963a5..dfc1b70cf4a5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -59,18 +59,21 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("count is partially aggregated") { - val query = testData.groupBy('value).agg(count('key)).queryExecution.analyzed + val query = testData.groupBy(Symbol("value")).agg(count(Symbol("key"))).queryExecution.analyzed testPartialAggregationPlan(query) } test("count distinct is partially aggregated") { - val query = testData.groupBy('value).agg(count_distinct('key)).queryExecution.analyzed + val query = testData.groupBy(Symbol("value")).agg(count_distinct(Symbol("key"))) + .queryExecution.analyzed testPartialAggregationPlan(query) } test("mixed aggregates are partially aggregated") { val query = - testData.groupBy('value).agg(count('value), count_distinct('key)).queryExecution.analyzed + testData.groupBy(Symbol("value")) + .agg(count(Symbol("value")), count_distinct(Symbol("key"))) + .queryExecution.analyzed testPartialAggregationPlan(query) } @@ -193,45 +196,47 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("efficient terminal limit -> sort should use TakeOrderedAndProject") { - val query = testData.select('key, 'value).sort('key).limit(2) + val query = testData.select(Symbol("key"), Symbol("value")).sort(Symbol("key")).limit(2) val planned = query.queryExecution.executedPlan assert(planned.isInstanceOf[execution.TakeOrderedAndProjectExec]) - assert(planned.output === testData.select('key, 'value).logicalPlan.output) + assert(planned.output === testData.select(Symbol("key"), Symbol("value")).logicalPlan.output) } test("terminal limit -> project -> sort should use TakeOrderedAndProject") { - val query = testData.select('key, 'value).sort('key).select('value, 'key).limit(2) + val query = testData.select(Symbol("key"), Symbol("value")).sort(Symbol("key")) + .select(Symbol("value"), Symbol("key")).limit(2) val planned = query.queryExecution.executedPlan assert(planned.isInstanceOf[execution.TakeOrderedAndProjectExec]) - assert(planned.output === testData.select('value, 'key).logicalPlan.output) + assert(planned.output === testData.select(Symbol("value"), Symbol("key")).logicalPlan.output) } test("terminal limits that are not handled by TakeOrderedAndProject should use CollectLimit") { - val query = 
testData.select('value).limit(2) + val query = testData.select(Symbol("value")).limit(2) val planned = query.queryExecution.sparkPlan assert(planned.isInstanceOf[CollectLimitExec]) - assert(planned.output === testData.select('value).logicalPlan.output) + assert(planned.output === testData.select(Symbol("value")).logicalPlan.output) } test("TakeOrderedAndProject can appear in the middle of plans") { - val query = testData.select('key, 'value).sort('key).limit(2).filter('key === 3) + val query = testData.select(Symbol("key"), Symbol("value")) + .sort(Symbol("key")).limit(2).filter(Symbol("key") === 3) val planned = query.queryExecution.executedPlan assert(planned.find(_.isInstanceOf[TakeOrderedAndProjectExec]).isDefined) } test("CollectLimit can appear in the middle of a plan when caching is used") { - val query = testData.select('key, 'value).limit(2).cache() + val query = testData.select(Symbol("key"), Symbol("value")).limit(2).cache() val planned = query.queryExecution.optimizedPlan.asInstanceOf[InMemoryRelation] assert(planned.cachedPlan.isInstanceOf[CollectLimitExec]) } test("TakeOrderedAndProjectExec appears only when number of limit is below the threshold.") { withSQLConf(SQLConf.TOP_K_SORT_FALLBACK_THRESHOLD.key -> "1000") { - val query0 = testData.select('value).orderBy('key).limit(100) + val query0 = testData.select(Symbol("value")).orderBy(Symbol("key")).limit(100) val planned0 = query0.queryExecution.executedPlan assert(planned0.find(_.isInstanceOf[TakeOrderedAndProjectExec]).isDefined) - val query1 = testData.select('value).orderBy('key).limit(2000) + val query1 = testData.select(Symbol("value")).orderBy(Symbol("key")).limit(2000) val planned1 = query1.queryExecution.executedPlan assert(planned1.find(_.isInstanceOf[TakeOrderedAndProjectExec]).isEmpty) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala index 751078d08fda9..21702b6cf582c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala @@ -51,7 +51,7 @@ abstract class RemoveRedundantSortsSuiteBase test("remove redundant sorts with limit") { withTempView("t") { - spark.range(100).select('id as "key").createOrReplaceTempView("t") + spark.range(100).select(Symbol("id") as "key").createOrReplaceTempView("t") val query = """ |SELECT key FROM @@ -64,8 +64,8 @@ abstract class RemoveRedundantSortsSuiteBase test("remove redundant sorts with broadcast hash join") { withTempView("t1", "t2") { - spark.range(1000).select('id as "key").createOrReplaceTempView("t1") - spark.range(1000).select('id as "key").createOrReplaceTempView("t2") + spark.range(1000).select(Symbol("id") as "key").createOrReplaceTempView("t1") + spark.range(1000).select(Symbol("id") as "key").createOrReplaceTempView("t2") val queryTemplate = """ |SELECT /*+ BROADCAST(%s) */ t1.key FROM @@ -100,8 +100,8 @@ abstract class RemoveRedundantSortsSuiteBase test("remove redundant sorts with sort merge join") { withTempView("t1", "t2") { - spark.range(1000).select('id as "key").createOrReplaceTempView("t1") - spark.range(1000).select('id as "key").createOrReplaceTempView("t2") + spark.range(1000).select(Symbol("id") as "key").createOrReplaceTempView("t1") + spark.range(1000).select(Symbol("id") as "key").createOrReplaceTempView("t2") val query = """ |SELECT /*+ MERGE(t1) */ t1.key FROM | (SELECT key FROM t1 WHERE
key > 10 ORDER BY key DESC LIMIT 10) t1 @@ -123,8 +123,8 @@ abstract class RemoveRedundantSortsSuiteBase test("cached sorted data doesn't need to be re-sorted") { withSQLConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED.key -> "true") { - val df = spark.range(1000).select('id as "key").sort('key.desc).cache() - val resorted = df.sort('key.desc) + val df = spark.range(1000).select(Symbol("id") as "key").sort(Symbol("key").desc).cache() + val resorted = df.sort(Symbol("key").desc) val sortedAsc = df.sort('key.asc) checkNumSorts(df, 0) checkNumSorts(resorted, 0) @@ -140,7 +140,7 @@ abstract class RemoveRedundantSortsSuiteBase test("SPARK-33472: shuffled join with different left and right side partition numbers") { withTempView("t1", "t2") { - spark.range(0, 100, 1, 2).select('id as "key").createOrReplaceTempView("t1") + spark.range(0, 100, 1, 2).select(Symbol("id") as "key").createOrReplaceTempView("t1") (0 to 100).toDF("key").createOrReplaceTempView("t2") val queryTemplate = """ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index ee6d3525b6f1a..e4b7e5a271dfb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -593,7 +593,8 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { spark.range(10).write.saveAsTable("add_col") withView("v") { sql("CREATE VIEW v AS SELECT * FROM add_col") - spark.range(10).select('id, 'id as 'a).write.mode("overwrite").saveAsTable("add_col") + spark.range(10).select(Symbol("id"), Symbol("id") as Symbol("a")) + .write.mode("overwrite").saveAsTable("add_col") checkAnswer(sql("SELECT * FROM v"), spark.range(10).toDF()) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala index 812fdba8dda23..5fa7a4d0c71cc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala @@ -44,13 +44,15 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { checkAnswer( input.toDF("a", "b", "c"), - (child: SparkPlan) => SortExec('a.asc :: 'b.asc :: Nil, global = true, child = child), + (child: SparkPlan) => SortExec(Symbol("a").asc :: Symbol("b").asc :: Nil, + global = true, child = child), input.sortBy(t => (t._1, t._2)).map(Row.fromTuple), sortAnswers = false) checkAnswer( input.toDF("a", "b", "c"), - (child: SparkPlan) => SortExec('b.asc :: 'a.asc :: Nil, global = true, child = child), + (child: SparkPlan) => SortExec(Symbol("b").asc :: Symbol("a").asc :: Nil, + global = true, child = child), input.sortBy(t => (t._2, t._1)).map(Row.fromTuple), sortAnswers = false) } @@ -59,9 +61,9 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { checkThatPlansAgree( (1 to 100).map(v => Tuple1(v)).toDF().selectExpr("NULL as a"), (child: SparkPlan) => - GlobalLimitExec(10, SortExec('a.asc :: Nil, global = true, child = child)), + GlobalLimitExec(10, SortExec(Symbol("a").asc :: Nil, global = true, child = child)), (child: SparkPlan) => - GlobalLimitExec(10, ReferenceSort('a.asc :: Nil, global = true, child)), + GlobalLimitExec(10, ReferenceSort(Symbol("a").asc :: Nil, global = true, child)), sortAnswers = false ) } @@ -70,15 +72,15 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { checkThatPlansAgree( (1 to 100).map(v =>
Tuple1(v)).toDF("a"), (child: SparkPlan) => - GlobalLimitExec(10, SortExec('a.asc :: Nil, global = true, child = child)), + GlobalLimitExec(10, SortExec(Symbol("a").asc :: Nil, global = true, child = child)), (child: SparkPlan) => - GlobalLimitExec(10, ReferenceSort('a.asc :: Nil, global = true, child)), + GlobalLimitExec(10, ReferenceSort(Symbol("a").asc :: Nil, global = true, child)), sortAnswers = false ) } test("sorting does not crash for large inputs") { - val sortOrder = 'a.asc :: Nil + val sortOrder = Symbol("a").asc :: Nil val stringLength = 1024 * 1024 * 2 checkThatPlansAgree( Seq(Tuple1("a" * stringLength), Tuple1("b" * stringLength)).toDF("a").repartition(1), @@ -92,8 +94,8 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "unsafe external sort") { checkThatPlansAgree( (1 to 100).map(v => Tuple1(v)).toDF("a"), - (child: SparkPlan) => SortExec('a.asc :: Nil, global = true, child = child), - (child: SparkPlan) => ReferenceSort('a.asc :: Nil, global = true, child), + (child: SparkPlan) => SortExec(Symbol("a").asc :: Nil, global = true, child = child), + (child: SparkPlan) => ReferenceSort(Symbol("a").asc :: Nil, global = true, child), sortAnswers = false) } } @@ -106,7 +108,8 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { ) checkAnswer( input.toDF("a", "b", "c"), - (child: SparkPlan) => SortExec(Stream('a.asc, 'b.asc, 'c.asc), global = true, child = child), + (child: SparkPlan) => SortExec(Stream(Symbol("a").asc, Symbol("b").asc, Symbol("c").asc), + global = true, child = child), input.sortBy(t => (t._1, t._2, t._3)).map(Row.fromTuple), sortAnswers = false) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index ba6dd170d89a9..e26be63b10955 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -312,7 +312,7 @@ class SparkSqlParserSuite extends AnalysisTest { Seq(AttributeReference("a", StringType)(), AttributeReference("b", StringType)(), AttributeReference("c", StringType)()), - Project(Seq('a, 'b, 'c), + Project(Seq(Symbol("a"), Symbol("b"), Symbol("c")), UnresolvedRelation(TableIdentifier("testData"))), ioSchema)) @@ -336,9 +336,9 @@ class SparkSqlParserSuite extends AnalysisTest { UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), Literal(10)), Aggregate( - Seq('a), + Seq(Symbol("a")), Seq( - 'a, + Symbol("a"), UnresolvedAlias( UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), None), UnresolvedAlias( @@ -363,12 +363,12 @@ class SparkSqlParserSuite extends AnalysisTest { AttributeReference("c", StringType)()), WithWindowDefinition( Map("w" -> WindowSpecDefinition( - Seq('a), - Seq(SortOrder('b, Ascending, NullsFirst, Seq.empty)), + Seq(Symbol("a")), + Seq(SortOrder(Symbol("b"), Ascending, NullsFirst, Seq.empty)), UnspecifiedFrame)), Project( Seq( - 'a, + Symbol("a"), UnresolvedAlias( UnresolvedWindowExpression( UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), @@ -403,9 +403,9 @@ class SparkSqlParserSuite extends AnalysisTest { UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), Literal(10)), Aggregate( - Seq('a, 'myCol, 'myCol2), + Seq(Symbol("a"), Symbol("myCol"), Symbol("myCol2")), Seq( - 'a, + Symbol("a"), UnresolvedAlias(
UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), None), UnresolvedAlias( @@ -415,7 +415,7 @@ class SparkSqlParserSuite extends AnalysisTest { UnresolvedGenerator( FunctionIdentifier("explode"), Seq(UnresolvedAttribute("myTable.myCol"))), - Nil, false, Option("mytable2"), Seq('myCol2), + Nil, false, Option("mytable2"), Seq(Symbol("myCol2")), Generate( UnresolvedGenerator( FunctionIdentifier("explode"), @@ -423,7 +423,7 @@ class SparkSqlParserSuite extends AnalysisTest { Seq( UnresolvedFunction("array", Seq(Literal(1), Literal(2), Literal(3)), false)), false))), - Nil, false, Option("mytable"), Seq('myCol), + Nil, false, Option("mytable"), Seq(Symbol("myCol")), UnresolvedRelation(TableIdentifier("testData")))))), ioSchema)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala index c025670fb895e..3718b3a3c3378 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala @@ -49,7 +49,7 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark { val schema = writeWideRow(path.getAbsolutePath, rowsNum, numCols) val cols = (0 until numCols).map { idx => - from_json('value, schema).getField(s"col$idx") + from_json(Symbol("value"), schema).getField(s"col$idx") } Seq( @@ -88,7 +88,7 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark { val schema = writeWideRow(path.getAbsolutePath, rowsNum, numCols) val predicate = (0 until numCols).map { idx => - (from_json('value, schema).getField(s"col$idx") >= Literal(100000)).expr + (from_json(Symbol("value"), schema).getField(s"col$idx") >= Literal(100000)).expr }.asInstanceOf[Seq[Expression]].reduce(Or) Seq( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala index 6ec5c6287eed1..ce48945e52c5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala @@ -58,7 +58,7 @@ class TakeOrderedAndProjectSuite extends SparkPlanTest with SharedSparkSession { private def noOpFilter(plan: SparkPlan): SparkPlan = FilterExec(Literal(true), plan) val limit = 250 - val sortOrder = 'a.desc :: 'b.desc :: Nil + val sortOrder = Symbol("a").desc :: Symbol("b").desc :: Nil test("TakeOrderedAndProject.doExecute without project") { withClue(s"seed = $seed") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index 7332d49b942f8..b5b67287447c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -573,7 +573,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession import testImplicits._ withTempPath { dir => val path = dir.getCanonicalPath - val df = spark.range(10).select(Seq.tabulate(201) {i => ('id + i).as(s"c$i")} : _*) + val df = spark.range(10).select(Seq.tabulate(201) {i => (Symbol("id") + i).as(s"c$i")} : _*) df.write.mode(SaveMode.Overwrite).parquet(path) 
withSQLConf(SQLConf.WHOLESTAGE_MAX_NUM_FIELDS.key -> "202", @@ -590,7 +590,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession test("Control splitting consume function by operators with config") { import testImplicits._ - val df = spark.range(10).select(Seq.tabulate(2) {i => ('id + i).as(s"c$i")} : _*) + val df = spark.range(10).select(Seq.tabulate(2) {i => (Symbol("id") + i).as(s"c$i")} : _*) Seq(true, false).foreach { config => withSQLConf(SQLConf.WHOLESTAGE_SPLIT_CONSUME_FUNC_BY_OPERATOR.key -> s"$config") { @@ -653,9 +653,9 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_USE_ID_IN_CLASS_NAME.key -> "true") { // the same query run twice should produce identical code, which would imply a hit in // the generated code cache. - val ds1 = spark.range(3).select('id + 2) + val ds1 = spark.range(3).select(Symbol("id") + 2) val code1 = genCode(ds1) - val ds2 = spark.range(3).select('id + 2) + val ds2 = spark.range(3).select(Symbol("id") + 2) val code2 = genCode(ds2) // same query shape as above, deliberately assert(code1 == code2, "Should produce same code") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index ef4c2d0e08031..c24cc2bab9fd1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -280,11 +280,12 @@ class AdaptiveQueryExecSuite SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", SQLConf.COALESCE_PARTITIONS_ENABLED.key -> "true", SQLConf.ADAPTIVE_OPTIMIZER_EXCLUDED_RULES.key -> AQEPropagateEmptyRelation.ruleName) { - val df1 = spark.range(10).withColumn("a", 'id) - val df2 = spark.range(10).withColumn("b", 'id) + val df1 = spark.range(10).withColumn("a", Symbol("id")) + val df2 = spark.range(10).withColumn("b", Symbol("id")) withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { - val testDf = df1.where('a > 10).join(df2.where('b > 10), Seq("id"), "left_outer") - .groupBy('a).count() + val testDf = df1.where(Symbol("a") > 10) + .join(df2.where(Symbol("b") > 10), Seq("id"), "left_outer") + .groupBy(Symbol("a")).count() checkAnswer(testDf, Seq()) val plan = testDf.queryExecution.executedPlan assert(find(plan)(_.isInstanceOf[SortMergeJoinExec]).isDefined) @@ -296,8 +297,9 @@ class AdaptiveQueryExecSuite } withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "1") { - val testDf = df1.where('a > 10).join(df2.where('b > 10), Seq("id"), "left_outer") - .groupBy('a).count() + val testDf = df1.where(Symbol("a") > 10) + .join(df2.where(Symbol("b") > 10), Seq("id"), "left_outer") + .groupBy(Symbol("a")).count() checkAnswer(testDf, Seq()) val plan = testDf.queryExecution.executedPlan assert(find(plan)(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) @@ -751,17 +753,17 @@ class AdaptiveQueryExecSuite spark .range(0, 1000, 1, 10) .select( - when('id < 250, 249) - .when('id >= 750, 1000) - .otherwise('id).as("key1"), - 'id as "value1") + when(Symbol("id") < 250, 249) + .when(Symbol("id") >= 750, 1000) + .otherwise(Symbol("id")).as("key1"), + Symbol("id") as "value1") .createOrReplaceTempView("skewData1") spark .range(0, 1000, 1, 10) .select( - when('id < 250, 249) - .otherwise('id).as("key2"), - 'id as "value2") + when(Symbol("id") < 250, 249) + .otherwise(Symbol("id")).as("key2"), + Symbol("id") 
as "value2") .createOrReplaceTempView("skewData2") def checkSkewJoin( @@ -996,17 +998,17 @@ class AdaptiveQueryExecSuite spark .range(0, 1000, 1, 10) .select( - when('id < 250, 249) - .when('id >= 750, 1000) - .otherwise('id).as("key1"), - 'id as "value1") + when(Symbol("id") < 250, 249) + .when(Symbol("id") >= 750, 1000) + .otherwise(Symbol("id")).as("key1"), + Symbol("id") as "value1") .createOrReplaceTempView("skewData1") spark .range(0, 1000, 1, 10) .select( - when('id < 250, 249) - .otherwise('id).as("key2"), - 'id as "value2") + when(Symbol("id") < 250, 249) + .otherwise(Symbol("id")).as("key2"), + Symbol("id") as "value2") .createOrReplaceTempView("skewData2") val (_, adaptivePlan) = runAdaptiveAndVerifyResult( "SELECT * FROM skewData1 join skewData2 ON key1 = key2") @@ -1084,7 +1086,7 @@ class AdaptiveQueryExecSuite test("AQE should set active session during execution") { withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { - val df = spark.range(10).select(sum('id)) + val df = spark.range(10).select(sum(Symbol("id"))) assert(df.queryExecution.executedPlan.isInstanceOf[AdaptiveSparkPlanExec]) SparkSession.setActiveSession(null) checkAnswer(df, Seq(Row(45))) @@ -1111,7 +1113,7 @@ class AdaptiveQueryExecSuite SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true") { try { spark.experimental.extraStrategies = TestStrategy :: Nil - val df = spark.range(10).groupBy('id).count() + val df = spark.range(10).groupBy(Symbol("id")).count() df.collect() } finally { spark.experimental.extraStrategies = Nil @@ -1567,7 +1569,7 @@ class AdaptiveQueryExecSuite test("SPARK-33494: Do not use local shuffle read for repartition") { withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { - val df = spark.table("testData").repartition('key) + val df = spark.table("testData").repartition(Symbol("key")) df.collect() // local shuffle read breaks partitioning and shouldn't be used for repartition operation // which is specified by users. @@ -1651,23 +1653,23 @@ class AdaptiveQueryExecSuite withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") { // Repartition with no partition num specified. - checkBHJ(df.repartition('b), + checkBHJ(df.repartition(Symbol("b")), // The top shuffle from repartition is optimized out. optimizeOutRepartition = true, probeSideLocalRead = false, probeSideCoalescedRead = true) // Repartition with default partition num (5 in test env) specified. - checkBHJ(df.repartition(5, 'b), + checkBHJ(df.repartition(5, Symbol("b")), // The top shuffle from repartition is optimized out // The final plan must have 5 partitions, no optimization can be made to the probe side. optimizeOutRepartition = true, probeSideLocalRead = false, probeSideCoalescedRead = false) // Repartition with non-default partition num specified. - checkBHJ(df.repartition(4, 'b), + checkBHJ(df.repartition(4, Symbol("b")), // The top shuffle from repartition is not optimized out optimizeOutRepartition = false, probeSideLocalRead = true, probeSideCoalescedRead = true) // Repartition by col and project away the partition cols - checkBHJ(df.repartition('b).select('key), + checkBHJ(df.repartition(Symbol("b")).select(Symbol("key")), // The top shuffle from repartition is not optimized out optimizeOutRepartition = false, probeSideLocalRead = true, probeSideCoalescedRead = true) } @@ -1679,23 +1681,23 @@ class AdaptiveQueryExecSuite SQLConf.SKEW_JOIN_SKEWED_PARTITION_FACTOR.key -> "0", SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "10") { // Repartition with no partition num specified. 
- checkSMJ(df.repartition('b), + checkSMJ(df.repartition(Symbol("b")), // The top shuffle from repartition is optimized out. optimizeOutRepartition = true, optimizeSkewJoin = false, coalescedRead = true) // Repartition with default partition num (5 in test env) specified. - checkSMJ(df.repartition(5, 'b), + checkSMJ(df.repartition(5, Symbol("b")), // The top shuffle from repartition is optimized out. // The final plan must have 5 partitions, can't do coalesced read. optimizeOutRepartition = true, optimizeSkewJoin = false, coalescedRead = false) // Repartition with non-default partition num specified. - checkSMJ(df.repartition(4, 'b), + checkSMJ(df.repartition(4, Symbol("b")), // The top shuffle from repartition is not optimized out. optimizeOutRepartition = false, optimizeSkewJoin = true, coalescedRead = false) // Repartition by col and project away the partition cols - checkSMJ(df.repartition('b).select('key), + checkSMJ(df.repartition(Symbol("b")).select(Symbol("key")), // The top shuffle from repartition is not optimized out. optimizeOutRepartition = false, optimizeSkewJoin = true, coalescedRead = false) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala index e9bdff5853a51..31d5fd9ffdffe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala @@ -49,7 +49,7 @@ object RangeBenchmark extends SqlBasedBenchmark { } benchmark.addCase("filter after range", numIters = 4) { _ => - spark.range(N).filter('id % 100 === 0).noop() + spark.range(N).filter(Symbol("id") % 100 === 0).noop() } benchmark.addCase("count after range", numIters = 4) { _ => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala index 2cf12dd92f64c..120ddf469f4a0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala @@ -152,7 +152,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSparkSession { } test("projection") { - val logicalPlan = testData.select('value, 'key).logicalPlan + val logicalPlan = testData.select(Symbol("value"), Symbol("key")).logicalPlan val plan = spark.sessionState.executePlan(logicalPlan).sparkPlan val scan = InMemoryRelation(new TestCachedBatchSerializer(useCompression = true, 5), MEMORY_ONLY, plan, None, logicalPlan) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 4d24b262fa03a..53d643d3ea901 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -288,12 +288,12 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { val s = ScriptTransformation("func", Seq.empty, p, null) compareTransformQuery("select transform(a, b) using 'func' from e where f < 10", - s.copy(child = p.copy(child = p.child.where('f < 10)), - output = Seq('key.string, 'value.string))) + s.copy(child = p.copy(child = p.child.where(Symbol("f") < 10)), + output 
= Seq(Symbol("key").string, Symbol("value").string))) compareTransformQuery("map a, b using 'func' as c, d from e", - s.copy(output = Seq('c.string, 'd.string))) + s.copy(output = Seq(Symbol("c").string, Symbol("d").string))) compareTransformQuery("reduce a, b using 'func' as (c int, d decimal(10, 0)) from e", - s.copy(output = Seq('c.int, 'd.decimal(10, 0)))) + s.copy(output = Seq(Symbol("c").int, Symbol("d").decimal(10, 0)))) } test("use backticks in output of Script Transform") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 00d1ed2cbc680..9da40df7dbd2d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -115,7 +115,7 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSparkSession { }.getMessage assert(e.contains("Hive support is required to CREATE Hive TABLE (AS SELECT)")) - spark.range(1).select('id as 'a, 'id as 'b).write.saveAsTable("t1") + spark.range(1).select('id as Symbol("a"), 'id as Symbol("b")).write.saveAsTable("t1") e = intercept[AnalysisException] { sql("CREATE TABLE t STORED AS parquet SELECT a, b from t1") }.getMessage @@ -1374,7 +1374,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { sql("CREATE TABLE t USING parquet SELECT 1 as a, 1 as b") checkAnswer(spark.table("t"), Row(1, 1) :: Nil) - spark.range(1).select('id as 'a, 'id as 'b).write.saveAsTable("t1") + spark.range(1).select('id as Symbol("a"), 'id as Symbol("b")).write.saveAsTable("t1") sql("CREATE TABLE t2 USING parquet SELECT a, b from t1") checkAnswer(spark.table("t2"), spark.table("t1")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala index 37fe3c205e5d8..ef6d6f4a2968a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala @@ -26,12 +26,12 @@ import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructT class DataSourceStrategySuite extends PlanTest with SharedSparkSession { val attrInts = Seq( - 'cint.int, + Symbol("cint").int, Symbol("c.int").int, - GetStructField('a.struct(StructType( + GetStructField(Symbol("a").struct(StructType( StructField("cstr", StringType, nullable = true) :: StructField("cint", IntegerType, nullable = true) :: Nil)), 1, None), - GetStructField('a.struct(StructType( + GetStructField(Symbol("a").struct(StructType( StructField("c.int", IntegerType, nullable = true) :: StructField("cstr", StringType, nullable = true) :: Nil)), 0, None), GetStructField(Symbol("a.b").struct(StructType( @@ -40,7 +40,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { StructField("cint", IntegerType, nullable = true) :: Nil)), 2, None), GetStructField(Symbol("a.b").struct(StructType( StructField("c.int", IntegerType, nullable = true) :: Nil)), 0, None), - GetStructField(GetStructField('a.struct(StructType( + GetStructField(GetStructField(Symbol("a").struct(StructType( StructField("cstr1", StringType, nullable = true) :: StructField("b", StructType(StructField("cint", IntegerType, nullable = true) :: StructField("cstr2", StringType, nullable = true) :: Nil)) :: Nil)), 
1, None), 0, None) @@ -55,12 +55,12 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { )) val attrStrs = Seq( - 'cstr.string, + Symbol("cstr").string, Symbol("c.str").string, - GetStructField('a.struct(StructType( + GetStructField(Symbol("a").struct(StructType( StructField("cint", IntegerType, nullable = true) :: StructField("cstr", StringType, nullable = true) :: Nil)), 1, None), - GetStructField('a.struct(StructType( + GetStructField(Symbol("a").struct(StructType( StructField("c.str", StringType, nullable = true) :: StructField("cint", IntegerType, nullable = true) :: Nil)), 0, None), GetStructField(Symbol("a.b").struct(StructType( @@ -69,7 +69,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { StructField("cstr", StringType, nullable = true) :: Nil)), 2, None), GetStructField(Symbol("a.b").struct(StructType( StructField("c.str", StringType, nullable = true) :: Nil)), 0, None), - GetStructField(GetStructField('a.struct(StructType( + GetStructField(GetStructField(Symbol("a").struct(StructType( StructField("cint1", IntegerType, nullable = true) :: StructField("b", StructType(StructField("cstr", StringType, nullable = true) :: StructField("cint2", IntegerType, nullable = true) :: Nil)) :: Nil)), 1, None), 0, None) @@ -280,7 +280,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { }} test("SPARK-26865 DataSourceV2Strategy should push normalized filters") { - val attrInt = 'cint.int + val attrInt = Symbol("cint").int assertResult(Seq(IsNotNull(attrInt))) { DataSourceStrategy.normalizeExprs(Seq(IsNotNull(attrInt.withName("CiNt"))), Seq(attrInt)) } @@ -308,11 +308,11 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { } // `Abs(col)` can not be pushed down, so it returns `None` - assert(PushableColumnAndNestedColumn.unapply(Abs('col.int)) === None) + assert(PushableColumnAndNestedColumn.unapply(Abs(Symbol("col").int)) === None) } test("SPARK-36644: Push down boolean column filter") { - testTranslateFilter('col.boolean, Some(sources.EqualTo("col", true))) + testTranslateFilter(Symbol("col").boolean, Some(sources.EqualTo("col", true))) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala index 6ba3d2723412b..3034d4fe67c1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala @@ -143,7 +143,8 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester { test("Data source options should be propagated in method checkAndGlobPathIfNecessary") { val dataSourceOptions = Map("fs.defaultFS" -> "nonexistentFs://nonexistentFs") val dataSource = DataSource(spark, "parquet", Seq("/path3"), options = dataSourceOptions) - val checkAndGlobPathIfNecessary = PrivateMethod[Seq[Path]]('checkAndGlobPathIfNecessary) + val checkAndGlobPathIfNecessary = + PrivateMethod[Seq[Path]](Symbol("checkAndGlobPathIfNecessary")) val message = intercept[java.io.IOException] { dataSource invokePrivate checkAndGlobPathIfNecessary(false, false) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala index f492fc653653e..c9e15f71524d4 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala @@ -39,12 +39,15 @@ class FileFormatWriterSuite test("SPARK-22252: FileFormatWriter should respect the input query schema") { withTable("t1", "t2", "t3", "t4") { - spark.range(1).select('id as 'col1, 'id as 'col2).write.saveAsTable("t1") + spark.range(1).select(Symbol("id") as Symbol("col1"), Symbol("id") as Symbol("col2")) + .write.saveAsTable("t1") spark.sql("select COL1, COL2 from t1").write.saveAsTable("t2") checkAnswer(spark.table("t2"), Row(0, 0)) // Test picking part of the columns when writing. - spark.range(1).select('id, 'id as 'col1, 'id as 'col2).write.saveAsTable("t3") + spark.range(1) + .select(Symbol("id"), Symbol("id") as Symbol("col1"), Symbol("id") as Symbol("col2")) + .write.saveAsTable("t3") spark.sql("select COL1, COL2 from t3").write.saveAsTable("t4") checkAnswer(spark.table("t4"), Row(0, 0)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala index 634016664dfb6..b14ccb089f449 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala @@ -60,7 +60,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre "file9" -> 1, "file10" -> 1)) - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => // 10 one byte files should fit in a single partition with 10 files. 
assert(partitions.size == 1, "when checking partitions") assert(partitions.head.files.size == 10, "when checking partition 1") @@ -83,7 +83,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre withSQLConf(SQLConf.FILES_MAX_PARTITION_BYTES.key -> "11", SQLConf.FILES_OPEN_COST_IN_BYTES.key -> "1") { - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => // 5 byte files should be laid out [(5, 5), (5)] assert(partitions.size == 2, "when checking partitions") assert(partitions(0).files.size == 2, "when checking partition 1") @@ -108,7 +108,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre withSQLConf(SQLConf.FILES_MAX_PARTITION_BYTES.key -> "10", SQLConf.FILES_OPEN_COST_IN_BYTES.key -> "1") { - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => // Files should be laid out [(0-10), (10-15, 4)] assert(partitions.size == 2, "when checking partitions") assert(partitions(0).files.size == 1, "when checking partition 1") @@ -141,7 +141,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre withSQLConf(SQLConf.FILES_MAX_PARTITION_BYTES.key -> "4", SQLConf.FILES_OPEN_COST_IN_BYTES.key -> "1") { - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => // Files should be laid out [(file1), (file2, file3), (file4, file5), (file6)] assert(partitions.size == 4, "when checking partitions") assert(partitions(0).files.size == 1, "when checking partition 1") @@ -359,7 +359,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre withSQLConf( SQLConf.FILES_MAX_PARTITION_BYTES.key -> "2", SQLConf.FILES_OPEN_COST_IN_BYTES.key -> "0") { - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => assert(partitions.size == 2) assert(partitions(0).files.size == 1) assert(partitions(1).files.size == 2) @@ -375,7 +375,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre withSQLConf( SQLConf.FILES_MAX_PARTITION_BYTES.key -> "2", SQLConf.FILES_OPEN_COST_IN_BYTES.key -> "0") { - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => assert(partitions.size == 3) assert(partitions(0).files.size == 1) assert(partitions(1).files.size == 2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala index fe50e4e7f9d1a..2c227baa04fc2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala @@ -573,7 +573,7 @@ abstract class SchemaPruningSuite Seq(Concat(Seq($"name.first", $"name.last")), Concat(Seq($"name.last", $"name.first"))) ), - Seq('a.string, 'b.string), + Seq(Symbol("a").string, Symbol("b").string), sql("select * from contacts").logicalPlan ).toDF() checkScan(query1, "struct>") @@ -590,7 +590,7 @@ abstract class SchemaPruningSuite val name = StructType.fromDDL("first string, middle string, last string") val query2 = Expand( Seq(Seq($"name", $"name.last")), - Seq('a.struct(name), 'b.string), + Seq(Symbol("a").struct(name), Symbol("b").string), sql("select * from contacts").logicalPlan ).toDF() checkScan(query2, "struct>") @@ -909,7 +909,7 @@ abstract 
class SchemaPruningSuite .createOrReplaceTempView("table") val read = spark.table("table") - val query = read.select(explode($"items").as('item)).select(count($"*")) + val query = read.select(explode($"items").as(Symbol("item"))).select(count($"*")) checkScan(query, "struct>>") checkAnswer(query, Row(2) :: Nil) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 7bbe371879d40..9f9b7b72ab329 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -1836,7 +1836,7 @@ abstract class CSVSuite val idf = spark.read .schema(schema) .csv(path.getCanonicalPath) - .select('f15, 'f10, 'f5) + .select(Symbol("f15"), Symbol("f10"), Symbol("f5")) assert(idf.count() == 2) checkAnswer(idf, List(Row(15, 10, 5), Row(-15, -10, -5))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala index e4f6ccaa9a621..c741320d4220b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala @@ -263,7 +263,7 @@ object JsonBenchmark extends SqlBasedBenchmark { benchmark.addCase("from_json", iters) { _ => val schema = new StructType().add("a", IntegerType) - val from_json_ds = in.select(from_json('value, schema)) + val from_json_ds = in.select(from_json(Symbol("value"), schema)) from_json_ds.noop() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala index 3cb8287f09b26..b892a9e155815 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala @@ -90,7 +90,7 @@ class NoopStreamSuite extends StreamTest { .option("numPartitions", "1") .option("rowsPerSecond", "5") .load() - .select('value) + .select(Symbol("value")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala index b4073bedf5597..811953754953a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala @@ -42,7 +42,7 @@ class NoopSuite extends SharedSparkSession { withTempPath { dir => val path = dir.getCanonicalPath spark.range(numElems) - .select('id mod 10 as "key", 'id as "value") + .select(Symbol("id") mod 10 as "key", Symbol("id") as "value") .write .partitionBy("key") .parquet(path) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index 038606b854d9e..551a3f5a7cc1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -371,7 +371,7 @@ abstract 
class OrcQueryTest extends OrcTest { withTempPath { dir => val path = dir.getCanonicalPath - spark.range(0, 10).select('id as "Acol").write.orc(path) + spark.range(0, 10).select(Symbol("id") as "Acol").write.orc(path) spark.read.orc(path).schema("Acol") intercept[IllegalArgumentException] { spark.read.orc(path).schema("acol") @@ -416,19 +416,19 @@ abstract class OrcQueryTest extends OrcTest { s"No data was filtered for predicate: $pred") } - checkPredicate('a === 5, List(5).map(Row(_, null))) - checkPredicate('a <=> 5, List(5).map(Row(_, null))) - checkPredicate('a < 5, List(1, 3).map(Row(_, null))) - checkPredicate('a <= 5, List(1, 3, 5).map(Row(_, null))) - checkPredicate('a > 5, List(7, 9).map(Row(_, null))) - checkPredicate('a >= 5, List(5, 7, 9).map(Row(_, null))) - checkPredicate('a.isNull, List(null).map(Row(_, null))) - checkPredicate('b.isNotNull, List()) - checkPredicate('a.isin(3, 5, 7), List(3, 5, 7).map(Row(_, null))) - checkPredicate('a > 0 && 'a < 3, List(1).map(Row(_, null))) - checkPredicate('a < 1 || 'a > 8, List(9).map(Row(_, null))) - checkPredicate(!('a > 3), List(1, 3).map(Row(_, null))) - checkPredicate(!('a > 0 && 'a < 3), List(3, 5, 7, 9).map(Row(_, null))) + checkPredicate(Symbol("a") === 5, List(5).map(Row(_, null))) + checkPredicate(Symbol("a") <=> 5, List(5).map(Row(_, null))) + checkPredicate(Symbol("a") < 5, List(1, 3).map(Row(_, null))) + checkPredicate(Symbol("a") <= 5, List(1, 3, 5).map(Row(_, null))) + checkPredicate(Symbol("a") > 5, List(7, 9).map(Row(_, null))) + checkPredicate(Symbol("a") >= 5, List(5, 7, 9).map(Row(_, null))) + checkPredicate(Symbol("a").isNull, List(null).map(Row(_, null))) + checkPredicate(Symbol("b").isNotNull, List()) + checkPredicate(Symbol("a").isin(3, 5, 7), List(3, 5, 7).map(Row(_, null))) + checkPredicate(Symbol("a") > 0 && Symbol("a") < 3, List(1).map(Row(_, null))) + checkPredicate(Symbol("a") < 1 || Symbol("a") > 8, List(9).map(Row(_, null))) + checkPredicate(!(Symbol("a") > 3), List(1, 3).map(Row(_, null))) + checkPredicate(!(Symbol("a") > 0 && Symbol("a") < 3), List(3, 5, 7, 9).map(Row(_, null))) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 9b554b626df85..d5180a393f61a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -1426,39 +1426,39 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared test("filter pushdown - StringStartsWith") { withParquetDataFrame((1 to 4).map(i => Tuple1(i + "str" + i))) { implicit df => checkFilterPredicate( - '_1.startsWith("").asInstanceOf[Predicate], + Symbol("_1").startsWith("").asInstanceOf[Predicate], classOf[UserDefinedByInstance[_, _]], Seq("1str1", "2str2", "3str3", "4str4").map(Row(_))) Seq("2", "2s", "2st", "2str", "2str2").foreach { prefix => checkFilterPredicate( - '_1.startsWith(prefix).asInstanceOf[Predicate], + Symbol("_1").startsWith(prefix).asInstanceOf[Predicate], classOf[UserDefinedByInstance[_, _]], "2str2") } Seq("2S", "null", "2str22").foreach { prefix => checkFilterPredicate( - '_1.startsWith(prefix).asInstanceOf[Predicate], + Symbol("_1").startsWith(prefix).asInstanceOf[Predicate], classOf[UserDefinedByInstance[_, _]], Seq.empty[Row]) } checkFilterPredicate( - 
!'_1.startsWith("").asInstanceOf[Predicate], + !Symbol("_1").startsWith("").asInstanceOf[Predicate], classOf[Operators.Not], Seq().map(Row(_))) Seq("2", "2s", "2st", "2str", "2str2").foreach { prefix => checkFilterPredicate( - !'_1.startsWith(prefix).asInstanceOf[Predicate], + !Symbol("_1").startsWith(prefix).asInstanceOf[Predicate], classOf[Operators.Not], Seq("1str1", "3str3", "4str4").map(Row(_))) } Seq("2S", "null", "2str22").foreach { prefix => checkFilterPredicate( - !'_1.startsWith(prefix).asInstanceOf[Predicate], + !Symbol("_1").startsWith(prefix).asInstanceOf[Predicate], classOf[Operators.Not], Seq("1str1", "2str2", "3str3", "4str4").map(Row(_))) } @@ -1472,7 +1472,7 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared // SPARK-28371: make sure filter is null-safe. withParquetDataFrame(Seq(Tuple1[String](null))) { implicit df => checkFilterPredicate( - '_1.startsWith("blah").asInstanceOf[Predicate], + Symbol("_1").startsWith("blah").asInstanceOf[Predicate], classOf[UserDefinedByInstance[_, _]], Seq.empty[Row]) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index c70ac8084a841..99b2d9844ed1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -187,7 +187,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession .range(1000) // Parquet doesn't allow column names with spaces, have to add an alias here. // Minus 500 here so that negative decimals are also tested. - .select((('id - 500) / 100.0) cast decimal as 'dec) + .select(((Symbol("id") - 500) / 100.0) cast decimal as Symbol("dec")) .coalesce(1) } @@ -802,7 +802,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession withTempPath { dir => val m2 = intercept[SparkException] { - val df = spark.range(1).select('id as 'a, 'id as 'b).coalesce(1) + val df = spark.range(1).select(Symbol("id") as Symbol("a"), Symbol("id") as Symbol("b")) + .coalesce(1) df.write.partitionBy("a").options(extraOptions).parquet(dir.getCanonicalPath) }.getCause.getMessage assert(m2.contains("Intentional exception for testing purposes")) @@ -868,7 +869,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession checkAnswer( // Decimal column in this file is encoded using plain dictionary readResourceParquetFile("test-data/dec-in-i32.parquet"), - spark.range(1 << 4).select('id % 10 cast DecimalType(5, 2) as 'i32_dec)) + spark.range(1 << 4).select(Symbol("id") % 10 cast DecimalType(5, 2) as Symbol("i32_dec"))) } } @@ -877,7 +878,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession checkAnswer( // Decimal column in this file is encoded using plain dictionary readResourceParquetFile("test-data/dec-in-i64.parquet"), - spark.range(1 << 4).select('id % 10 cast DecimalType(10, 2) as 'i64_dec)) + spark.range(1 << 4).select(Symbol("id") % 10 cast DecimalType(10, 2) as Symbol("i64_dec"))) } } @@ -886,7 +887,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession checkAnswer( // Decimal column in this file is encoded using plain dictionary readResourceParquetFile("test-data/dec-in-fixed-len.parquet"), - spark.range(1 << 4).select('id % 10 cast DecimalType(10, 2) as 'fixed_len_dec)) + 
spark.range(1 << 4) + .select(Symbol("id") % 10 cast DecimalType(10, 2) as Symbol("fixed_len_dec"))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index bf37421331db6..f3751562c332e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -979,7 +979,8 @@ abstract class ParquetPartitionDiscoverySuite withTempPath { dir => withSQLConf(SQLConf.PARALLEL_PARTITION_DISCOVERY_THRESHOLD.key -> "1") { val path = dir.getCanonicalPath - val df = spark.range(5).select('id as 'a, 'id as 'b, 'id as 'c).coalesce(1) + val df = spark.range(5).select(Symbol("id") as Symbol("a"), Symbol("id") as Symbol("b"), + Symbol("id") as Symbol("c")).coalesce(1) df.write.partitionBy("b", "c").parquet(path) checkAnswer(spark.read.parquet(path), df) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 057de2abdb9e0..654ab7fe36200 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -153,7 +153,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS (1, "2016-01-01 10:11:12.123456"), (2, null), (3, "1965-01-01 10:11:12.123456")) - .toDS().select('_1, $"_2".cast("timestamp")) + .toDS().select(Symbol("_1"), $"_2".cast("timestamp")) checkAnswer(sql("select * from ts"), expected) } } @@ -805,7 +805,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS test("SPARK-15804: write out the metadata to parquet file") { val df = Seq((1, "abc"), (2, "hello")).toDF("a", "b") val md = new MetadataBuilder().putString("key", "value").build() - val dfWithmeta = df.select('a, 'b.as("b", md)) + val dfWithmeta = df.select(Symbol("a"), Symbol("b").as("b", md)) withTempPath { dir => val path = dir.getCanonicalPath @@ -1027,7 +1027,7 @@ class ParquetV1QuerySuite extends ParquetQuerySuite { withSQLConf(SQLConf.WHOLESTAGE_MAX_NUM_FIELDS.key -> "10") { withTempPath { dir => val path = dir.getCanonicalPath - val df = spark.range(10).select(Seq.tabulate(11) {i => ('id + i).as(s"c$i")} : _*) + val df = spark.range(10).select(Seq.tabulate(11) {i => (Symbol("id") + i).as(s"c$i")} : _*) df.write.mode(SaveMode.Overwrite).parquet(path) // do not return batch - whole stage codegen is disabled for wide table (>200 columns) @@ -1060,7 +1060,7 @@ class ParquetV2QuerySuite extends ParquetQuerySuite { withSQLConf(SQLConf.WHOLESTAGE_MAX_NUM_FIELDS.key -> "10") { withTempPath { dir => val path = dir.getCanonicalPath - val df = spark.range(10).select(Seq.tabulate(11) {i => ('id + i).as(s"c$i")} : _*) + val df = spark.range(10).select(Seq.tabulate(11) {i => (Symbol("id") + i).as(s"c$i")} : _*) df.write.mode(SaveMode.Overwrite).parquet(path) // do not return batch - whole stage codegen is disabled for wide table (>200 columns) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala index 2feea41d15656..d0228d7bdf9f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala @@ -944,7 +944,8 @@ class ParquetSchemaSuite extends ParquetSchemaTest { withTempPath { dir => val path = dir.getCanonicalPath spark.range(3).write.parquet(s"$path/p=1") - spark.range(3).select('id cast IntegerType as 'id).write.parquet(s"$path/p=2") + spark.range(3).select(Symbol("id") cast IntegerType as Symbol("id")) + .write.parquet(s"$path/p=2") val message = intercept[SparkException] { spark.read.option("mergeSchema", "true").parquet(path).schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala index 143feebdd4994..0fb6fc58c400d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.types.BooleanType class DataSourceV2StrategySuite extends PlanTest with SharedSparkSession { test("SPARK-36644: Push down boolean column filter") { - testTranslateFilter('col.boolean, + testTranslateFilter(Symbol("col").boolean, Some(new V2EqualTo(FieldReference("col"), LiteralValue(true, BooleanType)))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala index 767a26876f902..6e2eba68d9262 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala @@ -36,11 +36,11 @@ class ValidateRequirementsSuite extends PlanTest with SharedSparkSession { rightPartitionNum: Int, success: Boolean): Unit = { val table1 = - spark.range(10).select('id + 1 as 'a1, 'id + 2 as 'b1, 'id + 3 as 'c1) - .queryExecution.executedPlan + spark.range(10).select(Symbol("id") + 1 as Symbol("a1"), Symbol("id") + 2 as Symbol("b1"), + Symbol("id") + 3 as Symbol("c1")).queryExecution.executedPlan val table2 = - spark.range(10).select('id + 1 as 'a2, 'id + 2 as 'b2, 'id + 3 as 'c2) - .queryExecution.executedPlan + spark.range(10).select(Symbol("id") + 1 as Symbol("a2"), Symbol("id") + 2 as Symbol("b2"), + Symbol("id") + 3 as Symbol("c2")).queryExecution.executedPlan val leftKeys = joinKeyIndices.map(table1.output) val rightKeys = joinKeyIndices.map(table2.output) @@ -105,14 +105,14 @@ class ValidateRequirementsSuite extends PlanTest with SharedSparkSession { partNums: Seq[Int], success: Boolean): Unit = { val table1 = - spark.range(10).select('id + 1 as 'a1, 'id + 2 as 'b1, 'id + 3 as 'c1) - .queryExecution.executedPlan + spark.range(10).select(Symbol("id") + 1 as Symbol("a1"), Symbol("id") + 2 as Symbol("b1"), + Symbol("id") + 3 as Symbol("c1")).queryExecution.executedPlan val table2 = - spark.range(10).select('id + 1 as 'a2, 'id + 2 as 'b2, 'id + 3 as 'c2) - .queryExecution.executedPlan + spark.range(10).select(Symbol("id") + 1 as Symbol("a2"), Symbol("id") + 2 as Symbol("b2"), + Symbol("id") + 3 as 
Symbol("c2")).queryExecution.executedPlan val table3 = - spark.range(10).select('id + 1 as 'a3, 'id + 2 as 'b3, 'id + 3 as 'c3) - .queryExecution.executedPlan + spark.range(10).select(Symbol("id") + 1 as Symbol("a3"), Symbol("id") + 2 as Symbol("b3"), + Symbol("id") + 3 as Symbol("c3")).queryExecution.executedPlan val key1 = joinKeyIndices1.map(_._1).map(table1.output) val key2 = joinKeyIndices1.map(_._2).map(table2.output) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index f27a249c8f753..256e942620272 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -415,8 +415,8 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils test("Broadcast timeout") { val timeout = 5 val slowUDF = udf({ x: Int => Thread.sleep(timeout * 1000); x }) - val df1 = spark.range(10).select($"id" as 'a) - val df2 = spark.range(5).select(slowUDF($"id") as 'a) + val df1 = spark.range(10).select($"id" as Symbol("a")) + val df2 = spark.range(5).select(slowUDF($"id") as Symbol("a")) val testDf = df1.join(broadcast(df2), "a") withSQLConf(SQLConf.BROADCAST_TIMEOUT.key -> timeout.toString) { if (!conf.adaptiveExecutionEnabled) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 0fd5c892e2c42..aa746370b8fd3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -79,7 +79,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // Assume the execution plan is // PhysicalRDD(nodeId = 1) -> Filter(nodeId = 0) Seq((0L, false), (1L, true)).foreach { case (nodeId, enableWholeStage) => - val df = person.filter('age < 25) + val df = person.filter(Symbol("age") < 25) testSparkPlanMetrics(df, 1, Map( nodeId -> (("Filter", Map( "number of output rows" -> 1L)))), @@ -94,7 +94,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // Filter(nodeId = 1) // Range(nodeId = 2) // TODO: update metrics in generated operators - val ds = spark.range(10).filter('id < 5) + val ds = spark.range(10).filter(Symbol("id") < 5) testSparkPlanMetricsWithPredicates(ds.toDF(), 1, Map( 0L -> (("WholeStageCodegen (1)", Map( "duration" -> { @@ -128,7 +128,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils ) // 2 partitions and each partition contains 2 keys - val df2 = testData2.groupBy('a).count() + val df2 = testData2.groupBy(Symbol("a")).count() val expected2 = Seq( Map("number of output rows" -> 4L, "avg hash probe bucket list iters" -> @@ -176,7 +176,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // Exchange(nodeId = 5) // LocalTableScan(nodeId = 6) Seq(true, false).foreach { enableWholeStage => - val df = generateRandomBytesDF().repartition(2).groupBy('a).count() + val df = generateRandomBytesDF().repartition(2).groupBy(Symbol("a")).count() val nodeIds = if (enableWholeStage) { Set(4L, 1L) } else { @@ -204,7 +204,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // Assume the execution plan is // ... 
-> ObjectHashAggregate(nodeId = 2) -> Exchange(nodeId = 1) // -> ObjectHashAggregate(nodeId = 0) - val df = testData2.groupBy().agg(collect_set('a)) // 2 partitions + val df = testData2.groupBy().agg(collect_set(Symbol("a"))) // 2 partitions testSparkPlanMetrics(df, 1, Map( 2L -> (("ObjectHashAggregate", Map("number of output rows" -> 2L))), 1L -> (("Exchange", Map( @@ -216,7 +216,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils ) // 2 partitions and each partition contains 2 keys - val df2 = testData2.groupBy('a).agg(collect_set('a)) + val df2 = testData2.groupBy(Symbol("a")).agg(collect_set(Symbol("a"))) testSparkPlanMetrics(df2, 1, Map( 2L -> (("ObjectHashAggregate", Map( "number of output rows" -> 4L, @@ -233,7 +233,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // 2 partitions and each partition contains 2 keys, with fallback to sort-based aggregation withSQLConf(SQLConf.OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD.key -> "1") { - val df3 = testData2.groupBy('a).agg(collect_set('a)) + val df3 = testData2.groupBy(Symbol("a")).agg(collect_set(Symbol("a"))) testSparkPlanMetrics(df3, 1, Map( 2L -> (("ObjectHashAggregate", Map( "number of output rows" -> 4L, @@ -263,7 +263,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // LocalTableScan(nodeId = 3) // Because of SPARK-25267, ConvertToLocalRelation is disabled in the test cases of sql/core, // so Project here is not collapsed into LocalTableScan. - val df = Seq(1, 3, 2).toDF("id").sort('id) + val df = Seq(1, 3, 2).toDF("id").sort(Symbol("id")) testSparkPlanMetricsWithPredicates(df, 2, Map( 0L -> (("Sort", Map( "sort time" -> { @@ -281,7 +281,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils test("SortMergeJoin metrics") { // Because SortMergeJoin may skip different rows if the number of partitions is different, this // test should use the deterministic number of partitions. - val testDataForJoin = testData2.filter('a < 2) // TestData2(1, 1) :: TestData2(1, 2) + val testDataForJoin = testData2.filter(Symbol("a") < 2) // TestData2(1, 1) :: TestData2(1, 2) testDataForJoin.createOrReplaceTempView("testDataForJoin") withTempView("testDataForJoin") { // Assume the execution plan is @@ -314,7 +314,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils test("SortMergeJoin(outer) metrics") { // Because SortMergeJoin may skip different rows if the number of partitions is different, // this test should use the deterministic number of partitions. 
- val testDataForJoin = testData2.filter('a < 2) // TestData2(1, 1) :: TestData2(1, 2) + val testDataForJoin = testData2.filter(Symbol("a") < 2) // TestData2(1, 1) :: TestData2(1, 2) testDataForJoin.createOrReplaceTempView("testDataForJoin") withTempView("testDataForJoin") { // Assume the execution plan is @@ -459,7 +459,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils } test("BroadcastNestedLoopJoin metrics") { - val testDataForJoin = testData2.filter('a < 2) // TestData2(1, 1) :: TestData2(1, 2) + val testDataForJoin = testData2.filter(Symbol("a") < 2) // TestData2(1, 1) :: TestData2(1, 2) testDataForJoin.createOrReplaceTempView("testDataForJoin") withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { withTempView("testDataForJoin") { @@ -512,7 +512,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils test("CartesianProduct metrics") { withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { - val testDataForJoin = testData2.filter('a < 2) // TestData2(1, 1) :: TestData2(1, 2) + val testDataForJoin = testData2.filter(Symbol("a") < 2) // TestData2(1, 1) :: TestData2(1, 2) testDataForJoin.createOrReplaceTempView("testDataForJoin") withTempView("testDataForJoin") { // Assume the execution plan is @@ -547,7 +547,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils test("save metrics") { withTempPath { file => // person creates a temporary view. get the DF before listing previous execution IDs - val data = person.select('name) + val data = person.select(Symbol("name")) val previousExecutionIds = currentExecutionIds() // Assume the execution plan is // PhysicalRDD(nodeId = 0) @@ -704,7 +704,8 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { // A special query that only has one partition, so there is no shuffle and the entire query // can be whole-stage-codegened. 
- val df = spark.range(0, 1500, 1, 1).limit(10).groupBy('id).count().limit(1).filter('id >= 0) + val df = spark.range(0, 1500, 1, 1).limit(10).groupBy(Symbol("id")) + .count().limit(1).filter(Symbol("id") >= 0) df.collect() val plan = df.queryExecution.executedPlan diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala index 53ef9dfbe39fa..f06e62b33b1a0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala @@ -44,8 +44,8 @@ class MicroBatchExecutionSuite extends StreamTest with BeforeAndAfter { val df = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(df)( @@ -104,8 +104,8 @@ class MicroBatchExecutionSuite extends StreamTest with BeforeAndAfter { val df = testSource.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long]) /** Reset this test source so that it appears to be a new source requiring initialization */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala index 5884380271f0e..11dbf9c2beaa1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala @@ -141,7 +141,7 @@ class ConsoleWriteSupportSuite extends StreamTest { .option("numPartitions", "1") .option("rowsPerSecond", "5") .load() - .select('value) + .select(Symbol("value")) val query = input.writeStream.format("console").trigger(Trigger.Continuous(200)).start() assert(query.isActive) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala index 0fe339b93047a..46440c98226aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala @@ -165,8 +165,8 @@ class ForeachWriterSuite extends StreamTest with SharedSparkSession with BeforeA val windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"count".as[Long]) .map(_.toInt) .repartition(1) @@ -199,8 +199,8 @@ class ForeachWriterSuite extends
StreamTest with SharedSparkSession with BeforeA val windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"count".as[Long]) .map(_.toInt) .repartition(1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RatePerMicroBatchProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RatePerMicroBatchProviderSuite.scala index 449aea8256673..fe846acab28ca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RatePerMicroBatchProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RatePerMicroBatchProviderSuite.scala @@ -60,7 +60,7 @@ class RatePerMicroBatchProviderSuite extends StreamTest { .format("rate-micro-batch") .option("rowsPerBatch", "10") .load() - .select('value) + .select(Symbol("value")) val clock = new StreamManualClock testStream(input)( @@ -97,7 +97,7 @@ class RatePerMicroBatchProviderSuite extends StreamTest { .format("rate-micro-batch") .option("rowsPerBatch", "10") .load() - .select('value) + .select(Symbol("value")) val clock = new StreamManualClock testStream(input)( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala index 6440e69e2ec23..2c1bb41302c11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala @@ -83,7 +83,7 @@ class RateStreamProviderSuite extends StreamTest { .format("rate") .option("rowsPerSecond", "10") .load() - .select('value) + .select(Symbol("value")) var streamDuration = 0 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala index d4792301a1ce5..0678cfc38660e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala @@ -67,7 +67,7 @@ class RocksDBStateStoreIntegrationSuite extends StreamTest { val inputData = MemoryStream[Int] val query = inputData.toDS().toDF("value") - .select('value) + .select(Symbol("value")) .groupBy($"value") .agg(count("*")) .writeStream @@ -119,7 +119,7 @@ class RocksDBStateStoreIntegrationSuite extends StreamTest { def startQuery(): StreamingQuery = { inputData.toDS().toDF("value") - .select('value) + .select(Symbol("value")) .groupBy($"value") .agg(count("*")) .writeStream @@ -156,7 +156,7 @@ class RocksDBStateStoreIntegrationSuite extends StreamTest { SQLConf.STATE_STORE_ROCKSDB_FORMAT_VERSION.key -> "100") { val inputData = MemoryStream[Int] val query = inputData.toDS().toDF("value") - .select('value) + .select(Symbol("value")) .groupBy($"value") .agg(count("*")) .writeStream @@ -179,7 +179,7 @@ class RocksDBStateStoreIntegrationSuite extends StreamTest { val inputData = MemoryStream[Int] val query = 
inputData.toDS().toDF("value") - .select('value) + .select(Symbol("value")) .groupBy($"value") .agg(count("*")) .writeStream diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala index ad744696f5472..9b5b532d3ecdc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala @@ -878,7 +878,8 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils val oldCount = statusStore.executionsList().size val cls = classOf[CustomMetricsDataSource].getName - spark.range(10).select('id as 'i, -'id as 'j).write.format(cls) + spark.range(10).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls) .option("path", dir.getCanonicalPath).mode("append").save() // Wait until the new execution is started and being tracked. @@ -919,7 +920,8 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils try { val cls = classOf[CustomMetricsDataSource].getName - spark.range(0, 10, 1, 2).select('id as 'i, -'id as 'j).write.format(cls) + spark.range(0, 10, 1, 2).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls) .option("path", dir.getCanonicalPath).mode("append").save() // Wait until the new execution is started and being tracked. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala index dde463dd395f7..057bb34175a29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala @@ -81,7 +81,7 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { withTempPath { path => val pathString = path.getCanonicalPath - spark.range(10).select('id.as("ID")).write.json(pathString) + spark.range(10).select(Symbol("id").as("ID")).write.json(pathString) spark.range(10).write.mode("append").json(pathString) assert(spark.read.json(pathString).columns.toSet == Set("id", "ID")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala index 81ce979ef0b62..1b1f3714dc701 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala @@ -36,7 +36,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with override def beforeAll(): Unit = { super.beforeAll() - targetAttributes = Seq('a.int, 'd.int, 'b.int, 'c.int) + targetAttributes = Seq(Symbol("a").int, Symbol("d").int, Symbol("b").int, Symbol("c").int) targetPartitionSchema = new StructType() .add("b", IntegerType) .add("c", IntegerType) @@ -74,7 +74,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with caseSensitive) { intercept[AssertionError] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int), providedPartitions = Map("b" -> None, "c" -> None), targetAttributes = targetAttributes, targetPartitionSchema =
targetPartitionSchema) @@ -85,7 +85,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Missing columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int), + sourceAttributes = Seq(Symbol("e").int), providedPartitions = Map("b" -> Some("1"), "c" -> None), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -96,7 +96,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Missing partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int), providedPartitions = Map("b" -> Some("1")), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -105,7 +105,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Missing partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int, 'g.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int, Symbol("g").int), providedPartitions = Map("b" -> Some("1")), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -114,7 +114,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Wrong partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int), providedPartitions = Map("b" -> Some("1"), "d" -> None), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -125,7 +125,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Wrong partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int), providedPartitions = Map("b" -> Some("1"), "d" -> Some("2")), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -134,7 +134,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Wrong partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int), + sourceAttributes = Seq(Symbol("e").int), providedPartitions = Map("b" -> Some("1"), "c" -> Some("3"), "d" -> Some("2")), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -144,7 +144,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Wrong partitioning columns. 
intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int), providedPartitions = Map("b" -> Some("1"), "C" -> Some("3")), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala index a81bd3bd060d3..3d315be636741 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala @@ -133,8 +133,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val inputData1 = MemoryStream[Int] val aggWithoutWatermark = inputData1.toDF() .withColumn("eventTime", timestamp_seconds($"value")) - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(aggWithoutWatermark, outputMode = Complete)( @@ -151,8 +151,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val aggWithWatermark = inputData2.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(aggWithWatermark)( @@ -174,8 +174,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val aggWithWatermark = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) // Unlike the ProcessingTime trigger, Trigger.Once only runs one trigger every time @@ -229,8 +229,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val aggWithWatermark = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) @@ -291,8 +291,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(windowedAggregation)( @@ -316,8 +316,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val 
windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(windowedAggregation, OutputMode.Update)( @@ -346,8 +346,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val aggWithWatermark = input.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "2 years 5 months") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) def monthsSinceEpoch(date: Date): Int = { @@ -378,8 +378,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val df = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(df)( @@ -413,17 +413,17 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val firstDf = first.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .select('value) + .select(Symbol("value")) val second = MemoryStream[Int] val secondDf = second.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "5 seconds") - .select('value) + .select(Symbol("value")) withTempDir { checkpointDir => - val unionWriter = firstDf.union(secondDf).agg(sum('value)) + val unionWriter = firstDf.union(secondDf).agg(sum(Symbol("value"))) .writeStream .option("checkpointLocation", checkpointDir.getCanonicalPath) .format("memory") @@ -490,8 +490,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) // No eviction when asked to compute complete results. 
@@ -516,7 +516,7 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") .groupBy($"eventTime") - .agg(count("*") as 'count) + .agg(count("*") as Symbol("count")) .select($"eventTime".cast("long").as[Long], $"count".as[Long]) testStream(windowedAggregation)( @@ -587,7 +587,7 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val groupEvents = input .withWatermark("eventTime", "2 seconds") .groupBy("symbol", "eventTime") - .agg(count("price") as 'count) + .agg(count("price") as Symbol("count")) .select("symbol", "eventTime", "count") val q = groupEvents.writeStream .outputMode("append") @@ -606,14 +606,14 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val aliasWindow = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .select(window($"eventTime", "5 seconds") as 'aliasWindow) + .select(window($"eventTime", "5 seconds") as Symbol("aliasWindow")) // Check the eventTime metadata is kept in the top level alias. assert(aliasWindow.logicalPlan.output.exists( _.metadata.contains(EventTimeWatermark.delayKey))) val windowedAggregation = aliasWindow - .groupBy('aliasWindow) - .agg(count("*") as 'count) + .groupBy(Symbol("aliasWindow")) + .agg(count("*") as Symbol("count")) .select($"aliasWindow".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(windowedAggregation)( @@ -636,8 +636,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(windowedAggregation)( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index e89197b5ff26c..71e8ae74fe207 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -216,7 +216,7 @@ class StreamSuite extends StreamTest { query.processAllAvailable() // Parquet write page-level CRC checksums will change the file size and // affect the data order when reading these files. Please see PARQUET-1746 for details. 
- val outputDf = spark.read.parquet(outputDir.getAbsolutePath).sort('a).as[Long] + val outputDf = spark.read.parquet(outputDir.getAbsolutePath).sort(Symbol("a")).as[Long] checkDataset[Long](outputDf, (0L to 10L).toArray: _*) } finally { query.stop() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 8a7bb8b60c878..a183e6b4e3950 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -109,7 +109,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { val aggregated = inputData.toDF() - .select($"*", explode($"_2") as 'value) + .select($"*", explode($"_2") as Symbol("value")) .groupBy($"_1") .agg(size(collect_set($"value"))) .as[(Int, Int)] @@ -190,8 +190,8 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { val aggWithWatermark = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) implicit class RichStreamExecution(query: StreamExecution) { @@ -413,13 +413,13 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { inputDataOne.toDF() .groupBy($"value") .agg(count("*")) - .where('value >= current_timestamp().cast("long") - 10L) + .where(Symbol("value") >= current_timestamp().cast("long") - 10L) val inputDataTwo = MemoryStream[Long] val aggregatedTwo = inputDataTwo.toDF() .groupBy($"value") .agg(count("*")) - .where('value >= localtimestamp().cast(TimestampType).cast("long") - 10L) + .where(Symbol("value") >= localtimestamp().cast(TimestampType).cast("long") - 10L) Seq((inputDataOne, aggregatedOne), (inputDataTwo, aggregatedTwo)).foreach { x => val inputData = x._1 @@ -475,7 +475,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { val inputData = MemoryStream[Long] val aggregated = inputData.toDF() - .select(to_utc_timestamp(from_unixtime('value * SECONDS_PER_DAY), tz)) + .select(to_utc_timestamp(from_unixtime(Symbol("value") * SECONDS_PER_DAY), tz)) .toDF("value") .groupBy($"value") .agg(count("*")) @@ -522,12 +522,12 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { val streamInput = MemoryStream[Int] val batchDF = Seq(1, 2, 3, 4, 5) .toDF("value") - .withColumn("parity", 'value % 2) - .groupBy('parity) - .agg(count("*") as 'joinValue) + .withColumn("parity", Symbol("value") % 2) + .groupBy(Symbol("parity")) + .agg(count("*") as Symbol("joinValue")) val joinDF = streamInput .toDF() - .join(batchDF, 'value === 'parity) + .join(batchDF, Symbol("value") === Symbol("parity")) // make sure we're planning an aggregate in the first place assert(batchDF.queryExecution.optimizedPlan match { case _: Aggregate => true }) @@ -639,7 +639,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { def createDf(partitions: Int): Dataset[(Long, Long)] = { spark.readStream .format((new MockSourceProvider).getClass.getCanonicalName) - .load().coalesce(partitions).groupBy('a % 1).count().as[(Long, Long)] + 
.load().coalesce(partitions).groupBy(Symbol("a") % 1).count().as[(Long, Long)] } testStream(createDf(1), Complete())( @@ -677,7 +677,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { testWithAllStateVersions("SPARK-22230: last should change with new batches") { val input = MemoryStream[Int] - val aggregated = input.toDF().agg(last('value)) + val aggregated = input.toDF().agg(last(Symbol("value"))) testStream(aggregated, OutputMode.Complete())( AddData(input, 1, 2, 3), CheckLastBatch(3), @@ -853,8 +853,8 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { val aggWithWatermark = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) inputData.reset() // reset the input to clear any data from prev test diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala index aa03da6c5843f..c1908d95f39e3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala @@ -146,8 +146,8 @@ class StreamingDeduplicationSuite extends StateStoreMetricsTest { .withWatermark("eventTime", "10 seconds") .dropDuplicates() .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(windowedaggregate)( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index 2fbe6c4fed392..29caaf7289d6f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -56,9 +56,9 @@ abstract class StreamingJoinSuite val input = MemoryStream[Int] val df = input.toDF .select( - 'value as "key", + Symbol("value") as "key", timestamp_seconds($"value") as s"${prefix}Time", - ('value * multiplier) as s"${prefix}Value") + (Symbol("value") * multiplier) as s"${prefix}Value") .withWatermark(s"${prefix}Time", "10 seconds") (input, df) @@ -69,13 +69,16 @@ abstract class StreamingJoinSuite val (input1, df1) = setupStream("left", 2) val (input2, df2) = setupStream("right", 3) - val windowed1 = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val windowed2 = df2.select('key, window('rightTime, "10 second"), 'rightValue) + val windowed1 = df1 + .select(Symbol("key"), window(Symbol("leftTime"), "10 second"), Symbol("leftValue")) + val windowed2 = df2 + .select(Symbol("key"), window(Symbol("rightTime"), "10 second"), Symbol("rightValue")) val joined = windowed1.join(windowed2, Seq("key", "window"), joinType) val select = if (joinType == "left_semi") { - joined.select('key, $"window.end".cast("long"), 'leftValue) + joined.select(Symbol("key"), $"window.end".cast("long"), Symbol("leftValue")) } else 
{ - joined.select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) + joined.select(Symbol("key"), $"window.end".cast("long"), Symbol("leftValue"), + Symbol("rightValue")) } (input1, input2, select) @@ -87,25 +90,29 @@ abstract class StreamingJoinSuite val (leftInput, df1) = setupStream("left", 2) val (rightInput, df2) = setupStream("right", 3) // Use different schemas to ensure the null row is being generated from the correct side. - val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) + val left = df1.select(Symbol("key"), window(Symbol("leftTime"), "10 second"), + Symbol("leftValue")) + val right = df2.select(Symbol("key"), window(Symbol("rightTime"), "10 second"), + Symbol("rightValue").cast("string")) val joined = left.join( right, left("key") === right("key") && left("window") === right("window") - && 'leftValue > 4, + && Symbol("leftValue") > 4, joinType) val select = if (joinType == "left_semi") { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue")) } else if (joinType == "left_outer") { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue"), + Symbol("rightValue")) } else if (joinType == "right_outer") { - joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + joined.select(right("key"), right("window.end").cast("long"), Symbol("leftValue"), + Symbol("rightValue")) } else { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue, - right("key"), right("window.end").cast("long"), 'rightValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue"), + right("key"), right("window.end").cast("long"), Symbol("rightValue")) } (leftInput, rightInput, select) @@ -117,25 +124,29 @@ abstract class StreamingJoinSuite val (leftInput, df1) = setupStream("left", 2) val (rightInput, df2) = setupStream("right", 3) // Use different schemas to ensure the null row is being generated from the correct side. 
- val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) + val left = df1.select(Symbol("key"), window(Symbol("leftTime"), "10 second"), + Symbol("leftValue")) + val right = df2.select(Symbol("key"), window(Symbol("rightTime"), "10 second"), + Symbol("rightValue").cast("string")) val joined = left.join( right, left("key") === right("key") && left("window") === right("window") - && 'rightValue.cast("int") > 7, + && Symbol("rightValue").cast("int") > 7, joinType) val select = if (joinType == "left_semi") { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue")) } else if (joinType == "left_outer") { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue"), + Symbol("rightValue")) } else if (joinType == "right_outer") { - joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + joined.select(right("key"), right("window.end").cast("long"), Symbol("leftValue"), + Symbol("rightValue")) } else { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue, - right("key"), right("window.end").cast("long"), 'rightValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue"), + right("key"), right("window.end").cast("long"), Symbol("rightValue")) } (leftInput, rightInput, select) @@ -152,12 +163,13 @@ abstract class StreamingJoinSuite val rightInput = MemoryStream[(Int, Int)] val df1 = leftInput.toDF.toDF("leftKey", "time") - .select('leftKey, timestamp_seconds($"time") as "leftTime", ('leftKey * 2) as "leftValue") + .select(Symbol("leftKey"), timestamp_seconds($"time") as "leftTime", + (Symbol("leftKey") * 2) as "leftValue") .withWatermark("leftTime", watermark) val df2 = rightInput.toDF.toDF("rightKey", "time") - .select('rightKey, timestamp_seconds($"time") as "rightTime", - ('rightKey * 3) as "rightValue") + .select(Symbol("rightKey"), timestamp_seconds($"time") as "rightTime", + (Symbol("rightKey") * 3) as "rightValue") .withWatermark("rightTime", watermark) val joined = @@ -168,9 +180,10 @@ abstract class StreamingJoinSuite joinType) val select = if (joinType == "left_semi") { - joined.select('leftKey, 'leftTime.cast("int")) + joined.select(Symbol("leftKey"), Symbol("leftTime").cast("int")) } else { - joined.select('leftKey, 'rightKey, 'leftTime.cast("int"), 'rightTime.cast("int")) + joined.select(Symbol("leftKey"), Symbol("rightKey"), Symbol("leftTime").cast("int"), + Symbol("rightTime").cast("int")) } (leftInput, rightInput, select) @@ -217,8 +230,8 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input1 = MemoryStream[Int] val input2 = MemoryStream[Int] - val df1 = input1.toDF.select('value as "key", ('value * 2) as "leftValue") - val df2 = input2.toDF.select('value as "key", ('value * 3) as "rightValue") + val df1 = input1.toDF.select(Symbol("value") as "key", (Symbol("value") * 2) as "leftValue") + val df2 = input2.toDF.select(Symbol("value") as "key", (Symbol("value") * 3) as "rightValue") val joined = df1.join(df2, "key") testStream(joined)( @@ -247,17 +260,17 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input2 = MemoryStream[Int] val df1 = input1.toDF - .select('value as "key", timestamp_seconds($"value") as "timestamp", - ('value * 2) as "leftValue") - 
.select('key, window('timestamp, "10 second"), 'leftValue) + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 2) as "leftValue") + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("leftValue")) val df2 = input2.toDF - .select('value as "key", timestamp_seconds($"value") as "timestamp", - ('value * 3) as "rightValue") - .select('key, window('timestamp, "10 second"), 'rightValue) + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 3) as "rightValue") + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("rightValue")) val joined = df1.join(df2, Seq("key", "window")) - .select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) + .select(Symbol("key"), $"window.end".cast("long"), Symbol("leftValue"), Symbol("rightValue")) testStream(joined)( AddData(input1, 1), @@ -288,18 +301,18 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input2 = MemoryStream[Int] val df1 = input1.toDF - .select('value as "key", timestamp_seconds($"value") as "timestamp", - ('value * 2) as "leftValue") + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 2) as "leftValue") .withWatermark("timestamp", "10 seconds") - .select('key, window('timestamp, "10 second"), 'leftValue) + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("leftValue")) val df2 = input2.toDF - .select('value as "key", timestamp_seconds($"value") as "timestamp", - ('value * 3) as "rightValue") - .select('key, window('timestamp, "10 second"), 'rightValue) + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 3) as "rightValue") + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("rightValue")) val joined = df1.join(df2, Seq("key", "window")) - .select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) + .select(Symbol("key"), $"window.end".cast("long"), Symbol("leftValue"), Symbol("rightValue")) testStream(joined)( AddData(input1, 1), @@ -339,17 +352,18 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val rightInput = MemoryStream[(Int, Int)] val df1 = leftInput.toDF.toDF("leftKey", "time") - .select('leftKey, timestamp_seconds($"time") as "leftTime", ('leftKey * 2) as "leftValue") + .select(Symbol("leftKey"), timestamp_seconds($"time") as "leftTime", + (Symbol("leftKey") * 2) as "leftValue") .withWatermark("leftTime", "10 seconds") val df2 = rightInput.toDF.toDF("rightKey", "time") - .select('rightKey, timestamp_seconds($"time") as "rightTime", - ('rightKey * 3) as "rightValue") + .select(Symbol("rightKey"), timestamp_seconds($"time") as "rightTime", + (Symbol("rightKey") * 3) as "rightValue") .withWatermark("rightTime", "10 seconds") val joined = df1.join(df2, expr("leftKey = rightKey AND leftTime < rightTime - interval 5 seconds")) - .select('leftKey, 'leftTime.cast("int"), 'rightTime.cast("int")) + .select(Symbol("leftKey"), Symbol("leftTime").cast("int"), Symbol("rightTime").cast("int")) testStream(joined)( AddData(leftInput, (1, 5)), @@ -398,12 +412,13 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val rightInput = MemoryStream[(Int, Int)] val df1 = leftInput.toDF.toDF("leftKey", "time") - .select('leftKey, timestamp_seconds($"time") as "leftTime", ('leftKey * 2) as "leftValue") + .select(Symbol("leftKey"), timestamp_seconds($"time") as "leftTime", + (Symbol("leftKey") * 2) as "leftValue") 
.withWatermark("leftTime", "20 seconds") val df2 = rightInput.toDF.toDF("rightKey", "time") - .select('rightKey, timestamp_seconds($"time") as "rightTime", - ('rightKey * 3) as "rightValue") + .select(Symbol("rightKey"), timestamp_seconds($"time") as "rightTime", + (Symbol("rightKey") * 3) as "rightValue") .withWatermark("rightTime", "30 seconds") val condition = expr( @@ -432,7 +447,8 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { // drop state where rightTime < eventTime - 5 val joined = - df1.join(df2, condition).select('leftKey, 'leftTime.cast("int"), 'rightTime.cast("int")) + df1.join(df2, condition).select(Symbol("leftKey"), Symbol("leftTime").cast("int"), + Symbol("rightTime").cast("int")) testStream(joined)( // If leftTime = 20, then it match only with rightTime = [15, 30] @@ -479,8 +495,10 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input1 = MemoryStream[Int] val input2 = MemoryStream[Int] - val df1 = input1.toDF.select('value as "leftKey", ('value * 2) as "leftValue") - val df2 = input2.toDF.select('value as "rightKey", ('value * 3) as "rightValue") + val df1 = input1.toDF + .select(Symbol("value") as "leftKey", (Symbol("value") * 2) as "leftValue") + val df2 = input2.toDF + .select(Symbol("value") as "rightKey", (Symbol("value") * 3) as "rightValue") val joined = df1.join(df2, expr("leftKey < rightKey")) val e = intercept[Exception] { val q = joined.writeStream.format("memory").queryName("test").start() @@ -494,8 +512,8 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input = MemoryStream[Int] val df = input.toDF val join = - df.select('value % 5 as "key", 'value).join( - df.select('value % 5 as "key", 'value), "key") + df.select(Symbol("value") % 5 as "key", Symbol("value")).join( + df.select(Symbol("value") % 5 as "key", Symbol("value")), "key") testStream(join)( AddData(input, 1, 2), @@ -559,9 +577,11 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input2 = MemoryStream[Int] val input3 = MemoryStream[Int] - val df1 = input1.toDF.select('value as "leftKey", ('value * 2) as "leftValue") - val df2 = input2.toDF.select('value as "middleKey", ('value * 3) as "middleValue") - val df3 = input3.toDF.select('value as "rightKey", ('value * 5) as "rightValue") + val df1 = input1.toDF.select(Symbol("value") as "leftKey", (Symbol("value") * 2) as "leftValue") + val df2 = input2.toDF + .select(Symbol("value") as "middleKey", (Symbol("value") * 3) as "middleValue") + val df3 = input3.toDF + .select(Symbol("value") as "rightKey", (Symbol("value") * 5) as "rightValue") val joined = df1.join(df2, expr("leftKey = middleKey")).join(df3, expr("rightKey = middleKey")) @@ -576,9 +596,12 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input1 = MemoryStream[Int] val input2 = MemoryStream[Int] - val df1 = input1.toDF.select('value as 'a, 'value * 2 as 'b) - val df2 = input2.toDF.select('value as 'a, 'value * 2 as 'b).repartition('b) - val joined = df1.join(df2, Seq("a", "b")).select('a) + val df1 = input1.toDF + .select(Symbol("value") as Symbol("a"), Symbol("value") * 2 as Symbol("b")) + val df2 = input2.toDF + .select(Symbol("value") as Symbol("a"), Symbol("value") * 2 as Symbol("b")) + .repartition(Symbol("b")) + val joined = df1.join(df2, Seq("a", "b")).select(Symbol("a")) testStream(joined)( AddData(input1, 1.to(1000): _*), @@ -667,18 +690,18 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input2 = MemoryStream[Int] val df1 = input1.toDF - .select('value as "key", 
timestamp_seconds($"value") as "timestamp", - ('value * 2) as "leftValue") + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 2) as "leftValue") .withWatermark("timestamp", "10 seconds") - .select('key, window('timestamp, "10 second"), 'leftValue) + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("leftValue")) val df2 = input2.toDF - .select('value as "key", timestamp_seconds($"value") as "timestamp", - ('value * 3) as "rightValue") - .select('key, window('timestamp, "10 second"), 'rightValue) + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 3) as "rightValue") + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("rightValue")) val joined = df1.join(df2, Seq("key", "window")) - .select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) + .select(Symbol("key"), $"window.end".cast("long"), Symbol("leftValue"), Symbol("rightValue")) testStream(joined)( StartStream(additionalConfs = Map(SQLConf.SHUFFLE_PARTITIONS.key -> "3")), @@ -924,15 +947,19 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { val (leftInput, simpleLeftDf) = setupStream("left", 2) val (rightInput, simpleRightDf) = setupStream("right", 3) - val left = simpleLeftDf.select('key, window('leftTime, "10 second"), 'leftValue) - val right = simpleRightDf.select('key, window('rightTime, "10 second"), 'rightValue) + val left = simpleLeftDf + .select(Symbol("key"), window(Symbol("leftTime"), "10 second"), Symbol("leftValue")) + val right = simpleRightDf + .select(Symbol("key"), window(Symbol("rightTime"), "10 second"), Symbol("rightValue")) val joined = left.join( right, left("key") === right("key") && left("window") === right("window") && - 'leftValue > 10 && ('rightValue < 300 || 'rightValue > 1000), + Symbol("leftValue") > 10 && + (Symbol("rightValue") < 300 || Symbol("rightValue") > 1000), "left_outer") - .select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + .select(left("key"), left("window.end").cast("long"), Symbol("leftValue"), + Symbol("rightValue")) testStream(joined)( // leftValue <= 10 should generate outer join rows even though it matches right keys @@ -1123,9 +1150,9 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { val input1 = MemoryStream[Int](desiredPartitionsForInput1) val df1 = input1.toDF .select( - 'value as "key", - 'value as "leftValue", - 'value as "rightValue") + Symbol("value") as "key", + Symbol("value") as "leftValue", + Symbol("value") as "rightValue") val (input2, df2) = setupStream("left", 2) val (input3, df3) = setupStream("right", 3) @@ -1133,7 +1160,7 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { .join(df3, df2("key") === df3("key") && df2("leftTime") === df3("rightTime"), "inner") - .select(df2("key"), 'leftValue, 'rightValue) + .select(df2("key"), Symbol("leftValue"), Symbol("rightValue")) (input1, input2, input3, df1.union(joined)) } @@ -1316,15 +1343,15 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { "_2 * 3 as rightValue") .withWatermark("rightTime", "10 seconds") - val windowed1 = df1.select('leftKey1, 'leftKey2, - window('leftTime, "10 second").as('leftWindow), 'leftValue) - val windowed2 = df2.select('rightKey1, 'rightKey2, - window('rightTime, "10 second").as('rightWindow), 'rightValue) + val windowed1 = df1.select(Symbol("leftKey1"), Symbol("leftKey2"), + window(Symbol("leftTime"), "10 second").as(Symbol("leftWindow")), Symbol("leftValue")) + val windowed2 = 
df2.select(Symbol("rightKey1"), Symbol("rightKey2"), + window(Symbol("rightTime"), "10 second").as(Symbol("rightWindow")), Symbol("rightValue")) windowed1.join(windowed2, expr("leftKey1 <=> rightKey1 AND leftKey2 = rightKey2 AND leftWindow = rightWindow"), "left_outer" - ).select('leftKey1, 'rightKey1, 'leftKey2, 'rightKey2, $"leftWindow.end".cast("long"), - 'leftValue, 'rightValue) + ).select(Symbol("leftKey1"), Symbol("rightKey1"), Symbol("leftKey2"), Symbol("rightKey2"), + $"leftWindow.end".cast("long"), Symbol("leftValue"), Symbol("rightValue")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala index 99fcef109a07c..7bc4288b2c1c4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala @@ -237,7 +237,7 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { val inputData = MemoryStream[Int] val query = inputData.toDS().toDF("value") - .select('value) + .select(Symbol("value")) .groupBy($"value") .agg(count("*")) .writeStream diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index 54bed5c966d1f..84060733e865c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -860,8 +860,8 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi val baseDf = Seq((1, "A"), (2, "b")).toDF("num", "char").where("char = 'A'") val otherDf = stream.toDF().toDF("num", "numSq") .join(broadcast(baseDf), "num") - .groupBy('char) - .agg(sum('numSq)) + .groupBy(Symbol("char")) + .agg(sum(Symbol("numSq"))) testStream(otherDf, OutputMode.Complete())( AddData(stream, (1, 1), (2, 4)), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala index e82b9df93dd7d..d0f3a87acbc29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala @@ -417,7 +417,7 @@ class StreamingSessionWindowSuite extends StreamTest .selectExpr("explode(split(value, ' ')) AS sessionId", "eventTime") events - .groupBy(sessionWindow as 'session, 'sessionId) + .groupBy(sessionWindow as Symbol("session"), Symbol("sessionId")) .agg(count("*").as("numEvents")) .selectExpr("sessionId", "CAST(session.start AS LONG)", "CAST(session.end AS LONG)", "CAST(session.end AS LONG) - CAST(session.start AS LONG) AS durationMs", @@ -429,8 +429,8 @@ class StreamingSessionWindowSuite extends StreamTest .selectExpr("*") .withColumn("eventTime", $"value".cast("timestamp")) .withWatermark("eventTime", "10 seconds") - .groupBy(session_window($"eventTime", "5 seconds") as 'session) - .agg(count("*") as 'count, sum("value") as 'sum) + .groupBy(session_window($"eventTime", "5 seconds") as Symbol("session")) + .agg(count("*") as Symbol("count"), sum("value") as Symbol("sum")) .select($"session".getField("start").cast("long").as[Long], $"session".getField("end").cast("long").as[Long], 
$"count".as[Long], $"sum".as[Long]) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 0e2fcfbd46356..5893c3da09812 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -257,7 +257,7 @@ class ContinuousSuite extends ContinuousSuiteBase { .option("numPartitions", "2") .option("rowsPerSecond", "2") .load() - .select('value) + .select(Symbol("value")) val query = df.writeStream .format("memory") @@ -306,7 +306,7 @@ class ContinuousStressSuite extends ContinuousSuiteBase { .option("numPartitions", "5") .option("rowsPerSecond", "500") .load() - .select('value) + .select(Symbol("value")) testStream(df)( StartStream(longContinuousTrigger), @@ -326,7 +326,7 @@ class ContinuousStressSuite extends ContinuousSuiteBase { .option("numPartitions", "5") .option("rowsPerSecond", "500") .load() - .select('value) + .select(Symbol("value")) testStream(df)( StartStream(Trigger.Continuous(2012)), @@ -345,7 +345,7 @@ class ContinuousStressSuite extends ContinuousSuiteBase { .option("numPartitions", "5") .option("rowsPerSecond", "500") .load() - .select('value) + .select(Symbol("value")) testStream(df)( StartStream(Trigger.Continuous(1012)), @@ -436,7 +436,7 @@ class ContinuousEpochBacklogSuite extends ContinuousSuiteBase { .option("numPartitions", "2") .option("rowsPerSecond", "500") .load() - .select('value) + .select(Symbol("value")) testStream(df)( StartStream(Trigger.Continuous(1)), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index fc78527af381e..c40ba02fd0dd8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -553,7 +553,10 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { val createArray = udf { (length: Long) => for (i <- 1 to length.toInt) yield i.toString } - spark.range(4).select(createArray('id + 1) as 'ex, 'id, 'id % 4 as 'part).coalesce(1).write + spark.range(4) + .select(createArray(Symbol("id") + 1) as Symbol("ex"), Symbol("id"), + Symbol("id") % 4 as Symbol("part")) + .coalesce(1).write .partitionBy("part", "id") .mode("overwrite") .parquet(src.toString) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index cb3bd29c27991..dabd9c001eb3d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -880,7 +880,8 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with val createArray = udf { (length: Long) => for (i <- 1 to length.toInt) yield i.toString } - spark.range(4).select(createArray('id + 1) as 'ex, 'id, 'id % 4 as 'part).coalesce(1).write + spark.range(4).select(createArray(Symbol("id") + 1) as Symbol("ex"), + Symbol("id"), Symbol("id") % 4 as Symbol("part")).coalesce(1).write .partitionBy("part", "id") .mode("overwrite") .parquet(src.toString) diff --git 
diff --git a/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceSuite.scala
index baa04ada8b5d1..11201aadf67f8 100644
--- a/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceSuite.scala
@@ -152,7 +152,7 @@ class SqlResourceSuite extends SparkFunSuite with PrivateMethodTester {
   import SqlResourceSuite._

   val sqlResource = new SqlResource()
-  val prepareExecutionData = PrivateMethod[ExecutionData]('prepareExecutionData)
+  val prepareExecutionData = PrivateMethod[ExecutionData](Symbol("prepareExecutionData"))

   test("Prepare ExecutionData when details = false and planDescription = false") {
     val executionData =
@@ -196,7 +196,7 @@ class SqlResourceSuite extends SparkFunSuite with PrivateMethodTester {
   }

   test("Parse wholeStageCodegenId from nodeName") {
-    val getWholeStageCodegenId = PrivateMethod[Option[Long]]('getWholeStageCodegenId)
+    val getWholeStageCodegenId = PrivateMethod[Option[Long]](Symbol("getWholeStageCodegenId"))
     val wholeStageCodegenId =
       sqlResource invokePrivate getWholeStageCodegenId(WHOLE_STAGE_CODEGEN_1)
     assert(wholeStageCodegenId == Some(1))