diff --git a/.github/workflows/util/install-spark-resources.sh b/.github/workflows/util/install-spark-resources.sh
index 4d1dd27a9c45..5d7216e54e20 100755
--- a/.github/workflows/util/install-spark-resources.sh
+++ b/.github/workflows/util/install-spark-resources.sh
@@ -114,10 +114,10 @@ case "$1" in
cd ${INSTALL_DIR} && \
install_spark "3.5.5" "3" "2.13"
;;
-4.0)
- # Spark-4.0, scala 2.12 // using 2.12 as a hack as 4.0 does not have 2.13 suffix
+4.1)
+ # Spark-4.x, scala 2.12 // using 2.12 as a hack as 4.x does not have 2.13 suffix
cd ${INSTALL_DIR} && \
- install_spark "4.0.1" "3" "2.12"
+ install_spark "4.1.0" "3" "2.12"
;;
*)
echo "Spark version is expected to be specified."
diff --git a/.github/workflows/velox_backend_x86.yml b/.github/workflows/velox_backend_x86.yml
index 1f3df3eaa8b3..a1e2db10a864 100644
--- a/.github/workflows/velox_backend_x86.yml
+++ b/.github/workflows/velox_backend_x86.yml
@@ -1378,7 +1378,7 @@ jobs:
ccache -s
"
- spark-test-spark40:
+ spark-test-spark41:
needs: build-native-lib-centos-7
runs-on: ubuntu-22.04
env:
@@ -1403,11 +1403,11 @@ jobs:
pip3 install setuptools==77.0.3 && \
pip3 install pyspark==3.5.5 cython && \
pip3 install pandas==2.2.3 pyarrow==20.0.0
- - name: Prepare Spark Resources for Spark 4.0.1 #TODO remove after image update
+ - name: Prepare Spark Resources for Spark 4.1.0 #TODO remove after image update
run: |
- rm -rf /opt/shims/spark40
- bash .github/workflows/util/install-spark-resources.sh 4.0
- mv /opt/shims/spark40/spark_home/assembly/target/scala-2.12 /opt/shims/spark40/spark_home/assembly/target/scala-2.13
+ rm -rf /opt/shims/spark41
+ bash .github/workflows/util/install-spark-resources.sh 4.1
+ mv /opt/shims/spark41/spark_home/assembly/target/scala-2.12 /opt/shims/spark41/spark_home/assembly/target/scala-2.13
- name: Build and Run unit test for Spark 4.0.0 with scala-2.13 (other tests)
run: |
cd $GITHUB_WORKSPACE/
@@ -1417,7 +1417,7 @@ jobs:
export PATH=$JAVA_HOME/bin:$PATH
java -version
$MVN_CMD clean test -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox \
- -Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark40/spark_home/" \
+ -Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark41/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest
- name: Upload test report
if: always()
@@ -1435,7 +1435,7 @@ jobs:
**/gluten-ut/**/hs_err_*.log
**/gluten-ut/**/core.*
- spark-test-spark40-slow:
+ spark-test-spark41-slow:
needs: build-native-lib-centos-7
runs-on: ubuntu-22.04
env:
@@ -1453,11 +1453,11 @@ jobs:
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- - name: Prepare Spark Resources for Spark 4.0.1 #TODO remove after image update
+ - name: Prepare Spark Resources for Spark 4.1.0 #TODO remove after image update
run: |
- rm -rf /opt/shims/spark40
- bash .github/workflows/util/install-spark-resources.sh 4.0
- mv /opt/shims/spark40/spark_home/assembly/target/scala-2.12 /opt/shims/spark40/spark_home/assembly/target/scala-2.13
+ rm -rf /opt/shims/spark41
+ bash .github/workflows/util/install-spark-resources.sh 4.1
+ mv /opt/shims/spark41/spark_home/assembly/target/scala-2.12 /opt/shims/spark41/spark_home/assembly/target/scala-2.13
- name: Build and Run unit test for Spark 4.0 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
@@ -1466,7 +1466,7 @@ jobs:
export PATH=$JAVA_HOME/bin:$PATH
java -version
$MVN_CMD clean test -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pspark-ut \
- -DargLine="-Dspark.test.home=/opt/shims/spark40/spark_home/" \
+ -DargLine="-Dspark.test.home=/opt/shims/spark41/spark_home/" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
if: always()
diff --git a/backends-velox/src/main/scala/org/apache/gluten/extension/ArrowConvertorRule.scala b/backends-velox/src/main/scala/org/apache/gluten/extension/ArrowConvertorRule.scala
index 25371be8d1fa..f5e0b8526abd 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/extension/ArrowConvertorRule.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/extension/ArrowConvertorRule.scala
@@ -29,8 +29,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.util.PermissiveMode
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.execution.datasources.v2.csv.CSVTable
+import org.apache.spark.sql.execution.datasources.v2.DataSourceV2RelationShim.CSVTableExtractor
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.utils.SparkArrowUtil
@@ -56,25 +55,15 @@ case class ArrowConvertorRule(session: SparkSession) extends Rule[LogicalPlan] {
l.copy(relation = r.copy(fileFormat = new ArrowCSVFileFormat(csvOptions))(session))
case _ => l
}
- case d @ DataSourceV2Relation(
- t @ CSVTable(
- name,
- sparkSession,
- options,
- paths,
- userSpecifiedSchema,
- fallbackFileFormat),
- _,
- _,
- _,
- _) if validate(session, t.dataSchema, options.asCaseSensitiveMap().toMap) =>
+ case CSVTableExtractor(d, t)
+ if validate(session, t.dataSchema, t.options.asCaseSensitiveMap().toMap) =>
d.copy(table = ArrowCSVTable(
- "arrow" + name,
- sparkSession,
- options,
- paths,
- userSpecifiedSchema,
- fallbackFileFormat))
+ "arrow" + t.name,
+ t.sparkSession,
+ t.options,
+ t.paths,
+ t.userSpecifiedSchema,
+ t.fallbackFileFormat))
case r =>
r
}
diff --git a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
index 6239ab5ad749..ab76cba4aa5d 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
@@ -21,7 +21,7 @@ import org.apache.gluten.sql.shims.SparkShimLoader
import org.apache.spark.internal.Logging
import org.apache.spark.sql.execution.datasources.DataSourceUtils
-import org.apache.spark.sql.execution.datasources.parquet.{ParquetFooterReader, ParquetOptions}
+import org.apache.spark.sql.execution.datasources.parquet.{ParquetFooterReaderShim, ParquetOptions}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path}
@@ -135,7 +135,7 @@ object ParquetMetadataUtils extends Logging {
parquetOptions: ParquetOptions): Option[String] = {
val footer =
try {
- ParquetFooterReader.readFooter(conf, fileStatus, ParquetMetadataConverter.NO_FILTER)
+ ParquetFooterReaderShim.readFooter(conf, fileStatus, ParquetMetadataConverter.NO_FILTER)
} catch {
case e: Exception if ExceptionUtils.hasCause(e, classOf[ParquetCryptoRuntimeException]) =>
return Some("Encrypted Parquet footer detected.")
diff --git a/backends-velox/src/main/scala/org/apache/spark/api/python/ColumnarArrowEvalPythonExec.scala b/backends-velox/src/main/scala/org/apache/spark/api/python/ColumnarArrowEvalPythonExec.scala
index bfa33a804c38..033f8a428470 100644
--- a/backends-velox/src/main/scala/org/apache/spark/api/python/ColumnarArrowEvalPythonExec.scala
+++ b/backends-velox/src/main/scala/org/apache/spark/api/python/ColumnarArrowEvalPythonExec.scala
@@ -43,6 +43,7 @@ import org.apache.arrow.vector.ipc.{ArrowStreamReader, ArrowStreamWriter}
import java.io.{DataInputStream, DataOutputStream}
import java.util.concurrent.atomic.AtomicBoolean
+import scala.annotation.nowarn
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
@@ -362,7 +363,7 @@ case class ColumnarArrowEvalPythonExec(
StructField(s"_$i", dt)
}.toSeq)
- val contextAwareIterator = new ContextAwareIterator(context, iter)
+ @nowarn val contextAwareIterator = new ContextAwareIterator(context, iter)
val inputCbCache = new ArrayBuffer[ColumnarBatch]()
var start_time: Long = 0
val inputBatchIter = contextAwareIterator.map {
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
index c3132420916a..cfddfb8e21e3 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
@@ -753,7 +753,11 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
val df = sql("SELECT 1")
checkAnswer(df, Row(1))
val plan = df.queryExecution.executedPlan
- assert(plan.find(_.isInstanceOf[RDDScanExec]).isDefined)
+ if (isSparkVersionGE("4.1")) {
+ assert(plan.find(_.getClass.getSimpleName == "OneRowRelationExec").isDefined)
+ } else {
+ assert(plan.find(_.isInstanceOf[RDDScanExec]).isDefined)
+ }
assert(plan.find(_.isInstanceOf[ProjectExecTransformer]).isDefined)
assert(plan.find(_.isInstanceOf[RowToVeloxColumnarExec]).isDefined)
}
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala
index 53f44a2ccc81..5958baa3771f 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala
@@ -92,12 +92,9 @@ class VeloxHashJoinSuite extends VeloxWholeStageTransformerSuite {
// The computing is combined into one single whole stage transformer.
val wholeStages = plan.collect { case wst: WholeStageTransformer => wst }
- if (SparkShimLoader.getSparkVersion.startsWith("3.2.")) {
+ if (isSparkVersionLE("3.2")) {
assert(wholeStages.length == 1)
- } else if (
- SparkShimLoader.getSparkVersion.startsWith("3.5.") ||
- SparkShimLoader.getSparkVersion.startsWith("4.0.")
- ) {
+ } else if (isSparkVersionGE("3.5")) {
assert(wholeStages.length == 5)
} else {
assert(wholeStages.length == 3)
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala
index 06f0acb784b8..37f13bcea8c3 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala
@@ -544,7 +544,8 @@ class VeloxStringFunctionsSuite extends VeloxWholeStageTransformerSuite {
s"from $LINEITEM_TABLE limit 5") { _ => }
}
- testWithMinSparkVersion("split", "3.4") {
+ // TODO: fix on spark-4.1
+ testWithSpecifiedSparkVersion("split", "3.4", "3.5") {
runQueryAndCompare(
s"select l_orderkey, l_comment, split(l_comment, '') " +
s"from $LINEITEM_TABLE limit 5") {
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala
index 52a17995f386..8040cc455c48 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala
@@ -39,7 +39,7 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
.set("spark.executor.cores", "1")
}
- test("arrow_udf test: without projection") {
+ testWithMaxSparkVersion("arrow_udf test: without projection", "4.0") {
lazy val base =
Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0))
.toDF("a", "b")
@@ -59,7 +59,7 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
checkAnswer(df2, expected)
}
- test("arrow_udf test: with unrelated projection") {
+ testWithMaxSparkVersion("arrow_udf test: with unrelated projection", "4.0") {
lazy val base =
Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0))
.toDF("a", "b")
diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/caller/CallerInfo.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/caller/CallerInfo.scala
index 732c898285dd..7dfaaaa774c5 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/extension/caller/CallerInfo.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/extension/caller/CallerInfo.scala
@@ -18,7 +18,7 @@ package org.apache.gluten.extension.caller
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
import org.apache.spark.sql.execution.columnar.InMemoryRelation
-import org.apache.spark.sql.execution.streaming.StreamExecution
+import org.apache.spark.util.SparkVersionUtil
/**
* Helper API that stores information about the call site of the columnar rule. Specific columnar
@@ -70,7 +70,12 @@ object CallerInfo {
}
private def inStreamingCall(stack: Seq[StackTraceElement]): Boolean = {
- stack.exists(_.getClassName.equals(StreamExecution.getClass.getName.split('$').head))
+ val streamName = if (SparkVersionUtil.gteSpark41) {
+ "org.apache.spark.sql.execution.streaming.runtime.StreamExecution"
+ } else {
+ "org.apache.spark.sql.execution.streaming.StreamExecution"
+ }
+ stack.exists(_.getClassName.equals(streamName))
}
private def inBloomFilterStatFunctionCall(stack: Seq[StackTraceElement]): Boolean = {
diff --git a/gluten-core/src/main/scala/org/apache/spark/util/SparkVersionUtil.scala b/gluten-core/src/main/scala/org/apache/spark/util/SparkVersionUtil.scala
index efa0c63dca52..50114ab7023e 100644
--- a/gluten-core/src/main/scala/org/apache/spark/util/SparkVersionUtil.scala
+++ b/gluten-core/src/main/scala/org/apache/spark/util/SparkVersionUtil.scala
@@ -25,6 +25,7 @@ object SparkVersionUtil {
val gteSpark33: Boolean = comparedWithSpark33 >= 0
val gteSpark35: Boolean = comparedWithSpark35 >= 0
val gteSpark40: Boolean = compareMajorMinorVersion((4, 0)) >= 0
+ val gteSpark41: Boolean = compareMajorMinorVersion((4, 1)) >= 0
// Returns X. X < 0 if one < other, x == 0 if one == other, x > 0 if one > other.
def compareMajorMinorVersion(other: (Int, Int)): Int = {
diff --git a/gluten-iceberg/src/main/scala/org/apache/iceberg/spark/source/GlutenIcebergSourceUtil.scala b/gluten-iceberg/src/main/scala/org/apache/iceberg/spark/source/GlutenIcebergSourceUtil.scala
index ebe899f922c1..f1122280c1c1 100644
--- a/gluten-iceberg/src/main/scala/org/apache/iceberg/spark/source/GlutenIcebergSourceUtil.scala
+++ b/gluten-iceberg/src/main/scala/org/apache/iceberg/spark/source/GlutenIcebergSourceUtil.scala
@@ -33,6 +33,7 @@ import org.apache.iceberg.spark.SparkSchemaUtil
import java.lang.{Class, Long => JLong}
import java.util.{ArrayList => JArrayList, HashMap => JHashMap, List => JList, Map => JMap}
+import scala.annotation.nowarn
import scala.collection.JavaConverters._
object GlutenIcebergSourceUtil {
@@ -50,6 +51,7 @@ object GlutenIcebergSourceUtil {
}
}
+ @nowarn
def genSplitInfo(
partition: SparkDataSourceRDDPartition,
readPartitionSchema: StructType): SplitInfo = {
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index 52f6d31d1d1b..418de8578f5c 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -856,13 +856,6 @@ object ExpressionConverter extends SQLConfHelper with Logging {
dateAdd.children,
dateAdd
)
- case timeAdd: TimeAdd =>
- BackendsApiManager.getSparkPlanExecApiInstance.genDateAddTransformer(
- attributeSeq,
- substraitExprName,
- timeAdd.children,
- timeAdd
- )
case ss: StringSplit =>
BackendsApiManager.getSparkPlanExecApiInstance.genStringSplitTransformer(
substraitExprName,
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index b0b7c8079315..b13aced2a62c 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -179,7 +179,6 @@ object ExpressionMappings {
Sig[Second](EXTRACT),
Sig[FromUnixTime](FROM_UNIXTIME),
Sig[DateAdd](DATE_ADD),
- Sig[TimeAdd](TIMESTAMP_ADD),
Sig[DateSub](DATE_SUB),
Sig[DateDiff](DATE_DIFF),
Sig[ToUnixTimestamp](TO_UNIX_TIMESTAMP),
diff --git a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenImplicits.scala b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenImplicits.scala
index 474a27176906..29036619ae7d 100644
--- a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenImplicits.scala
+++ b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenImplicits.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql.execution
import org.apache.gluten.exception.GlutenException
import org.apache.gluten.execution.{GlutenPlan, WholeStageTransformer}
import org.apache.gluten.utils.PlanUtil
-
import org.apache.spark.sql.{Column, Dataset, SparkSession}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{CommandResult, LogicalPlan}
@@ -130,10 +129,8 @@ object GlutenImplicits {
val (innerNumGlutenNodes, innerFallbackNodeToReason) =
withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
// re-plan manually to skip cached data
- val newSparkPlan = QueryExecution.createSparkPlan(
- spark,
- spark.sessionState.planner,
- p.inputPlan.logicalLink.get)
+ val newSparkPlan = QueryExecutionShim.createSparkPlan(
+ spark, spark.sessionState.planner, p.inputPlan.logicalLink.get)
val newExecutedPlan = QueryExecution.prepareExecutedPlan(
spark,
newSparkPlan
diff --git a/gluten-ut/spark40/pom.xml b/gluten-ut/spark40/pom.xml
index 9d5633274a0d..d289c9b4e5c9 100644
--- a/gluten-ut/spark40/pom.xml
+++ b/gluten-ut/spark40/pom.xml
@@ -12,6 +12,10 @@
jar
Gluten Unit Test Spark40
+
+ 1.16.0
+
+
org.apache.gluten
@@ -23,14 +27,14 @@
org.apache.parquet
parquet-column
- 1.15.2
+ ${parquet.version}
tests
test
org.apache.parquet
parquet-common
- 1.15.2
+ ${parquet.version}
tests
test
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/GlutenSQLQueryTestSuite.scala
index b06901b92167..8d1c08c73dff 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/GlutenSQLQueryTestSuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/GlutenSQLQueryTestSuite.scala
@@ -29,8 +29,8 @@ import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
-import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile}
import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND
+import org.apache.spark.sql.catalyst.util.stringToFile
import org.apache.spark.sql.execution.WholeStageCodegenExec
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.TimestampTypes
@@ -40,6 +40,7 @@ import org.apache.spark.util.Utils
import java.io.File
import java.net.URI
+import java.nio.file.Files
import java.util.Locale
import scala.collection.mutable
@@ -320,7 +321,7 @@ class GlutenSQLQueryTestSuite
newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER")
}
- val input = fileToString(new File(testCase.inputFile))
+ val input = Files.readString(new File(testCase.inputFile).toPath)
val (comments, code) = splitCommentsAndCodes(input)
@@ -331,7 +332,7 @@ class GlutenSQLQueryTestSuite
testCaseName =>
listTestCases.find(_.name == testCaseName).map {
testCase =>
- val input = fileToString(new File(testCase.inputFile))
+ val input = Files.readString(new File(testCase.inputFile).toPath)
val (_, code) = splitCommentsAndCodes(input)
code
}
@@ -738,7 +739,7 @@ class GlutenSQLQueryTestSuite
makeOutput: (String, Option[String], String) => QueryTestOutput): Unit = {
// Read back the golden file.
val expectedOutputs: Seq[QueryTestOutput] = {
- val goldenOutput = fileToString(new File(resultFile))
+ val goldenOutput = Files.readString(new File(resultFile).toPath)
val segments = goldenOutput.split("-- !query.*\n")
val numSegments = outputs.map(_.numSegments).sum + 1
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala
index dc96c09bc240..b3f3fec4038f 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
import org.apache.spark.sql.execution.exchange.{BroadcastExchangeLike, ReusedExchangeExec}
import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec
import org.apache.spark.sql.execution.metric.SQLMetric
-import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamingQueryWrapper}
+import org.apache.spark.sql.execution.streaming.runtime.{MemoryStream, StreamingQueryWrapper}
import org.apache.spark.sql.internal.SQLConf
abstract class GlutenDynamicPartitionPruningSuiteBase
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2Suite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2Suite.scala
index 803c407ca8e7..6b48550b4d54 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2Suite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2Suite.scala
@@ -38,25 +38,25 @@ class GlutenDataSourceV2Suite extends DataSourceV2Suite with GlutenSQLTestsBaseT
val df = spark.read.format(cls.getName).load()
checkAnswer(df, Seq(Row(1, 4), Row(1, 4), Row(3, 6), Row(2, 6), Row(4, 2), Row(4, 2)))
- val groupByColA = df.groupBy('i).agg(sum('j))
+ val groupByColA = df.groupBy(Symbol("i")).agg(sum(Symbol("j")))
checkAnswer(groupByColA, Seq(Row(1, 8), Row(2, 6), Row(3, 6), Row(4, 4)))
assert(collectFirst(groupByColA.queryExecution.executedPlan) {
case e: ColumnarShuffleExchangeExec => e
}.isEmpty)
- val groupByColAB = df.groupBy('i, 'j).agg(count("*"))
+ val groupByColAB = df.groupBy(Symbol("i"), Symbol("j")).agg(count("*"))
checkAnswer(groupByColAB, Seq(Row(1, 4, 2), Row(2, 6, 1), Row(3, 6, 1), Row(4, 2, 2)))
assert(collectFirst(groupByColAB.queryExecution.executedPlan) {
case e: ColumnarShuffleExchangeExec => e
}.isEmpty)
- val groupByColB = df.groupBy('j).agg(sum('i))
+ val groupByColB = df.groupBy(Symbol("j")).agg(sum(Symbol("i")))
checkAnswer(groupByColB, Seq(Row(2, 8), Row(4, 2), Row(6, 5)))
assert(collectFirst(groupByColB.queryExecution.executedPlan) {
case e: ColumnarShuffleExchangeExec => e
}.isDefined)
- val groupByAPlusB = df.groupBy('i + 'j).agg(count("*"))
+ val groupByAPlusB = df.groupBy(Symbol("i") + Symbol("j")).agg(count("*"))
checkAnswer(groupByAPlusB, Seq(Row(5, 2), Row(6, 2), Row(8, 1), Row(9, 1)))
assert(collectFirst(groupByAPlusB.queryExecution.executedPlan) {
case e: ColumnarShuffleExchangeExec => e
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenStreamingQuerySuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenStreamingQuerySuite.scala
index bda9c97eb5d7..d59c04693767 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenStreamingQuerySuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenStreamingQuerySuite.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.execution
import org.apache.spark.sql.GlutenSQLTestsTrait
import org.apache.spark.sql.execution.exchange.REQUIRED_BY_STATEFUL_OPERATOR
-import org.apache.spark.sql.execution.streaming.MemoryStream
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
import org.apache.spark.sql.streaming._
class GlutenStreamingQuerySuite extends StreamingQuerySuite with GlutenSQLTestsTrait {
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/adaptive/clickhouse/ClickHouseAdaptiveQueryExecSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/adaptive/clickhouse/ClickHouseAdaptiveQueryExecSuite.scala
index 70e6c81c1122..1b6e29065e5f 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/adaptive/clickhouse/ClickHouseAdaptiveQueryExecSuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/adaptive/clickhouse/ClickHouseAdaptiveQueryExecSuite.scala
@@ -254,13 +254,13 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
SQLConf.COALESCE_PARTITIONS_ENABLED.key -> "true",
SQLConf.ADAPTIVE_OPTIMIZER_EXCLUDED_RULES.key -> AQEPropagateEmptyRelation.ruleName
) {
- val df1 = spark.range(10).withColumn("a", 'id)
- val df2 = spark.range(10).withColumn("b", 'id)
+ val df1 = spark.range(10).withColumn("a", Symbol("id"))
+ val df2 = spark.range(10).withColumn("b", Symbol("id"))
withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
val testDf = df1
- .where('a > 10)
- .join(df2.where('b > 10), Seq("id"), "left_outer")
- .groupBy('a)
+ .where(Symbol("a") > 10)
+ .join(df2.where(Symbol("b") > 10), Seq("id"), "left_outer")
+ .groupBy(Symbol("a"))
.count()
checkAnswer(testDf, Seq())
val plan = testDf.queryExecution.executedPlan
@@ -269,9 +269,9 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "1") {
val testDf = df1
- .where('a > 10)
- .join(df2.where('b > 10), Seq("id"), "left_outer")
- .groupBy('a)
+ .where(Symbol("a") > 10)
+ .join(df2.where(Symbol("b") > 10), Seq("id"), "left_outer")
+ .groupBy(Symbol("a"))
.count()
checkAnswer(testDf, Seq())
val plan = testDf.queryExecution.executedPlan
@@ -346,8 +346,8 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
// +- ShuffleExchange
// After applied the 'OptimizeShuffleWithLocalRead' rule, we can convert all the four
- // shuffle read to local shuffle read in the bottom two 'BroadcastHashJoin'.
- // For the top level 'BroadcastHashJoin', the probe side is not shuffle query stage
+ // shuffle read to local shuffle read in the bottom two 'BroadcastHashJoin'.
+ // For the top level 'BroadcastHashJoin', the probe side is not shuffle query stage
// and the build side shuffle query stage is also converted to local shuffle read.
checkNumLocalShuffleReads(adaptivePlan, 0)
}
@@ -652,19 +652,19 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
spark
.range(0, 1000, 1, 10)
.select(
- when('id < 250, 249)
- .when('id >= 750, 1000)
- .otherwise('id)
+ when(Symbol("id") < 250, 249)
+ .when(Symbol("id") >= 750, 1000)
+ .otherwise(Symbol("id"))
.as("key1"),
- 'id.as("value1"))
+ Symbol("id").as("value1"))
.createOrReplaceTempView("skewData1")
spark
.range(0, 1000, 1, 10)
.select(
- when('id < 250, 249)
- .otherwise('id)
+ when(Symbol("id") < 250, 249)
+ .otherwise(Symbol("id"))
.as("key2"),
- 'id.as("value2"))
+ Symbol("id").as("value2"))
.createOrReplaceTempView("skewData2")
def checkSkewJoin(
@@ -780,19 +780,19 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
spark
.range(0, 1000, 1, 10)
.select(
- when('id < 250, 249)
- .when('id >= 750, 1000)
- .otherwise('id)
+ when(Symbol("id") < 250, 249)
+ .when(Symbol("id") >= 750, 1000)
+ .otherwise(Symbol("id"))
.as("key1"),
- 'id.as("value1"))
+ Symbol("id").as("value1"))
.createOrReplaceTempView("skewData1")
spark
.range(0, 1000, 1, 10)
.select(
- when('id < 250, 249)
- .otherwise('id)
+ when(Symbol("id") < 250, 249)
+ .otherwise(Symbol("id"))
.as("key2"),
- 'id.as("value2"))
+ Symbol("id").as("value2"))
.createOrReplaceTempView("skewData2")
val (_, adaptivePlan) =
runAdaptiveAndVerifyResult("SELECT * FROM skewData1 join skewData2 ON key1 = key2")
@@ -940,7 +940,7 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "300") {
// Repartition with no partition num specified.
checkBHJ(
- df.repartition('b),
+ df.repartition(Symbol("b")),
// The top shuffle from repartition is optimized out.
optimizeOutRepartition = true,
probeSideLocalRead = false,
@@ -949,7 +949,7 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
// Repartition with default partition num (5 in test env) specified.
checkBHJ(
- df.repartition(5, 'b),
+ df.repartition(5, Symbol("b")),
// The top shuffle from repartition is optimized out
// The final plan must have 5 partitions, no optimization can be made to the probe side.
optimizeOutRepartition = true,
@@ -959,7 +959,7 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
// Repartition with non-default partition num specified.
checkBHJ(
- df.repartition(4, 'b),
+ df.repartition(4, Symbol("b")),
// The top shuffle from repartition is not optimized out
optimizeOutRepartition = false,
probeSideLocalRead = true,
@@ -968,7 +968,7 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
// Repartition by col and project away the partition cols
checkBHJ(
- df.repartition('b).select('key),
+ df.repartition(Symbol("b")).select(Symbol("key")),
// The top shuffle from repartition is not optimized out
optimizeOutRepartition = false,
probeSideLocalRead = true,
@@ -986,15 +986,16 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
) {
// Repartition with no partition num specified.
checkSMJ(
- df.repartition('b),
+ df.repartition(Symbol("b")),
// The top shuffle from repartition is optimized out.
optimizeOutRepartition = true,
optimizeSkewJoin = false,
- coalescedRead = true)
+ coalescedRead = true
+ )
// Repartition with default partition num (5 in test env) specified.
checkSMJ(
- df.repartition(5, 'b),
+ df.repartition(5, Symbol("b")),
// The top shuffle from repartition is optimized out.
// The final plan must have 5 partitions, can't do coalesced read.
optimizeOutRepartition = true,
@@ -1004,15 +1005,16 @@ class ClickHouseAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with Glute
// Repartition with non-default partition num specified.
checkSMJ(
- df.repartition(4, 'b),
+ df.repartition(4, Symbol("b")),
// The top shuffle from repartition is not optimized out.
optimizeOutRepartition = false,
optimizeSkewJoin = true,
- coalescedRead = false)
+ coalescedRead = false
+ )
// Repartition by col and project away the partition cols
checkSMJ(
- df.repartition('b).select('key),
+ df.repartition(Symbol("b")).select(Symbol("key")),
// The top shuffle from repartition is not optimized out.
optimizeOutRepartition = false,
optimizeSkewJoin = true,
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/adaptive/velox/VeloxAdaptiveQueryExecSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/adaptive/velox/VeloxAdaptiveQueryExecSuite.scala
index 4be6d072b4cb..f67f92481170 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/adaptive/velox/VeloxAdaptiveQueryExecSuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/adaptive/velox/VeloxAdaptiveQueryExecSuite.scala
@@ -254,13 +254,13 @@ class VeloxAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with GlutenSQLT
SQLConf.COALESCE_PARTITIONS_ENABLED.key -> "true",
SQLConf.ADAPTIVE_OPTIMIZER_EXCLUDED_RULES.key -> AQEPropagateEmptyRelation.ruleName
) {
- val df1 = spark.range(10).withColumn("a", 'id)
- val df2 = spark.range(10).withColumn("b", 'id)
+ val df1 = spark.range(10).withColumn("a", Symbol("id"))
+ val df2 = spark.range(10).withColumn("b", Symbol("id"))
withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
val testDf = df1
- .where('a > 10)
- .join(df2.where('b > 10), Seq("id"), "left_outer")
- .groupBy('a)
+ .where(Symbol("a") > 10)
+ .join(df2.where(Symbol("b") > 10), Seq("id"), "left_outer")
+ .groupBy(Symbol("a"))
.count()
checkAnswer(testDf, Seq())
val plan = testDf.queryExecution.executedPlan
@@ -269,9 +269,9 @@ class VeloxAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with GlutenSQLT
withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "1") {
val testDf = df1
- .where('a > 10)
- .join(df2.where('b > 10), Seq("id"), "left_outer")
- .groupBy('a)
+ .where(Symbol("a") > 10)
+ .join(df2.where(Symbol("b") > 10), Seq("id"), "left_outer")
+ .groupBy(Symbol("a"))
.count()
checkAnswer(testDf, Seq())
val plan = testDf.queryExecution.executedPlan
@@ -346,8 +346,8 @@ class VeloxAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with GlutenSQLT
// +- ShuffleExchange
// After applied the 'OptimizeShuffleWithLocalRead' rule, we can convert all the four
- // shuffle read to local shuffle read in the bottom two 'BroadcastHashJoin'.
- // For the top level 'BroadcastHashJoin', the probe side is not shuffle query stage
+ // shuffle read to local shuffle read in the bottom two 'BroadcastHashJoin'.
+ // For the top level 'BroadcastHashJoin', the probe side is not shuffle query stage
// and the build side shuffle query stage is also converted to local shuffle read.
checkNumLocalShuffleReads(adaptivePlan, 0)
}
@@ -649,19 +649,19 @@ class VeloxAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with GlutenSQLT
spark
.range(0, 1000, 1, 10)
.select(
- when('id < 250, 249)
- .when('id >= 750, 1000)
- .otherwise('id)
+ when(Symbol("id") < 250, 249)
+ .when(Symbol("id") >= 750, 1000)
+ .otherwise(Symbol("id"))
.as("key1"),
- 'id.as("value1"))
+ Symbol("id").as("value1"))
.createOrReplaceTempView("skewData1")
spark
.range(0, 1000, 1, 10)
.select(
- when('id < 250, 249)
- .otherwise('id)
+ when(Symbol("id") < 250, 249)
+ .otherwise(Symbol("id"))
.as("key2"),
- 'id.as("value2"))
+ Symbol("id").as("value2"))
.createOrReplaceTempView("skewData2")
def checkSkewJoin(
@@ -777,19 +777,19 @@ class VeloxAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with GlutenSQLT
spark
.range(0, 1000, 1, 10)
.select(
- when('id < 250, 249)
- .when('id >= 750, 1000)
- .otherwise('id)
+ when(Symbol("id") < 250, 249)
+ .when(Symbol("id") >= 750, 1000)
+ .otherwise(Symbol("id"))
.as("key1"),
- 'id.as("value1"))
+ Symbol("id").as("value1"))
.createOrReplaceTempView("skewData1")
spark
.range(0, 1000, 1, 10)
.select(
- when('id < 250, 249)
- .otherwise('id)
+ when(Symbol("id") < 250, 249)
+ .otherwise(Symbol("id"))
.as("key2"),
- 'id.as("value2"))
+ Symbol("id").as("value2"))
.createOrReplaceTempView("skewData2")
val (_, adaptivePlan) =
runAdaptiveAndVerifyResult("SELECT * FROM skewData1 join skewData2 ON key1 = key2")
@@ -922,7 +922,7 @@ class VeloxAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with GlutenSQLT
withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "300") {
// Repartition with no partition num specified.
checkBHJ(
- df.repartition('b),
+ df.repartition(Symbol("b")),
// The top shuffle from repartition is optimized out.
optimizeOutRepartition = true,
probeSideLocalRead = false,
@@ -931,7 +931,7 @@ class VeloxAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with GlutenSQLT
// Repartition with default partition num (5 in test env) specified.
checkBHJ(
- df.repartition(5, 'b),
+ df.repartition(5, Symbol("b")),
// The top shuffle from repartition is optimized out
// The final plan must have 5 partitions, no optimization can be made to the probe side.
optimizeOutRepartition = true,
@@ -941,7 +941,7 @@ class VeloxAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with GlutenSQLT
// Repartition with non-default partition num specified.
checkBHJ(
- df.repartition(4, 'b),
+ df.repartition(4, Symbol("b")),
// The top shuffle from repartition is not optimized out
optimizeOutRepartition = false,
probeSideLocalRead = true,
@@ -950,7 +950,7 @@ class VeloxAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with GlutenSQLT
// Repartition by col and project away the partition cols
checkBHJ(
- df.repartition('b).select('key),
+ df.repartition(Symbol("b")).select(Symbol("key")),
// The top shuffle from repartition is not optimized out
optimizeOutRepartition = false,
probeSideLocalRead = true,
@@ -968,15 +968,16 @@ class VeloxAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with GlutenSQLT
) {
// Repartition with no partition num specified.
checkSMJ(
- df.repartition('b),
+ df.repartition(Symbol("b")),
// The top shuffle from repartition is optimized out.
optimizeOutRepartition = true,
optimizeSkewJoin = false,
- coalescedRead = true)
+ coalescedRead = true
+ )
// Repartition with default partition num (5 in test env) specified.
checkSMJ(
- df.repartition(5, 'b),
+ df.repartition(5, Symbol("b")),
// The top shuffle from repartition is optimized out.
// The final plan must have 5 partitions, can't do coalesced read.
optimizeOutRepartition = true,
@@ -986,15 +987,16 @@ class VeloxAdaptiveQueryExecSuite extends AdaptiveQueryExecSuite with GlutenSQLT
// Repartition with non-default partition num specified.
checkSMJ(
- df.repartition(4, 'b),
+ df.repartition(4, Symbol("b")),
// The top shuffle from repartition is not optimized out.
optimizeOutRepartition = false,
optimizeSkewJoin = true,
- coalescedRead = false)
+ coalescedRead = false
+ )
// Repartition by col and project away the partition cols
checkSMJ(
- df.repartition('b).select('key),
+ df.repartition(Symbol("b")).select(Symbol("key")),
// The top shuffle from repartition is not optimized out.
optimizeOutRepartition = false,
optimizeSkewJoin = true,
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala
index 6cfa9f2028e8..97378349df26 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala
@@ -131,5 +131,5 @@ class GlutenCSVv2Suite extends GlutenCSVSuite {
class GlutenCSVLegacyTimeParserSuite extends GlutenCSVSuite {
override def sparkConf: SparkConf =
super.sparkConf
- .set(SQLConf.LEGACY_TIME_PARSER_POLICY, "legacy")
+ .set(SQLConf.LEGACY_TIME_PARSER_POLICY.key, "legacy")
}
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/json/GlutenJsonSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/json/GlutenJsonSuite.scala
index 45e8e3e9e7ba..b8cb2a2d1099 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/json/GlutenJsonSuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/json/GlutenJsonSuite.scala
@@ -136,5 +136,5 @@ class GlutenJsonLegacyTimeParserSuite extends GlutenJsonSuite with GlutenSQLTest
override def sparkConf: SparkConf =
super.sparkConf
- .set(SQLConf.LEGACY_TIME_PARSER_POLICY, "legacy")
+ .set(SQLConf.LEGACY_TIME_PARSER_POLICY.key, "legacy")
}
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala
index 5cf41b7a9ed5..570b6d5e0c1a 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala
@@ -42,7 +42,7 @@ class GlutenParquetRowIndexSuite extends ParquetRowIndexSuite with GlutenSQLTest
import testImplicits._
private def readRowGroupRowCounts(path: String): Seq[Long] = {
- ParquetFooterReader
+ ParquetFooterReaderShim
.readFooter(
spark.sessionState.newHadoopConf(),
new Path(path),
diff --git a/pom.xml b/pom.xml
index 1d13dc57477f..bdb8e43bb87b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -938,7 +938,7 @@
scala-2.13
- 2.13.16
+ 2.13.17
2.13
3.8.3
@@ -969,14 +969,13 @@
-deprecation
-feature
-explaintypes
- -release:8
+ -release:${java.version}
-Wconf:cat=deprecation:wv,any:e
-Wunused:imports
-Wconf:cat=scaladoc:wv
- -Wconf:cat=lint-multiarg-infix:wv
-Wconf:cat=other-nullary-override:wv