diff --git a/.github/workflows/velox_backend_x86.yml b/.github/workflows/velox_backend_x86.yml
index 5ef70a79bc0f..51f71e829c5e 100644
--- a/.github/workflows/velox_backend_x86.yml
+++ b/.github/workflows/velox_backend_x86.yml
@@ -107,7 +107,7 @@ jobs:
fail-fast: false
matrix:
os: [ "ubuntu:20.04", "ubuntu:22.04" ]
- spark: [ "spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5", "spark-4.0" ]
+ spark: [ "spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5", "spark-4.0", "spark-4.1" ]
java: [ "java-8", "java-11", "java-17", "java-21" ]
# Spark supports JDK17 since 3.3.
exclude:
@@ -141,6 +141,10 @@ jobs:
java: java-8
- spark: spark-4.0
java: java-11
+ - spark: spark-4.1
+ java: java-8
+ - spark: spark-4.1
+ java: java-11
runs-on: ubuntu-22.04
container: ${{ matrix.os }}
@@ -182,11 +186,14 @@ jobs:
cd $GITHUB_WORKSPACE/
export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64
echo "JAVA_HOME: $JAVA_HOME"
- if [ "${{ matrix.spark }}" = "spark-4.0" ]; then
- $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pscala-2.13 -Pbackends-velox -DskipTests
- else
- $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests
- fi
+ case "${{ matrix.spark }}" in
+ spark-4.0|spark-4.1)
+ $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pscala-2.13 -Pbackends-velox -DskipTests
+ ;;
+ *)
+ $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests
+ ;;
+ esac
cd $GITHUB_WORKSPACE/tools/gluten-it
$GITHUB_WORKSPACE/$MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }}
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
@@ -200,7 +207,7 @@ jobs:
fail-fast: false
matrix:
os: [ "centos:8" ]
- spark: [ "spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5", "spark-4.0" ]
+ spark: [ "spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5", "spark-4.0", "spark-4.1" ]
java: [ "java-8", "java-11", "java-17" ]
# Spark supports JDK17 since 3.3.
exclude:
@@ -220,6 +227,10 @@ jobs:
java: java-8
- spark: spark-4.0
java: java-11
+ - spark: spark-4.1
+ java: java-8
+ - spark: spark-4.1
+ java: java-11
runs-on: ubuntu-22.04
container: ${{ matrix.os }}
@@ -263,11 +274,14 @@ jobs:
run: |
echo "JAVA_HOME: $JAVA_HOME"
cd $GITHUB_WORKSPACE/
- if [ "${{ matrix.spark }}" = "spark-4.0" ]; then
- $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pscala-2.13 -Pbackends-velox -DskipTests
- else
- $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests
- fi
+ case "${{ matrix.spark }}" in
+ spark-4.0|spark-4.1)
+ $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pscala-2.13 -Pbackends-velox -DskipTests
+ ;;
+ *)
+ $MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests
+ ;;
+ esac
cd $GITHUB_WORKSPACE/tools/gluten-it
$GITHUB_WORKSPACE/build/mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }}
- name: Run TPC-H / TPC-DS
@@ -1521,7 +1535,7 @@ jobs:
export PATH=$JAVA_HOME/bin:$PATH
java -version
$MVN_CMD clean test -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox \
- -Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark41/spark_home/" \
+ -Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark41/spark_home/ -Dspark.sql.unionOutputPartitioning=false" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest
- name: Upload test report
if: always()
@@ -1570,7 +1584,7 @@ jobs:
export PATH=$JAVA_HOME/bin:$PATH
java -version
$MVN_CMD clean test -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pspark-ut \
- -DargLine="-Dspark.test.home=/opt/shims/spark41/spark_home/" \
+ -DargLine="-Dspark.test.home=/opt/shims/spark41/spark_home/ -Dspark.sql.unionOutputPartitioning=false" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
if: always()
diff --git a/backends-velox/src/main/scala/org/apache/gluten/expression/aggregate/VeloxBloomFilterAggregate.scala b/backends-velox/src/main/scala/org/apache/gluten/expression/aggregate/VeloxBloomFilterAggregate.scala
index 976abb9e21fb..a3d6f738a2b5 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/expression/aggregate/VeloxBloomFilterAggregate.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/expression/aggregate/VeloxBloomFilterAggregate.scala
@@ -25,10 +25,14 @@ import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate
import org.apache.spark.sql.catalyst.trees.TernaryLike
import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types._
import org.apache.spark.sql.types.DataType
import org.apache.spark.task.TaskResources
+import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.util.sketch.BloomFilter
+import java.io.Serializable
+
/**
* Velox's bloom-filter implementation uses different algorithms internally comparing to vanilla
* Spark so produces different intermediate aggregate data. Thus we use different filter function /
@@ -61,6 +65,15 @@ case class VeloxBloomFilterAggregate(
.toLong
)
+ // Mark as lazy so that `updater` is not evaluated during tree transformation.
+ private lazy val updater: BloomFilterUpdater = child.dataType match {
+ case LongType => LongUpdater
+ case IntegerType => IntUpdater
+ case ShortType => ShortUpdater
+ case ByteType => ByteUpdater
+ case _: StringType => BinaryUpdater
+ }
+
override def first: Expression = child
override def second: Expression = estimatedNumItemsExpression
@@ -97,7 +110,7 @@ case class VeloxBloomFilterAggregate(
if (value == null) {
return buffer
}
- buffer.putLong(value.asInstanceOf[Long])
+ updater.update(buffer, value)
buffer
}
@@ -128,3 +141,33 @@ case class VeloxBloomFilterAggregate(
copy(inputAggBufferOffset = newOffset)
}
+
+// see https://github.com/apache/spark/pull/42414
+private trait BloomFilterUpdater {
+ def update(bf: BloomFilter, v: Any): Boolean
+}
+
+private object LongUpdater extends BloomFilterUpdater with Serializable {
+ override def update(bf: BloomFilter, v: Any): Boolean =
+ bf.putLong(v.asInstanceOf[Long])
+}
+
+private object IntUpdater extends BloomFilterUpdater with Serializable {
+ override def update(bf: BloomFilter, v: Any): Boolean =
+ bf.putLong(v.asInstanceOf[Int])
+}
+
+private object ShortUpdater extends BloomFilterUpdater with Serializable {
+ override def update(bf: BloomFilter, v: Any): Boolean =
+ bf.putLong(v.asInstanceOf[Short])
+}
+
+private object ByteUpdater extends BloomFilterUpdater with Serializable {
+ override def update(bf: BloomFilter, v: Any): Boolean =
+ bf.putLong(v.asInstanceOf[Byte])
+}
+
+private object BinaryUpdater extends BloomFilterUpdater with Serializable {
+ override def update(bf: BloomFilter, v: Any): Boolean =
+ bf.putBinary(v.asInstanceOf[UTF8String].getBytes)
+}
diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
index a04e7d68fba6..c05eb4a2fa68 100644
--- a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
+++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
@@ -115,24 +115,24 @@ class ColumnarCachedBatchSerializer extends CachedBatchSerializer with Logging {
conf: SQLConf): RDD[CachedBatch] = {
val localSchema = toStructType(schema)
if (!validateSchema(localSchema)) {
- // we can not use columnar cache here, as the `RowToColumnar` does not support this schema
- return rowBasedCachedBatchSerializer.convertInternalRowToCachedBatch(
+ // we cannot use columnar cache here, as the `RowToColumnar` does not support this schema
+ rowBasedCachedBatchSerializer.convertInternalRowToCachedBatch(
input,
schema,
storageLevel,
conf)
+ } else {
+ val numRows = conf.columnBatchSize
+ val rddColumnarBatch = input.mapPartitions {
+ it =>
+ RowToVeloxColumnarExec.toColumnarBatchIterator(
+ it,
+ localSchema,
+ numRows,
+ VeloxConfig.get.veloxPreferredBatchBytes)
+ }
+ convertColumnarBatchToCachedBatch(rddColumnarBatch, schema, storageLevel, conf)
}
-
- val numRows = conf.columnBatchSize
- val rddColumnarBatch = input.mapPartitions {
- it =>
- RowToVeloxColumnarExec.toColumnarBatchIterator(
- it,
- localSchema,
- numRows,
- VeloxConfig.get.veloxPreferredBatchBytes)
- }
- convertColumnarBatchToCachedBatch(rddColumnarBatch, schema, storageLevel, conf)
}
override def convertCachedBatchToInternalRow(
@@ -141,18 +141,18 @@ class ColumnarCachedBatchSerializer extends CachedBatchSerializer with Logging {
selectedAttributes: Seq[Attribute],
conf: SQLConf): RDD[InternalRow] = {
if (!validateSchema(cacheAttributes)) {
- // if we do not support this schema that means we are using row-based serializer,
+ // if we do not support this schema, that means we are using row-based serializer,
// see `convertInternalRowToCachedBatch`, so fallback to vanilla Spark serializer
- return rowBasedCachedBatchSerializer.convertCachedBatchToInternalRow(
+ rowBasedCachedBatchSerializer.convertCachedBatchToInternalRow(
input,
cacheAttributes,
selectedAttributes,
conf)
+ } else {
+ val rddColumnarBatch =
+ convertCachedBatchToColumnarBatch(input, cacheAttributes, selectedAttributes, conf)
+ rddColumnarBatch.mapPartitions(it => VeloxColumnarToRowExec.toRowIterator(it))
}
-
- val rddColumnarBatch =
- convertCachedBatchToColumnarBatch(input, cacheAttributes, selectedAttributes, conf)
- rddColumnarBatch.mapPartitions(it => VeloxColumnarToRowExec.toRowIterator(it))
}
override def convertColumnarBatchToCachedBatch(
@@ -190,58 +190,68 @@ class ColumnarCachedBatchSerializer extends CachedBatchSerializer with Logging {
cacheAttributes: Seq[Attribute],
selectedAttributes: Seq[Attribute],
conf: SQLConf): RDD[ColumnarBatch] = {
- // Find the ordinals and data types of the requested columns.
- val requestedColumnIndices = selectedAttributes.map {
- a => cacheAttributes.map(_.exprId).indexOf(a.exprId)
- }
- val shouldSelectAttributes = cacheAttributes != selectedAttributes
- val localSchema = toStructType(cacheAttributes)
- val timezoneId = SQLConf.get.sessionLocalTimeZone
- input.mapPartitions {
- it =>
- val runtime = Runtimes.contextInstance(
- BackendsApiManager.getBackendName,
- "ColumnarCachedBatchSerializer#read")
- val jniWrapper = ColumnarBatchSerializerJniWrapper
- .create(runtime)
- val schema = SparkArrowUtil.toArrowSchema(localSchema, timezoneId)
- val arrowAlloc = ArrowBufferAllocators.contextInstance()
- val cSchema = ArrowSchema.allocateNew(arrowAlloc)
- ArrowAbiUtil.exportSchema(arrowAlloc, schema, cSchema)
- val deserializerHandle = jniWrapper
- .init(cSchema.memoryAddress())
- cSchema.close()
-
- Iterators
- .wrap(new Iterator[ColumnarBatch] {
- override def hasNext: Boolean = it.hasNext
-
- override def next(): ColumnarBatch = {
- val cachedBatch = it.next().asInstanceOf[CachedColumnarBatch]
- val batchHandle =
- jniWrapper
- .deserialize(deserializerHandle, cachedBatch.bytes)
- val batch = ColumnarBatches.create(batchHandle)
- if (shouldSelectAttributes) {
- try {
- ColumnarBatches.select(
- BackendsApiManager.getBackendName,
- batch,
- requestedColumnIndices.toArray)
- } finally {
- batch.close()
+ if (!validateSchema(cacheAttributes)) {
+ // if we do not support this schema, that means we are using row-based serializer,
+ // see `convertInternalRowToCachedBatch`, so fallback to vanilla Spark serializer
+ rowBasedCachedBatchSerializer.convertCachedBatchToColumnarBatch(
+ input,
+ cacheAttributes,
+ selectedAttributes,
+ conf)
+ } else {
+ // Find the ordinals and data types of the requested columns.
+ val requestedColumnIndices = selectedAttributes.map {
+ a => cacheAttributes.map(_.exprId).indexOf(a.exprId)
+ }
+ val shouldSelectAttributes = cacheAttributes != selectedAttributes
+ val localSchema = toStructType(cacheAttributes)
+ val timezoneId = SQLConf.get.sessionLocalTimeZone
+ input.mapPartitions {
+ it =>
+ val runtime = Runtimes.contextInstance(
+ BackendsApiManager.getBackendName,
+ "ColumnarCachedBatchSerializer#read")
+ val jniWrapper = ColumnarBatchSerializerJniWrapper
+ .create(runtime)
+ val schema = SparkArrowUtil.toArrowSchema(localSchema, timezoneId)
+ val arrowAlloc = ArrowBufferAllocators.contextInstance()
+ val cSchema = ArrowSchema.allocateNew(arrowAlloc)
+ ArrowAbiUtil.exportSchema(arrowAlloc, schema, cSchema)
+ val deserializerHandle = jniWrapper
+ .init(cSchema.memoryAddress())
+ cSchema.close()
+
+ Iterators
+ .wrap(new Iterator[ColumnarBatch] {
+ override def hasNext: Boolean = it.hasNext
+
+ override def next(): ColumnarBatch = {
+ val cachedBatch = it.next().asInstanceOf[CachedColumnarBatch]
+ val batchHandle =
+ jniWrapper
+ .deserialize(deserializerHandle, cachedBatch.bytes)
+ val batch = ColumnarBatches.create(batchHandle)
+ if (shouldSelectAttributes) {
+ try {
+ ColumnarBatches.select(
+ BackendsApiManager.getBackendName,
+ batch,
+ requestedColumnIndices.toArray)
+ } finally {
+ batch.close()
+ }
+ } else {
+ batch
}
- } else {
- batch
}
+ })
+ .protectInvocationFlow()
+ .recycleIterator {
+ jniWrapper.close(deserializerHandle)
}
- })
- .protectInvocationFlow()
- .recycleIterator {
- jniWrapper.close(deserializerHandle)
- }
- .recyclePayload(_.close())
- .create()
+ .recyclePayload(_.close())
+ .create()
+ }
}
}
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index 418de8578f5c..a810a4ef1dbd 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -831,7 +831,14 @@ object ExpressionConverter extends SQLConfHelper with Logging {
case t: TransformKeys =>
// default is `EXCEPTION`
val mapKeyDedupPolicy = SQLConf.get.getConf(SQLConf.MAP_KEY_DEDUP_POLICY)
- if (mapKeyDedupPolicy == SQLConf.MapKeyDedupPolicy.LAST_WIN.toString) {
+
+ // Calling `.toString` on both sides ensures compatibility across all Spark versions.
+ // Starting from Spark 4.1, `SQLConf.get.getConf(SQLConf.MAP_KEY_DEDUP_POLICY)` returns
+ // an enum instead of a String. Without `.toString`, the comparison
+ // `mapKeyDedupPolicy == SQLConf.MapKeyDedupPolicy.LAST_WIN.toString` would silently fail
+ // in tests, producing only a "Comparing unrelated types" warning in IntelliJ IDEA,
+ // but no compile-time error.
+ if (mapKeyDedupPolicy.toString == SQLConf.MapKeyDedupPolicy.LAST_WIN.toString) {
// TODO: Remove after fix ready for
// https://github.com/facebookincubator/velox/issues/10219
throw new GlutenNotSupportException(
diff --git a/gluten-ut/pom.xml b/gluten-ut/pom.xml
index ec0158c41e82..58b4e6d65835 100644
--- a/gluten-ut/pom.xml
+++ b/gluten-ut/pom.xml
@@ -230,5 +230,11 @@
spark40
+
+ spark-4.1
+
+ spark41
+
+
diff --git a/gluten-ut/spark41/pom.xml b/gluten-ut/spark41/pom.xml
new file mode 100644
index 000000000000..838649a21dc2
--- /dev/null
+++ b/gluten-ut/spark41/pom.xml
@@ -0,0 +1,306 @@
+
+
+ 4.0.0
+
+ org.apache.gluten
+ gluten-ut
+ 1.6.0-SNAPSHOT
+ ../pom.xml
+
+
+ gluten-ut-spark41
+ jar
+ Gluten Unit Test Spark41
+
+
+ 1.16.0
+
+
+
+
+ org.apache.gluten
+ gluten-ut-common
+ ${project.version}
+ test-jar
+ compile
+
+
+ org.apache.parquet
+ parquet-column
+ ${parquet.version}
+ tests
+ test
+
+
+ org.apache.parquet
+ parquet-common
+ ${parquet.version}
+ tests
+ test
+
+
+ org.apache.commons
+ commons-lang3
+ ${commons-lang3.version}
+
+
+ org.apache.hive
+ hive-llap-common
+ 4.0.0
+ test
+
+
+ org.apache.hive
+ hive-common
+
+
+ org.apache.hive
+ hive-serde
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+
+ org.apache.hive
+ hive-llap-client
+ 2.3.10
+ test
+
+
+ org.apache.hive
+ hive-common
+
+
+ org.apache.hive
+ hive-serde
+
+
+ org.apache.hive
+ hive-llap-common
+
+
+ org.apache.curator
+ curator-framework
+
+
+ org.apache.curator
+ apache-curator
+
+
+ org.apache.zookeeper
+ zookeeper
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-resources-plugin
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+
+
+ org.scalastyle
+ scalastyle-maven-plugin
+
+
+ com.diffplug.spotless
+ spotless-maven-plugin
+
+
+ org.apache.maven.plugins
+ maven-checkstyle-plugin
+
+
+ org.scalatest
+ scalatest-maven-plugin
+
+ .
+
+
+
+ test
+
+ test
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+
+
+ prepare-test-jar
+
+ test-jar
+
+ test-compile
+
+
+
+
+ target/scala-${scala.binary.version}/classes
+ target/scala-${scala.binary.version}/test-classes
+
+
+
+
+ backends-clickhouse
+
+ false
+
+
+
+ org.apache.gluten
+ backends-clickhouse
+ ${project.version}
+ test
+
+
+ org.apache.celeborn
+ celeborn-client-spark-${spark.major.version}-shaded_${scala.binary.version}
+ ${celeborn.version}
+ test
+
+
+ org.apache.arrow
+ arrow-memory-core
+ ${arrow.version}
+ provided
+
+
+ io.netty
+ netty-common
+
+
+ io.netty
+ netty-buffer
+
+
+
+
+ org.apache.arrow
+ arrow-vector
+ ${arrow.version}
+ provided
+
+
+ io.netty
+ netty-common
+
+
+ io.netty
+ netty-buffer
+
+
+
+
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+
+
+ add-sources
+
+ add-test-source
+
+ generate-sources
+
+
+ src/test/backends-clickhouse
+
+
+
+
+
+
+
+
+
+ backends-velox
+
+ false
+
+
+
+ org.junit.jupiter
+ junit-jupiter
+ 5.9.3
+ test
+
+
+ net.aichler
+ jupiter-interface
+ 0.11.1
+ test
+
+
+ org.apache.gluten
+ backends-velox
+ ${project.version}
+ test
+
+
+
+ org.slf4j
+ slf4j-api
+ ${slf4j.version}
+ test
+
+
+ org.apache.logging.log4j
+ log4j-slf4j2-impl
+ ${log4j.version}
+ test
+
+
+ com.h2database
+ h2
+ 2.2.220
+ test
+
+
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+
+
+ add-sources
+
+ add-test-source
+
+ generate-sources
+
+
+ src/test/backends-velox
+
+
+
+
+
+
+
+
+
+
diff --git a/gluten-ut/spark41/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala b/gluten-ut/spark41/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
new file mode 100644
index 000000000000..4258cd891a5a
--- /dev/null
+++ b/gluten-ut/spark41/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten
+
+import org.apache.spark.sql.execution.{ColumnarWriteFilesExec, SparkPlan}
+
+trait GlutenColumnarWriteTestSupport {
+
+ def checkWriteFilesAndGetChild(sparkPlan: SparkPlan): SparkPlan = {
+ assert(sparkPlan.isInstanceOf[ColumnarWriteFilesExec])
+ sparkPlan.asInstanceOf[ColumnarWriteFilesExec].child
+ }
+}
diff --git a/gluten-ut/spark41/src/test/backends-clickhouse/org/apache/gluten/execution/parquet/GlutenParquetV1FilterSuite2.scala b/gluten-ut/spark41/src/test/backends-clickhouse/org/apache/gluten/execution/parquet/GlutenParquetV1FilterSuite2.scala
new file mode 100644
index 000000000000..5b84888376aa
--- /dev/null
+++ b/gluten-ut/spark41/src/test/backends-clickhouse/org/apache/gluten/execution/parquet/GlutenParquetV1FilterSuite2.scala
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.execution.parquet
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.execution.datasources.parquet.GlutenParquetV1FilterSuite
+
+
+/** testing use_local_format parquet reader. **/
+class GlutenParquetV1FilterSuite2 extends GlutenParquetV1FilterSuite {
+ override def sparkConf: SparkConf =
+ super.sparkConf
+ .set("spark.gluten.sql.columnar.backend.ch.runtime_config.use_local_format", "true")
+}
diff --git a/gluten-ut/spark41/src/test/backends-velox/org/apache/gluten/GlutenColumnarWriteTestSupport.scala b/gluten-ut/spark41/src/test/backends-velox/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
new file mode 100644
index 000000000000..68c0e1c932b5
--- /dev/null
+++ b/gluten-ut/spark41/src/test/backends-velox/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten
+
+import org.apache.spark.sql.execution.{SparkPlan, ColumnarWriteFilesExec}
+
+trait GlutenColumnarWriteTestSupport {
+
+ def checkWriteFilesAndGetChild(sparkPlan: SparkPlan): SparkPlan = {
+ assert(sparkPlan.isInstanceOf[ColumnarWriteFilesExec])
+ sparkPlan.asInstanceOf[ColumnarWriteFilesExec].child
+ }
+}
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/group-by-ordinal.sql b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/group-by-ordinal.sql
new file mode 100644
index 000000000000..b773396c050d
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/group-by-ordinal.sql
@@ -0,0 +1,96 @@
+-- group by ordinal positions
+
+create temporary view data as select * from values
+ (1, 1),
+ (1, 2),
+ (2, 1),
+ (2, 2),
+ (3, 1),
+ (3, 2)
+ as data(a, b);
+
+-- basic case
+select a, sum(b) from data group by 1;
+
+-- constant case
+select 1, 2, sum(b) from data group by 1, 2;
+
+-- duplicate group by column
+select a, 1, sum(b) from data group by a, 1;
+select a, 1, sum(b) from data group by 1, 2;
+
+-- group by a non-aggregate expression's ordinal
+select a, b + 2, count(2) from data group by a, 2;
+
+-- with alias
+select a as aa, b + 2 as bb, count(2) from data group by 1, 2;
+
+-- foldable non-literal: this should be the same as no grouping.
+select sum(b) from data group by 1 + 0;
+
+-- negative cases: ordinal out of range
+select a, b from data group by -1;
+select a, b from data group by 0;
+select a, b from data group by 3;
+
+-- negative case: position is an aggregate expression
+select a, b, sum(b) from data group by 3;
+select a, b, sum(b) + 2 from data group by 3;
+
+-- negative case: nondeterministic expression
+select a, rand(0), sum(b)
+from
+(select /*+ REPARTITION(1) */ a, b from data) group by a, 2;
+
+-- negative case: star
+select * from data group by a, b, 1;
+
+-- group by ordinal followed by order by
+select a, count(a) from (select 1 as a) tmp group by 1 order by 1;
+
+-- group by ordinal followed by having
+select count(a), a from (select 1 as a) tmp group by 2 having a > 0;
+
+-- mixed cases: group-by ordinals and aliases
+select a, a AS k, count(b) from data group by k, 1;
+
+-- can use ordinal in CUBE
+select a, b, count(1) from data group by cube(1, 2);
+
+-- mixed cases: can use ordinal in CUBE
+select a, b, count(1) from data group by cube(1, b);
+
+-- can use ordinal with cube
+select a, b, count(1) from data group by 1, 2 with cube;
+
+-- can use ordinal in ROLLUP
+select a, b, count(1) from data group by rollup(1, 2);
+
+-- mixed cases: can use ordinal in ROLLUP
+select a, b, count(1) from data group by rollup(1, b);
+
+-- can use ordinal with rollup
+select a, b, count(1) from data group by 1, 2 with rollup;
+
+-- can use ordinal in GROUPING SETS
+select a, b, count(1) from data group by grouping sets((1), (2), (1, 2));
+
+-- mixed cases: can use ordinal in GROUPING SETS
+select a, b, count(1) from data group by grouping sets((1), (b), (a, 2));
+
+select a, b, count(1) from data group by a, 2 grouping sets((1), (b), (a, 2));
+
+-- range error
+select a, b, count(1) from data group by a, -1;
+
+select a, b, count(1) from data group by a, 3;
+
+select a, b, count(1) from data group by cube(-1, 2);
+
+select a, b, count(1) from data group by cube(1, 3);
+
+-- turn off group by ordinal
+set spark.sql.groupByOrdinal=false;
+
+-- can now group by negative literal
+select sum(b) from data group by -1;
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/group-by.sql b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/group-by.sql
new file mode 100644
index 000000000000..c35cdb0de271
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/group-by.sql
@@ -0,0 +1,266 @@
+-- Test aggregate operator with codegen on and off.
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN
+
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
+AS testData(a, b);
+
+-- Aggregate with empty GroupBy expressions.
+SELECT a, COUNT(b) FROM testData;
+SELECT COUNT(a), COUNT(b) FROM testData;
+
+-- Aggregate with non-empty GroupBy expressions.
+SELECT a, COUNT(b) FROM testData GROUP BY a;
+SELECT a, COUNT(b) FROM testData GROUP BY b;
+SELECT COUNT(a), COUNT(b) FROM testData GROUP BY a;
+
+-- Aggregate grouped by literals.
+SELECT 'foo', COUNT(a) FROM testData GROUP BY 1;
+
+-- Aggregate grouped by literals (whole stage code generation).
+SELECT 'foo' FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate grouped by literals (hash aggregate).
+SELECT 'foo', APPROX_COUNT_DISTINCT(a) FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate grouped by literals (sort aggregate).
+SELECT 'foo', MAX(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate with complex GroupBy expressions.
+SELECT a + b, COUNT(b) FROM testData GROUP BY a + b;
+SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1;
+SELECT a + 1 + 1, COUNT(b) FROM testData GROUP BY a + 1;
+
+-- struct() in group by
+SELECT count(1) FROM testData GROUP BY struct(a + 0.1 AS aa);
+
+-- Aggregate with nulls.
+SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a)
+FROM testData;
+
+-- Aggregate with foldable input and multiple distinct groups.
+SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a;
+
+-- Aliases in SELECT could be used in GROUP BY
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1;
+
+-- GROUP BY alias with invalid col in SELECT list
+SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k;
+
+-- Aggregate functions cannot be used in GROUP BY
+SELECT COUNT(b) AS k FROM testData GROUP BY k;
+
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
+(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v);
+SELECT k AS a, COUNT(v) FROM testDataHasSameNameWithAlias GROUP BY a;
+
+-- turn off group by aliases
+set spark.sql.groupByAliases=false;
+
+-- Check analysis exceptions
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
+
+-- Aggregate with empty input and non-empty GroupBy expressions.
+SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a;
+
+-- Aggregate with empty input and empty GroupBy expressions.
+SELECT COUNT(1) FROM testData WHERE false;
+SELECT 1 FROM (SELECT COUNT(1) FROM testData WHERE false) t;
+
+-- Aggregate with empty GroupBy expressions and filter on top
+SELECT 1 from (
+ SELECT 1 AS z,
+ MIN(a.x)
+ FROM (select 1 as x) a
+ WHERE false
+) b
+where b.z != b.z;
+
+-- SPARK-24369 multiple distinct aggregations having the same argument set
+SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*)
+ FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y);
+
+-- SPARK-25708 HAVING without GROUP BY means global aggregate
+SELECT 1 FROM range(10) HAVING true;
+
+SELECT 1 FROM range(10) HAVING MAX(id) > 0;
+
+SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true;
+
+SELECT 1 FROM range(10) HAVING true;
+
+SELECT 1 FROM range(10) HAVING MAX(id) > 0;
+
+SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false;
+
+-- Test data
+CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
+ (1, true), (1, false),
+ (2, true),
+ (3, false), (3, null),
+ (4, null), (4, null),
+ (5, null), (5, true), (5, false) AS test_agg(k, v);
+
+-- empty table
+SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0;
+
+-- all null values
+SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4;
+
+-- aggregates are null Filtering
+SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5;
+
+-- group by
+SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k;
+
+-- having
+SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false;
+SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) IS NULL;
+
+-- basic subquery path to make sure rewrite happens in both parent and child plans.
+SELECT k,
+ Every(v) AS every
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Any(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY k;
+
+-- basic subquery path to make sure rewrite happens in both parent and child plans.
+SELECT k,
+ Every(v) AS every
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Every(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY k;
+
+-- input type checking Int
+SELECT every(1);
+
+-- input type checking Short
+SELECT some(1S);
+
+-- input type checking Long
+SELECT any(1L);
+
+-- input type checking String
+SELECT every("true");
+
+-- input type checking Decimal
+SELECT bool_and(1.0);
+
+-- input type checking double
+SELECT bool_or(1.0D);
+
+-- every/some/any aggregates/bool_and/bool_or are supported as windows expression.
+SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+
+-- Having referencing aggregate expressions is ok.
+SELECT count(*) FROM test_agg HAVING count(*) > 1L;
+SELECT k, max(v) FROM test_agg GROUP BY k HAVING max(v) = true;
+
+-- Aggregate expressions can be referenced through an alias
+SELECT * FROM (SELECT COUNT(*) AS cnt FROM test_agg) WHERE cnt > 1L;
+
+-- Error when aggregate expressions are in where clause directly
+SELECT count(*) FROM test_agg WHERE count(*) > 1L;
+SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L;
+SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1;
+
+-- Aggregate with multiple distinct decimal columns
+SELECT AVG(DISTINCT decimal_col), SUM(DISTINCT decimal_col) FROM VALUES (CAST(1 AS DECIMAL(9, 0))) t(decimal_col);
+
+-- SPARK-34581: Don't optimize out grouping expressions from aggregate expressions without aggregate function
+SELECT not(a IS NULL), count(*) AS c
+FROM testData
+GROUP BY a IS NULL;
+
+SELECT if(not(a IS NULL), rand(0), 1), count(*) AS c
+FROM testData
+GROUP BY a IS NULL;
+
+
+-- Histogram aggregates with different numeric input types
+SELECT
+ histogram_numeric(col, 2) as histogram_2,
+ histogram_numeric(col, 3) as histogram_3,
+ histogram_numeric(col, 5) as histogram_5,
+ histogram_numeric(col, 10) as histogram_10
+FROM VALUES
+ (1), (2), (3), (4), (5), (6), (7), (8), (9), (10),
+ (11), (12), (13), (14), (15), (16), (17), (18), (19), (20),
+ (21), (22), (23), (24), (25), (26), (27), (28), (29), (30),
+ (31), (32), (33), (34), (35), (3), (37), (38), (39), (40),
+ (41), (42), (43), (44), (45), (46), (47), (48), (49), (50) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (1), (2), (3) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (1L), (2L), (3L) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (1F), (2F), (3F) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (1D), (2D), (3D) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (1S), (2S), (3S) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS BYTE)), (CAST(2 AS BYTE)), (CAST(3 AS BYTE)) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS TINYINT)), (CAST(2 AS TINYINT)), (CAST(3 AS TINYINT)) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS SMALLINT)), (CAST(2 AS SMALLINT)), (CAST(3 AS SMALLINT)) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS BIGINT)), (CAST(2 AS BIGINT)), (CAST(3 AS BIGINT)) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'),
+ (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '100-00' YEAR TO MONTH),
+ (INTERVAL '110-00' YEAR TO MONTH), (INTERVAL '120-00' YEAR TO MONTH) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '12 20:4:0' DAY TO SECOND),
+ (INTERVAL '12 21:4:0' DAY TO SECOND), (INTERVAL '12 22:4:0' DAY TO SECOND) AS tab(col);
+SELECT histogram_numeric(col, 3)
+FROM VALUES (NULL), (NULL), (NULL) AS tab(col);
+SELECT histogram_numeric(col, 3)
+FROM VALUES (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE)) AS tab(col);
+SELECT histogram_numeric(col, 3)
+FROM VALUES (CAST(NULL AS INT)), (CAST(NULL AS INT)), (CAST(NULL AS INT)) AS tab(col);
+
+-- SPARK-27974: Support ANSI Aggregate Function: array_agg
+SELECT
+ collect_list(col),
+ array_agg(col)
+FROM VALUES
+ (1), (2), (1) AS tab(col);
+SELECT
+ a,
+ collect_list(b),
+ array_agg(b)
+FROM VALUES
+ (1,4),(2,3),(1,4),(2,4) AS v(a,b)
+GROUP BY a;
+
+
+SELECT mode(a), mode(b) FROM testData;
+SELECT a, mode(b) FROM testData GROUP BY a ORDER BY a;
+
+
+-- SPARK-44846: PushFoldableIntoBranches in complex grouping expressions cause bindReference error
+SELECT c * 2 AS d
+FROM (
+ SELECT if(b > 1, 1, b) AS c
+ FROM (
+ SELECT if(a < 0, 0, a) AS b
+ FROM VALUES (-1), (1), (2) AS t1(a)
+ ) t2
+ GROUP BY b
+ ) t3
+GROUP BY c;
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/linear-regression.sql b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/linear-regression.sql
new file mode 100644
index 000000000000..c7cb5bf1117a
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/linear-regression.sql
@@ -0,0 +1,52 @@
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testRegression AS SELECT * FROM VALUES
+(1, 10, null), (2, 10, 11), (2, 20, 22), (2, 25, null), (2, 30, 35)
+AS testRegression(k, y, x);
+
+-- SPARK-37613: Support ANSI Aggregate Function: regr_count
+SELECT regr_count(y, x) FROM testRegression;
+SELECT regr_count(y, x) FROM testRegression WHERE x IS NOT NULL;
+SELECT k, count(*), regr_count(y, x) FROM testRegression GROUP BY k;
+SELECT k, count(*) FILTER (WHERE x IS NOT NULL), regr_count(y, x) FROM testRegression GROUP BY k;
+
+-- SPARK-37613: Support ANSI Aggregate Function: regr_r2
+SELECT regr_r2(y, x) FROM testRegression;
+SELECT regr_r2(y, x) FROM testRegression WHERE x IS NOT NULL;
+SELECT k, corr(y, x), regr_r2(y, x) FROM testRegression GROUP BY k;
+SELECT k, corr(y, x) FILTER (WHERE x IS NOT NULL), regr_r2(y, x) FROM testRegression GROUP BY k;
+
+-- SPARK-37614: Support ANSI Aggregate Function: regr_avgx & regr_avgy
+SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression;
+SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL;
+SELECT k, avg(x), avg(y), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k;
+SELECT k, avg(x) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), avg(y) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k;
+
+-- SPARK-37672: Support ANSI Aggregate Function: regr_sxx
+SELECT regr_sxx(y, x) FROM testRegression;
+SELECT regr_sxx(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL;
+SELECT k, regr_sxx(y, x) FROM testRegression GROUP BY k;
+SELECT k, regr_sxx(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k;
+
+-- SPARK-37681: Support ANSI Aggregate Function: regr_sxy
+SELECT regr_sxy(y, x) FROM testRegression;
+SELECT regr_sxy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL;
+SELECT k, regr_sxy(y, x) FROM testRegression GROUP BY k;
+SELECT k, regr_sxy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k;
+
+-- SPARK-37702: Support ANSI Aggregate Function: regr_syy
+SELECT regr_syy(y, x) FROM testRegression;
+SELECT regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL;
+SELECT k, regr_syy(y, x) FROM testRegression GROUP BY k;
+SELECT k, regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k;
+
+-- SPARK-39230: Support ANSI Aggregate Function: regr_slope
+SELECT regr_slope(y, x) FROM testRegression;
+SELECT regr_slope(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL;
+SELECT k, regr_slope(y, x) FROM testRegression GROUP BY k;
+SELECT k, regr_slope(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k;
+
+-- SPARK-37623: Support ANSI Aggregate Function: regr_intercept
+SELECT regr_intercept(y, x) FROM testRegression;
+SELECT regr_intercept(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL;
+SELECT k, regr_intercept(y, x) FROM testRegression GROUP BY k;
+SELECT k, regr_intercept(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k;
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/misc-functions.sql b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/misc-functions.sql
new file mode 100644
index 000000000000..907ff33000d8
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/misc-functions.sql
@@ -0,0 +1,22 @@
+-- test for misc functions
+
+-- typeof
+select typeof(null);
+select typeof(true);
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L);
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2);
+select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days');
+select typeof(x'ABCD'), typeof('SPARK');
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark'));
+
+-- SPARK-32793: Rewrite AssertTrue with RaiseError
+SELECT assert_true(true), assert_true(boolean(1));
+SELECT assert_true(false);
+SELECT assert_true(boolean(0));
+SELECT assert_true(null);
+SELECT assert_true(boolean(null));
+SELECT assert_true(false, 'custom error message');
+
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v);
+SELECT raise_error('error message');
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc;
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/random.sql b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/random.sql
new file mode 100644
index 000000000000..a1aae7b8759d
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/random.sql
@@ -0,0 +1,17 @@
+-- rand with the seed 0
+SELECT rand(0);
+SELECT rand(cast(3 / 7 AS int));
+SELECT rand(NULL);
+SELECT rand(cast(NULL AS int));
+
+-- rand unsupported data type
+SELECT rand(1.0);
+
+-- randn with the seed 0
+SELECT randn(0L);
+SELECT randn(cast(3 / 7 AS long));
+SELECT randn(NULL);
+SELECT randn(cast(NULL AS long));
+
+-- randn unsupported data type (NOTE(review): query below calls rand, not randn — matches upstream; verify)
+SELECT rand('1')
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/table-valued-functions.sql b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/table-valued-functions.sql
new file mode 100644
index 000000000000..79d427bc2099
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/table-valued-functions.sql
@@ -0,0 +1,126 @@
+-- unresolved function
+select * from dummy(3);
+
+-- range call with end
+select * from range(6 + cos(3));
+
+-- range call with start and end
+select * from range(5, 10);
+
+-- range call with step
+select * from range(0, 10, 2);
+
+-- range call with numPartitions
+select * from range(0, 10, 1, 200);
+
+-- range call with invalid number of arguments
+select * from range(1, 1, 1, 1, 1);
+
+-- range call with null
+select * from range(1, null);
+
+-- range call with incompatible type
+select * from range(array(1, 2, 3));
+
+-- range call with illegal step
+select * from range(0, 5, 0);
+
+-- range call with a mixed-case function name
+select * from RaNgE(2);
+
+-- range call with alias
+select i from range(0, 2) t(i);
+
+-- explode
+select * from explode(array(1, 2));
+select * from explode(map('a', 1, 'b', 2));
+
+-- explode with empty values
+select * from explode(array());
+select * from explode(map());
+
+-- explode with column aliases
+select * from explode(array(1, 2)) t(c1);
+select * from explode(map('a', 1, 'b', 2)) t(k, v);
+
+-- explode with non-deterministic values
+select * from explode(array(rand(0)));
+
+-- explode with erroneous input
+select * from explode(null);
+select * from explode(null) t(c1);
+select * from explode(1);
+select * from explode(1, 2);
+select * from explode(explode(array(1)));
+select * from explode(array(1, 2)) t(c1, c2);
+
+-- explode_outer
+select * from explode_outer(array(1, 2));
+select * from explode_outer(map('a', 1, 'b', 2));
+select * from explode_outer(array());
+select * from explode_outer(map());
+
+-- table-valued functions with join
+select * from range(2) join explode(array(1, 2));
+select * from range(2) join explode_outer(array());
+
+-- inline
+select * from inline(array(struct(1, 'a'), struct(2, 'b')));
+select * from inline(array(struct(1, 'a'), struct(2, 'b'))) t(x, y);
+select * from inline(array_remove(array(struct(1, 'a')), struct(1, 'a')));
+
+-- inline with erroneous input
+select * from inline(null);
+select * from inline(array(struct(1, 2), struct(2, 3))) t(a, b, c);
+
+-- inline_outer
+select * from inline_outer(array(struct(1, 'a'), struct(2, 'b')));
+select * from inline_outer(array_remove(array(struct(1, 'a')), struct(1, 'a')));
+
+-- posexplode
+select * from posexplode(array());
+select * from posexplode(array(1, 2));
+select * from posexplode(array(1, 2)) t(pos, x);
+select * from posexplode(map());
+select * from posexplode(map('a', 1, 'b', 2));
+select * from posexplode(map('a', 1, 'b', 2)) t(pos, k, v);
+
+-- posexplode with erroneous input
+select * from posexplode(1);
+select * from posexplode(1, 2);
+select * from posexplode(explode(array(1)));
+select * from posexplode(array(1, 2)) t(x);
+
+-- posexplode
+select * from posexplode_outer(array());
+select * from posexplode_outer(array(1, 2));
+select * from posexplode_outer(map());
+select * from posexplode_outer(map('a', 1, 'b', 2));
+
+-- json_tuple
+select * from json_tuple('{"a": 1, "b": 2}', 'a', 'b');
+select * from json_tuple('{"a": 1, "b": 2}', 'a', 'c');
+select * from json_tuple('{"a": 1, "b": 2}', 'a', 'a');
+select * from json_tuple('{"a": 1, "b": 2}', 'a', 'b') AS t(x, y);
+select * from json_tuple('{"a": bad, "b": string}', 'a', 'b');
+
+-- json_tuple with erroneous input
+select * from json_tuple();
+select * from json_tuple('{"a": 1}');
+select * from json_tuple('{"a": 1}', 1);
+select * from json_tuple('{"a": 1}', null);
+select * from json_tuple('{"a": 1, "b": 2}', 'a', 'b') AS t(x);
+
+-- stack
+select * from stack(1, 1, 2, 3);
+select * from stack(2, 1, 2, 3);
+select * from stack(3, 1, 2, 3) t(x);
+select * from stack(4, 1, 2, 3) t(x);
+select * from stack(2, 1, 1.1, 'a', 2, 2.2, 'b') t(a, b, c);
+select * from stack(2, 1, 1.1, null, 2, null, 'b') t(a, b, c);
+
+-- stack with erroneous input
+select * from stack();
+select * from stack(2, 1, 2, 3) t(a, b, c);
+select * from stack(2, 1, '1.1', 'a', 2, 2.2, 'b');
+select * from stack(2, explode(array(1, 2, 3)));
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/udf/udf-group-by.sql b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/udf/udf-group-by.sql
new file mode 100644
index 000000000000..0cc57c97b020
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/inputs/udf/udf-group-by.sql
@@ -0,0 +1,156 @@
+-- This test file was converted from group-by.sql.
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
+AS testData(a, b);
+
+-- Aggregate with empty GroupBy expressions.
+SELECT udf(a), udf(COUNT(b)) FROM testData;
+SELECT COUNT(udf(a)), udf(COUNT(b)) FROM testData;
+
+-- Aggregate with non-empty GroupBy expressions.
+SELECT udf(a), COUNT(udf(b)) FROM testData GROUP BY a;
+SELECT udf(a), udf(COUNT(udf(b))) FROM testData GROUP BY b;
+SELECT COUNT(udf(a)), COUNT(udf(b)) FROM testData GROUP BY udf(a);
+
+-- Aggregate grouped by literals.
+SELECT 'foo', COUNT(udf(a)) FROM testData GROUP BY 1;
+
+-- Aggregate grouped by literals (whole stage code generation).
+SELECT 'foo' FROM testData WHERE a = 0 GROUP BY udf(1);
+
+-- Aggregate grouped by literals (hash aggregate).
+SELECT 'foo', udf(APPROX_COUNT_DISTINCT(udf(a))) FROM testData WHERE a = 0 GROUP BY udf(1);
+
+-- Aggregate grouped by literals (sort aggregate).
+SELECT 'foo', MAX(STRUCT(udf(a))) FROM testData WHERE a = 0 GROUP BY udf(1);
+
+-- Aggregate with complex GroupBy expressions.
+SELECT udf(a + b), udf(COUNT(b)) FROM testData GROUP BY a + b;
+SELECT udf(a + 2), udf(COUNT(b)) FROM testData GROUP BY a + 1;
+SELECT udf(a + 1) + 1, udf(COUNT(b)) FROM testData GROUP BY udf(a + 1);
+
+-- Aggregate with nulls.
+SELECT SKEWNESS(udf(a)), udf(KURTOSIS(a)), udf(MIN(a)), MAX(udf(a)), udf(AVG(udf(a))), udf(VARIANCE(a)), STDDEV(udf(a)), udf(SUM(a)), udf(COUNT(a))
+FROM testData;
+
+-- Aggregate with foldable input and multiple distinct groups.
+SELECT COUNT(DISTINCT udf(b)), udf(COUNT(DISTINCT b, c)) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY udf(a);
+
+-- Aliases in SELECT could be used in GROUP BY
+SELECT udf(a) AS k, COUNT(udf(b)) FROM testData GROUP BY k;
+SELECT a AS k, udf(COUNT(b)) FROM testData GROUP BY k HAVING k > 1;
+
+-- Aggregate functions cannot be used in GROUP BY
+SELECT udf(COUNT(b)) AS k FROM testData GROUP BY k;
+
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
+(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v);
+SELECT k AS a, udf(COUNT(udf(v))) FROM testDataHasSameNameWithAlias GROUP BY udf(a);
+
+-- turn off group by aliases
+set spark.sql.groupByAliases=false;
+
+-- Check analysis exceptions
+SELECT a AS k, udf(COUNT(udf(b))) FROM testData GROUP BY k;
+
+-- Aggregate with empty input and non-empty GroupBy expressions.
+SELECT udf(a), COUNT(udf(1)) FROM testData WHERE false GROUP BY udf(a);
+
+-- Aggregate with empty input and empty GroupBy expressions.
+SELECT udf(COUNT(1)) FROM testData WHERE false;
+SELECT 1 FROM (SELECT udf(COUNT(1)) FROM testData WHERE false) t;
+
+-- Aggregate with empty GroupBy expressions and filter on top
+SELECT 1 from (
+ SELECT 1 AS z,
+ udf(MIN(a.x))
+ FROM (select 1 as x) a
+ WHERE false
+) b
+where b.z != b.z;
+
+-- SPARK-24369 multiple distinct aggregations having the same argument set
+SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*)
+ FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y);
+
+-- SPARK-25708 HAVING without GROUP BY means global aggregate
+SELECT udf(1) FROM range(10) HAVING true;
+
+SELECT udf(udf(1)) FROM range(10) HAVING MAX(id) > 0;
+
+SELECT udf(id) FROM range(10) HAVING id > 0;
+
+-- Test data
+CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
+ (1, true), (1, false),
+ (2, true),
+ (3, false), (3, null),
+ (4, null), (4, null),
+ (5, null), (5, true), (5, false) AS test_agg(k, v);
+
+-- empty table
+SELECT udf(every(v)), udf(some(v)), any(v) FROM test_agg WHERE 1 = 0;
+
+-- all null values
+SELECT udf(every(udf(v))), some(v), any(v) FROM test_agg WHERE k = 4;
+
+-- aggregates are null Filtering
+SELECT every(v), udf(some(v)), any(v) FROM test_agg WHERE k = 5;
+
+-- group by
+SELECT udf(k), every(v), udf(some(v)), any(v) FROM test_agg GROUP BY udf(k);
+
+-- having
+SELECT udf(k), every(v) FROM test_agg GROUP BY k HAVING every(v) = false;
+SELECT udf(k), udf(every(v)) FROM test_agg GROUP BY udf(k) HAVING every(v) IS NULL;
+
+-- basic subquery path to make sure rewrite happens in both parent and child plans.
+SELECT udf(k),
+ udf(Every(v)) AS every
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Any(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY udf(k);
+
+-- basic subquery path to make sure rewrite happens in both parent and child plans.
+SELECT udf(udf(k)),
+ Every(v) AS every
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Every(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY udf(udf(k));
+
+-- input type checking Int
+SELECT every(udf(1));
+
+-- input type checking Short
+SELECT some(udf(1S));
+
+-- input type checking Long
+SELECT any(udf(1L));
+
+-- input type checking String
+SELECT udf(every("true"));
+
+-- every/some/any aggregates are supported as windows expression.
+SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+SELECT k, udf(udf(v)), some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+SELECT udf(udf(k)), v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+
+-- Having referencing aggregate expressions is ok.
+SELECT udf(count(*)) FROM test_agg HAVING count(*) > 1L;
+SELECT k, udf(max(v)) FROM test_agg GROUP BY k HAVING max(v) = true;
+
+-- Aggregate expressions can be referenced through an alias
+SELECT * FROM (SELECT udf(COUNT(*)) AS cnt FROM test_agg) WHERE cnt > 1L;
+
+-- Error when aggregate expressions are in where clause directly
+SELECT udf(count(*)) FROM test_agg WHERE count(*) > 1L;
+SELECT udf(count(*)) FROM test_agg WHERE count(*) + 1L > 1L;
+SELECT udf(count(*)) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1;
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/group-by-ordinal.sql.out b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/group-by-ordinal.sql.out
new file mode 100644
index 000000000000..b968b4e09fac
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/group-by-ordinal.sql.out
@@ -0,0 +1,524 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+create temporary view data as select * from values
+ (1, 1),
+ (1, 2),
+ (2, 1),
+ (2, 2),
+ (3, 1),
+ (3, 2)
+ as data(a, b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+select a, sum(b) from data group by 1
+-- !query schema
+struct
+-- !query output
+1 3
+2 3
+3 3
+
+
+-- !query
+select 1, 2, sum(b) from data group by 1, 2
+-- !query schema
+struct<1:int,2:int,sum(b):bigint>
+-- !query output
+1 2 9
+
+
+-- !query
+select a, 1, sum(b) from data group by a, 1
+-- !query schema
+struct
+-- !query output
+1 1 3
+2 1 3
+3 1 3
+
+
+-- !query
+select a, 1, sum(b) from data group by 1, 2
+-- !query schema
+struct
+-- !query output
+1 1 3
+2 1 3
+3 1 3
+
+
+-- !query
+select a, b + 2, count(2) from data group by a, 2
+-- !query schema
+struct
+-- !query output
+1 3 1
+1 4 1
+2 3 1
+2 4 1
+3 3 1
+3 4 1
+
+
+-- !query
+select a as aa, b + 2 as bb, count(2) from data group by 1, 2
+-- !query schema
+struct
+-- !query output
+1 3 1
+1 4 1
+2 3 1
+2 4 1
+3 3 1
+3 4 1
+
+
+-- !query
+select sum(b) from data group by 1 + 0
+-- !query schema
+struct
+-- !query output
+9
+
+
+-- !query
+select a, b from data group by -1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42805",
+ "messageParameters" : {
+ "index" : "-1",
+ "size" : "2"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 32,
+ "stopIndex" : 33,
+ "fragment" : "-1"
+ } ]
+}
+
+
+-- !query
+select a, b from data group by 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42805",
+ "messageParameters" : {
+ "index" : "0",
+ "size" : "2"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 32,
+ "stopIndex" : 32,
+ "fragment" : "0"
+ } ]
+}
+
+
+-- !query
+select a, b from data group by 3
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42805",
+ "messageParameters" : {
+ "index" : "3",
+ "size" : "2"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 32,
+ "stopIndex" : 32,
+ "fragment" : "3"
+ } ]
+}
+
+
+-- !query
+select a, b, sum(b) from data group by 3
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_POS_AGGREGATE",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "aggExpr" : "sum(data.b) AS `sum(b)`",
+ "index" : "3"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 40,
+ "stopIndex" : 40,
+ "fragment" : "3"
+ } ]
+}
+
+
+-- !query
+select a, b, sum(b) + 2 from data group by 3
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_POS_AGGREGATE",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "aggExpr" : "(sum(data.b) + CAST(2 AS BIGINT)) AS `(sum(b) + 2)`",
+ "index" : "3"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 44,
+ "stopIndex" : 44,
+ "fragment" : "3"
+ } ]
+}
+
+
+-- !query
+select a, rand(0), sum(b)
+from
+(select /*+ REPARTITION(1) */ a, b from data) group by a, 2
+-- !query schema
+struct
+-- !query output
+1 0.5488135024422883 1
+1 0.7151893651681639 2
+2 0.5448831775801376 2
+2 0.6027633705776989 1
+3 0.4236547969336536 1
+3 0.6458941151817286 2
+
+
+-- !query
+select * from data group by a, b, 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "STAR_GROUP_BY_POS",
+ "sqlState" : "0A000"
+}
+
+
+-- !query
+select a, count(a) from (select 1 as a) tmp group by 1 order by 1
+-- !query schema
+struct
+-- !query output
+1 1
+
+
+-- !query
+select count(a), a from (select 1 as a) tmp group by 2 having a > 0
+-- !query schema
+struct
+-- !query output
+1 1
+
+
+-- !query
+select a, a AS k, count(b) from data group by k, 1
+-- !query schema
+struct
+-- !query output
+1 1 2
+2 2 2
+3 3 2
+
+
+-- !query
+select a, b, count(1) from data group by cube(1, 2)
+-- !query schema
+struct
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+NULL NULL 6
+
+
+-- !query
+select a, b, count(1) from data group by cube(1, b)
+-- !query schema
+struct
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+NULL NULL 6
+
+
+-- !query
+select a, b, count(1) from data group by 1, 2 with cube
+-- !query schema
+struct
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+NULL NULL 6
+
+
+-- !query
+select a, b, count(1) from data group by rollup(1, 2)
+-- !query schema
+struct
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL NULL 6
+
+
+-- !query
+select a, b, count(1) from data group by rollup(1, b)
+-- !query schema
+struct
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL NULL 6
+
+
+-- !query
+select a, b, count(1) from data group by 1, 2 with rollup
+-- !query schema
+struct
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL NULL 6
+
+
+-- !query
+select a, b, count(1) from data group by grouping sets((1), (2), (1, 2))
+-- !query schema
+struct
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+
+
+-- !query
+select a, b, count(1) from data group by grouping sets((1), (b), (a, 2))
+-- !query schema
+struct
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+
+
+-- !query
+select a, b, count(1) from data group by a, 2 grouping sets((1), (b), (a, 2))
+-- !query schema
+struct
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+
+
+-- !query
+select a, b, count(1) from data group by a, -1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42805",
+ "messageParameters" : {
+ "index" : "-1",
+ "size" : "3"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 45,
+ "stopIndex" : 46,
+ "fragment" : "-1"
+ } ]
+}
+
+
+-- !query
+select a, b, count(1) from data group by a, 3
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_POS_AGGREGATE",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "aggExpr" : "count(1) AS `count(1)`",
+ "index" : "3"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 45,
+ "stopIndex" : 45,
+ "fragment" : "3"
+ } ]
+}
+
+
+-- !query
+select a, b, count(1) from data group by cube(-1, 2)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42805",
+ "messageParameters" : {
+ "index" : "-1",
+ "size" : "3"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 47,
+ "stopIndex" : 48,
+ "fragment" : "-1"
+ } ]
+}
+
+
+-- !query
+select a, b, count(1) from data group by cube(1, 3)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_POS_AGGREGATE",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "aggExpr" : "count(1) AS `count(1)`",
+ "index" : "3"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 50,
+ "stopIndex" : 50,
+ "fragment" : "3"
+ } ]
+}
+
+
+-- !query
+set spark.sql.groupByOrdinal=false
+-- !query schema
+struct
+-- !query output
+spark.sql.groupByOrdinal false
+
+
+-- !query
+select sum(b) from data group by -1
+-- !query schema
+struct
+-- !query output
+9
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/group-by.sql.out b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/group-by.sql.out
new file mode 100644
index 000000000000..a4a3f76fa6a7
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/group-by.sql.out
@@ -0,0 +1,1123 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
+AS testData(a, b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT a, COUNT(b) FROM testData
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "MISSING_GROUP_BY",
+ "sqlState" : "42803",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 32,
+ "fragment" : "SELECT a, COUNT(b) FROM testData"
+ } ]
+}
+
+
+-- !query
+SELECT COUNT(a), COUNT(b) FROM testData
+-- !query schema
+struct
+-- !query output
+7 7
+
+
+-- !query
+SELECT a, COUNT(b) FROM testData GROUP BY a
+-- !query schema
+struct
+-- !query output
+1 2
+2 2
+3 2
+NULL 1
+
+
+-- !query
+SELECT a, COUNT(b) FROM testData GROUP BY b
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "MISSING_AGGREGATION",
+ "sqlState" : "42803",
+ "messageParameters" : {
+ "expression" : "\"a\"",
+ "expressionAnyValue" : "\"any_value(a)\""
+ }
+}
+
+
+-- !query
+SELECT COUNT(a), COUNT(b) FROM testData GROUP BY a
+-- !query schema
+struct
+-- !query output
+0 1
+2 2
+2 2
+3 2
+
+
+-- !query
+SELECT 'foo', COUNT(a) FROM testData GROUP BY 1
+-- !query schema
+struct
+-- !query output
+foo 7
+
+
+-- !query
+SELECT 'foo' FROM testData WHERE a = 0 GROUP BY 1
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+SELECT 'foo', APPROX_COUNT_DISTINCT(a) FROM testData WHERE a = 0 GROUP BY 1
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+SELECT 'foo', MAX(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1
+-- !query schema
+struct>
+-- !query output
+
+
+
+-- !query
+SELECT a + b, COUNT(b) FROM testData GROUP BY a + b
+-- !query schema
+struct<(a + b):int,count(b):bigint>
+-- !query output
+2 1
+3 2
+4 2
+5 1
+NULL 1
+
+
+-- !query
+SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "MISSING_AGGREGATION",
+ "sqlState" : "42803",
+ "messageParameters" : {
+ "expression" : "\"a\"",
+ "expressionAnyValue" : "\"any_value(a)\""
+ }
+}
+
+
+-- !query
+SELECT a + 1 + 1, COUNT(b) FROM testData GROUP BY a + 1
+-- !query schema
+struct<((a + 1) + 1):int,count(b):bigint>
+-- !query output
+3 2
+4 2
+5 2
+NULL 1
+
+
+-- !query
+SELECT count(1) FROM testData GROUP BY struct(a + 0.1 AS aa)
+-- !query schema
+struct
+-- !query output
+2
+2
+2
+3
+
+
+-- !query
+SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a)
+FROM testData
+-- !query schema
+struct
+-- !query output
+-0.27238010581457284 -1.5069204152249138 1 3 2.142857142857143 0.8095238095238096 0.8997354108424375 15 7
+
+
+-- !query
+SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a
+-- !query schema
+struct
+-- !query output
+1 1
+
+
+-- !query
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 2
+2 2
+3 2
+NULL 1
+
+
+-- !query
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1
+-- !query schema
+struct
+-- !query output
+2 2
+3 2
+
+
+-- !query
+SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`non_existing`",
+ "proposal" : "`testdata`.`a`, `testdata`.`b`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 22,
+ "stopIndex" : 33,
+ "fragment" : "non_existing"
+ } ]
+}
+
+
+-- !query
+SELECT COUNT(b) AS k FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_AGGREGATE",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "sqlExpr" : "count(testdata.b)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 15,
+ "fragment" : "COUNT(b)"
+ } ]
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
+(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT k AS a, COUNT(v) FROM testDataHasSameNameWithAlias GROUP BY a
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "MISSING_AGGREGATION",
+ "sqlState" : "42803",
+ "messageParameters" : {
+ "expression" : "\"k\"",
+ "expressionAnyValue" : "\"any_value(k)\""
+ }
+}
+
+
+-- !query
+set spark.sql.groupByAliases=false
+-- !query schema
+struct
+-- !query output
+spark.sql.groupByAliases false
+
+
+-- !query
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`k`",
+ "proposal" : "`testdata`.`a`, `testdata`.`b`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 48,
+ "stopIndex" : 48,
+ "fragment" : "k"
+ } ]
+}
+
+
+-- !query
+SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+SELECT COUNT(1) FROM testData WHERE false
+-- !query schema
+struct
+-- !query output
+0
+
+
+-- !query
+SELECT 1 FROM (SELECT COUNT(1) FROM testData WHERE false) t
+-- !query schema
+struct<1:int>
+-- !query output
+1
+
+
+-- !query
+SELECT 1 from (
+ SELECT 1 AS z,
+ MIN(a.x)
+ FROM (select 1 as x) a
+ WHERE false
+) b
+where b.z != b.z
+-- !query schema
+struct<1:int>
+-- !query output
+
+
+
+-- !query
+SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*)
+ FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y)
+-- !query schema
+struct
+-- !query output
+0.9999999999999999 0.9999999999999999 3
+
+
+-- !query
+SELECT 1 FROM range(10) HAVING true
+-- !query schema
+struct<1:int>
+-- !query output
+1
+
+
+-- !query
+SELECT 1 FROM range(10) HAVING MAX(id) > 0
+-- !query schema
+struct<1:int>
+-- !query output
+1
+
+
+-- !query
+SELECT id FROM range(10) HAVING id > 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "MISSING_GROUP_BY",
+ "sqlState" : "42803",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 38,
+ "fragment" : "SELECT id FROM range(10) HAVING id > 0"
+ } ]
+}
+
+
+-- !query
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true
+-- !query schema
+struct
+-- !query output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere true
+
+
+-- !query
+SELECT 1 FROM range(10) HAVING true
+-- !query schema
+struct<1:int>
+-- !query output
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+
+
+-- !query
+SELECT 1 FROM range(10) HAVING MAX(id) > 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_WHERE_CONDITION",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "condition" : "\"(max(id) > 0)\"",
+ "expressionList" : "max(id)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 42,
+ "fragment" : "SELECT 1 FROM range(10) HAVING MAX(id) > 0"
+ } ]
+}
+
+
+-- !query
+SELECT id FROM range(10) HAVING id > 0
+-- !query schema
+struct
+-- !query output
+1
+2
+3
+4
+5
+6
+7
+8
+9
+
+
+-- !query
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false
+-- !query schema
+struct
+-- !query output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere false
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
+ (1, true), (1, false),
+ (2, true),
+ (3, false), (3, null),
+ (4, null), (4, null),
+ (5, null), (5, true), (5, false) AS test_agg(k, v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0
+-- !query schema
+struct
+-- !query output
+NULL NULL NULL NULL NULL
+
+
+-- !query
+SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4
+-- !query schema
+struct
+-- !query output
+NULL NULL NULL NULL NULL
+
+
+-- !query
+SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5
+-- !query schema
+struct
+-- !query output
+false true true false true
+
+
+-- !query
+SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 false true true false true
+2 true true true true true
+3 false false false false false
+4 NULL NULL NULL NULL NULL
+5 false true true false true
+
+
+-- !query
+SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false
+-- !query schema
+struct
+-- !query output
+1 false
+3 false
+5 false
+
+
+-- !query
+SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) IS NULL
+-- !query schema
+struct
+-- !query output
+4 NULL
+
+
+-- !query
+SELECT k,
+ Every(v) AS every
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Any(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY k
+-- !query schema
+struct
+-- !query output
+2 true
+
+
+-- !query
+SELECT k,
+ Every(v) AS every
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Every(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY k
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+SELECT every(1)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"1\"",
+ "inputType" : "\"INT\"",
+ "paramIndex" : "1",
+ "requiredType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"every(1)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 15,
+ "fragment" : "every(1)"
+ } ]
+}
+
+
+-- !query
+SELECT some(1S)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"1\"",
+ "inputType" : "\"SMALLINT\"",
+ "paramIndex" : "1",
+ "requiredType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"some(1)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 15,
+ "fragment" : "some(1S)"
+ } ]
+}
+
+
+-- !query
+SELECT any(1L)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"1\"",
+ "inputType" : "\"BIGINT\"",
+ "paramIndex" : "1",
+ "requiredType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"any(1)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 14,
+ "fragment" : "any(1L)"
+ } ]
+}
+
+
+-- !query
+SELECT every("true")
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"true\"",
+ "inputType" : "\"STRING\"",
+ "paramIndex" : "1",
+ "requiredType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"every(true)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 20,
+ "fragment" : "every(\"true\")"
+ } ]
+}
+
+
+-- !query
+SELECT bool_and(1.0)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"1.0\"",
+ "inputType" : "\"DECIMAL(2,1)\"",
+ "paramIndex" : "1",
+ "requiredType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"bool_and(1.0)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 20,
+ "fragment" : "bool_and(1.0)"
+ } ]
+}
+
+
+-- !query
+SELECT bool_or(1.0D)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"1.0\"",
+ "inputType" : "\"DOUBLE\"",
+ "paramIndex" : "1",
+ "requiredType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"bool_or(1.0)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 20,
+ "fragment" : "bool_or(1.0D)"
+ } ]
+}
+
+
+-- !query
+SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
+-- !query schema
+struct
+-- !query output
+1 false false
+1 true false
+2 true true
+3 NULL NULL
+3 false false
+4 NULL NULL
+4 NULL NULL
+5 NULL NULL
+5 false false
+5 true false
+
+
+-- !query
+SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
+-- !query schema
+struct
+-- !query output
+1 false false
+1 true true
+2 true true
+3 NULL NULL
+3 false false
+4 NULL NULL
+4 NULL NULL
+5 NULL NULL
+5 false false
+5 true true
+
+
+-- !query
+SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
+-- !query schema
+struct
+-- !query output
+1 false false
+1 true true
+2 true true
+3 NULL NULL
+3 false false
+4 NULL NULL
+4 NULL NULL
+5 NULL NULL
+5 false false
+5 true true
+
+
+-- !query
+SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
+-- !query schema
+struct
+-- !query output
+1 false false
+1 true false
+2 true true
+3 NULL NULL
+3 false false
+4 NULL NULL
+4 NULL NULL
+5 NULL NULL
+5 false false
+5 true false
+
+
+-- !query
+SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
+-- !query schema
+struct
+-- !query output
+1 false false
+1 true true
+2 true true
+3 NULL NULL
+3 false false
+4 NULL NULL
+4 NULL NULL
+5 NULL NULL
+5 false false
+5 true true
+
+
+-- !query
+SELECT count(*) FROM test_agg HAVING count(*) > 1L
+-- !query schema
+struct
+-- !query output
+10
+
+
+-- !query
+SELECT k, max(v) FROM test_agg GROUP BY k HAVING max(v) = true
+-- !query schema
+struct
+-- !query output
+1 true
+2 true
+5 true
+
+
+-- !query
+SELECT * FROM (SELECT COUNT(*) AS cnt FROM test_agg) WHERE cnt > 1L
+-- !query schema
+struct
+-- !query output
+10
+
+
+-- !query
+SELECT count(*) FROM test_agg WHERE count(*) > 1L
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_WHERE_CONDITION",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "condition" : "\"(count(1) > 1)\"",
+ "expressionList" : "count(1)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 49,
+ "fragment" : "SELECT count(*) FROM test_agg WHERE count(*) > 1L"
+ } ]
+}
+
+
+-- !query
+SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_WHERE_CONDITION",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "condition" : "\"((count(1) + 1) > 1)\"",
+ "expressionList" : "count(1)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 54,
+ "fragment" : "SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L"
+ } ]
+}
+
+
+-- !query
+SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_WHERE_CONDITION",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "condition" : "\"(((k = 1) OR (k = 2)) OR (((count(1) + 1) > 1) OR (max(k) > 1)))\"",
+ "expressionList" : "count(1), max(test_agg.k)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 86,
+ "fragment" : "SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1"
+ } ]
+}
+
+
+-- !query
+SELECT AVG(DISTINCT decimal_col), SUM(DISTINCT decimal_col) FROM VALUES (CAST(1 AS DECIMAL(9, 0))) t(decimal_col)
+-- !query schema
+struct
+-- !query output
+1.0000 1
+
+
+-- !query
+SELECT not(a IS NULL), count(*) AS c
+FROM testData
+GROUP BY a IS NULL
+-- !query schema
+struct<(NOT (a IS NULL)):boolean,c:bigint>
+-- !query output
+false 2
+true 7
+
+
+-- !query
+SELECT if(not(a IS NULL), rand(0), 1), count(*) AS c
+FROM testData
+GROUP BY a IS NULL
+-- !query schema
+struct<(IF((NOT (a IS NULL)), rand(0), 1)):double,c:bigint>
+-- !query output
+0.5488135024422883 7
+1.0 2
+
+
+-- !query
+SELECT
+ histogram_numeric(col, 2) as histogram_2,
+ histogram_numeric(col, 3) as histogram_3,
+ histogram_numeric(col, 5) as histogram_5,
+ histogram_numeric(col, 10) as histogram_10
+FROM VALUES
+ (1), (2), (3), (4), (5), (6), (7), (8), (9), (10),
+ (11), (12), (13), (14), (15), (16), (17), (18), (19), (20),
+ (21), (22), (23), (24), (25), (26), (27), (28), (29), (30),
+ (31), (32), (33), (34), (35), (3), (37), (38), (39), (40),
+ (41), (42), (43), (44), (45), (46), (47), (48), (49), (50) AS tab(col)
+-- !query schema
+struct>,histogram_3:array>,histogram_5:array>,histogram_10:array>>
+-- !query output
+[{"x":12,"y":26.0},{"x":38,"y":24.0}] [{"x":9,"y":20.0},{"x":25,"y":11.0},{"x":40,"y":19.0}] [{"x":5,"y":11.0},{"x":14,"y":8.0},{"x":22,"y":7.0},{"x":30,"y":10.0},{"x":43,"y":14.0}] [{"x":3,"y":6.0},{"x":8,"y":6.0},{"x":13,"y":4.0},{"x":17,"y":3.0},{"x":20,"y":4.0},{"x":25,"y":6.0},{"x":31,"y":7.0},{"x":39,"y":5.0},{"x":43,"y":4.0},{"x":48,"y":5.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES (1), (2), (3) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES (1L), (2L), (3L) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES (1F), (2F), (3F) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":1.0,"y":1.0},{"x":2.0,"y":1.0},{"x":3.0,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES (1D), (2D), (3D) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":1.0,"y":1.0},{"x":2.0,"y":1.0},{"x":3.0,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES (1S), (2S), (3S) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS BYTE)), (CAST(2 AS BYTE)), (CAST(3 AS BYTE)) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS TINYINT)), (CAST(2 AS TINYINT)), (CAST(3 AS TINYINT)) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS SMALLINT)), (CAST(2 AS SMALLINT)), (CAST(3 AS SMALLINT)) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS BIGINT)), (CAST(2 AS BIGINT)), (CAST(3 AS BIGINT)) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'),
+ (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":2017-03-01 00:00:00,"y":1.0},{"x":2017-04-01 00:00:00,"y":1.0},{"x":2017-05-01 00:00:00,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '100-00' YEAR TO MONTH),
+ (INTERVAL '110-00' YEAR TO MONTH), (INTERVAL '120-00' YEAR TO MONTH) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":100-0,"y":1.0},{"x":110-0,"y":1.0},{"x":120-0,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '12 20:4:0' DAY TO SECOND),
+ (INTERVAL '12 21:4:0' DAY TO SECOND), (INTERVAL '12 22:4:0' DAY TO SECOND) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+[{"x":12 20:04:00.000000000,"y":1.0},{"x":12 21:04:00.000000000,"y":1.0},{"x":12 22:04:00.000000000,"y":1.0}]
+
+
+-- !query
+SELECT histogram_numeric(col, 3)
+FROM VALUES (NULL), (NULL), (NULL) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+NULL
+
+
+-- !query
+SELECT histogram_numeric(col, 3)
+FROM VALUES (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE)) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+NULL
+
+
+-- !query
+SELECT histogram_numeric(col, 3)
+FROM VALUES (CAST(NULL AS INT)), (CAST(NULL AS INT)), (CAST(NULL AS INT)) AS tab(col)
+-- !query schema
+struct>>
+-- !query output
+NULL
+
+
+-- !query
+SELECT
+ collect_list(col),
+ array_agg(col)
+FROM VALUES
+ (1), (2), (1) AS tab(col)
+-- !query schema
+struct,collect_list(col):array>
+-- !query output
+[1,2,1] [1,2,1]
+
+
+-- !query
+SELECT
+ a,
+ collect_list(b),
+ array_agg(b)
+FROM VALUES
+ (1,4),(2,3),(1,4),(2,4) AS v(a,b)
+GROUP BY a
+-- !query schema
+struct,collect_list(b):array>
+-- !query output
+1 [4,4] [4,4]
+2 [3,4] [3,4]
+
+
+-- !query
+SELECT mode(a), mode(b) FROM testData
+-- !query schema
+struct
+-- !query output
+3 1
+
+
+-- !query
+SELECT a, mode(b) FROM testData GROUP BY a ORDER BY a
+-- !query schema
+struct
+-- !query output
+NULL 1
+1 1
+2 1
+3 1
+
+
+-- !query
+SELECT c * 2 AS d
+FROM (
+ SELECT if(b > 1, 1, b) AS c
+ FROM (
+ SELECT if(a < 0, 0, a) AS b
+ FROM VALUES (-1), (1), (2) AS t1(a)
+ ) t2
+ GROUP BY b
+ ) t3
+GROUP BY c
+-- !query schema
+struct
+-- !query output
+0
+2
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/linear-regression.sql.out b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/linear-regression.sql.out
new file mode 100644
index 000000000000..a73347a2f7f5
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/linear-regression.sql.out
@@ -0,0 +1,276 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testRegression AS SELECT * FROM VALUES
+(1, 10, null), (2, 10, 11), (2, 20, 22), (2, 25, null), (2, 30, 35)
+AS testRegression(k, y, x)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT regr_count(y, x) FROM testRegression
+-- !query schema
+struct
+-- !query output
+3
+
+
+-- !query
+SELECT regr_count(y, x) FROM testRegression WHERE x IS NOT NULL
+-- !query schema
+struct
+-- !query output
+3
+
+
+-- !query
+SELECT k, count(*), regr_count(y, x) FROM testRegression GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 1 0
+2 4 3
+
+
+-- !query
+SELECT k, count(*) FILTER (WHERE x IS NOT NULL), regr_count(y, x) FROM testRegression GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 0 0
+2 3 3
+
+
+-- !query
+SELECT regr_r2(y, x) FROM testRegression
+-- !query schema
+struct
+-- !query output
+0.9976905311778291
+
+
+-- !query
+SELECT regr_r2(y, x) FROM testRegression WHERE x IS NOT NULL
+-- !query schema
+struct
+-- !query output
+0.9976905311778291
+
+
+-- !query
+SELECT k, corr(y, x), regr_r2(y, x) FROM testRegression GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 NULL NULL
+2 0.9988445981121532 0.9976905311778291
+
+
+-- !query
+SELECT k, corr(y, x) FILTER (WHERE x IS NOT NULL), regr_r2(y, x) FROM testRegression GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 NULL NULL
+2 0.9988445981121532 0.9976905311778291
+
+
+-- !query
+SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression
+-- !query schema
+struct
+-- !query output
+22.666666666666668 20.0
+
+
+-- !query
+SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL
+-- !query schema
+struct
+-- !query output
+22.666666666666668 20.0
+
+
+-- !query
+SELECT k, avg(x), avg(y), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 NULL 10.0 NULL NULL
+2 22.666666666666668 21.25 22.666666666666668 20.0
+
+
+-- !query
+SELECT k, avg(x) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), avg(y) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 NULL NULL NULL NULL
+2 22.666666666666668 20.0 22.666666666666668 20.0
+
+
+-- !query
+SELECT regr_sxx(y, x) FROM testRegression
+-- !query schema
+struct
+-- !query output
+288.66666666666663
+
+
+-- !query
+SELECT regr_sxx(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL
+-- !query schema
+struct
+-- !query output
+288.66666666666663
+
+
+-- !query
+SELECT k, regr_sxx(y, x) FROM testRegression GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 NULL
+2 288.66666666666663
+
+
+-- !query
+SELECT k, regr_sxx(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k
+-- !query schema
+struct
+-- !query output
+2 288.66666666666663
+
+
+-- !query
+SELECT regr_sxy(y, x) FROM testRegression
+-- !query schema
+struct
+-- !query output
+240.0
+
+
+-- !query
+SELECT regr_sxy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL
+-- !query schema
+struct
+-- !query output
+240.0
+
+
+-- !query
+SELECT k, regr_sxy(y, x) FROM testRegression GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 NULL
+2 240.0
+
+
+-- !query
+SELECT k, regr_sxy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k
+-- !query schema
+struct
+-- !query output
+2 240.0
+
+
+-- !query
+SELECT regr_syy(y, x) FROM testRegression
+-- !query schema
+struct
+-- !query output
+200.0
+
+
+-- !query
+SELECT regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL
+-- !query schema
+struct
+-- !query output
+200.0
+
+
+-- !query
+SELECT k, regr_syy(y, x) FROM testRegression GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 NULL
+2 200.0
+
+
+-- !query
+SELECT k, regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k
+-- !query schema
+struct
+-- !query output
+2 200.0
+
+
+-- !query
+SELECT regr_slope(y, x) FROM testRegression
+-- !query schema
+struct
+-- !query output
+0.8314087759815244
+
+
+-- !query
+SELECT regr_slope(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL
+-- !query schema
+struct
+-- !query output
+0.8314087759815244
+
+
+-- !query
+SELECT k, regr_slope(y, x) FROM testRegression GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 NULL
+2 0.8314087759815244
+
+
+-- !query
+SELECT k, regr_slope(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k
+-- !query schema
+struct
+-- !query output
+2 0.8314087759815244
+
+
+-- !query
+SELECT regr_intercept(y, x) FROM testRegression
+-- !query schema
+struct
+-- !query output
+1.1547344110854487
+
+
+-- !query
+SELECT regr_intercept(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL
+-- !query schema
+struct
+-- !query output
+1.1547344110854487
+
+
+-- !query
+SELECT k, regr_intercept(y, x) FROM testRegression GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 NULL
+2 1.1547344110854487
+
+
+-- !query
+SELECT k, regr_intercept(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k
+-- !query schema
+struct
+-- !query output
+2 1.1547344110854487
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/misc-functions.sql.out b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/misc-functions.sql.out
new file mode 100644
index 000000000000..d6d1289a5adb
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/misc-functions.sql.out
@@ -0,0 +1,134 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+select typeof(null)
+-- !query schema
+struct
+-- !query output
+void
+
+
+-- !query
+select typeof(true)
+-- !query schema
+struct
+-- !query output
+boolean
+
+
+-- !query
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L)
+-- !query schema
+struct
+-- !query output
+tinyint smallint int bigint
+
+
+-- !query
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2)
+-- !query schema
+struct
+-- !query output
+float double decimal(2,1)
+
+
+-- !query
+select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days')
+-- !query schema
+struct
+-- !query output
+date timestamp interval day
+
+
+-- !query
+select typeof(x'ABCD'), typeof('SPARK')
+-- !query schema
+struct
+-- !query output
+binary string
+
+
+-- !query
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark'))
+-- !query schema
+struct
+-- !query output
+array map struct
+
+
+-- !query
+SELECT assert_true(true), assert_true(boolean(1))
+-- !query schema
+struct
+-- !query output
+NULL NULL
+
+
+-- !query
+SELECT assert_true(false)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'false' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(0))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(0 as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'null' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(null))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(null as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(false, 'custom error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+custom error message
+
+
+-- !query
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT raise_error('error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+error message
+
+
+-- !query
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+too big: 8
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/random.sql.out b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/random.sql.out
new file mode 100644
index 000000000000..17e6f871b9c5
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/random.sql.out
@@ -0,0 +1,115 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT rand(0)
+-- !query schema
+struct
+-- !query output
+0.5488135024422883
+
+
+-- !query
+SELECT rand(cast(3 / 7 AS int))
+-- !query schema
+struct
+-- !query output
+0.5488135024422883
+
+
+-- !query
+SELECT rand(NULL)
+-- !query schema
+struct
+-- !query output
+0.5488135024422883
+
+
+-- !query
+SELECT rand(cast(NULL AS int))
+-- !query schema
+struct
+-- !query output
+0.5488135024422883
+
+
+-- !query
+SELECT rand(1.0)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"1.0\"",
+ "inputType" : "\"DECIMAL(2,1)\"",
+ "paramIndex" : "1",
+ "requiredType" : "(\"INT\" or \"BIGINT\")",
+ "sqlExpr" : "\"rand(1.0)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 16,
+ "fragment" : "rand(1.0)"
+ } ]
+}
+
+
+-- !query
+SELECT randn(0L)
+-- !query schema
+struct
+-- !query output
+1.6034991609278433
+
+
+-- !query
+SELECT randn(cast(3 / 7 AS long))
+-- !query schema
+struct
+-- !query output
+1.6034991609278433
+
+
+-- !query
+SELECT randn(NULL)
+-- !query schema
+struct
+-- !query output
+1.6034991609278433
+
+
+-- !query
+SELECT randn(cast(NULL AS long))
+-- !query schema
+struct
+-- !query output
+1.6034991609278433
+
+
+-- !query
+SELECT rand('1')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"1\"",
+ "inputType" : "\"STRING\"",
+ "paramIndex" : "1",
+ "requiredType" : "(\"INT\" or \"BIGINT\")",
+ "sqlExpr" : "\"rand(1)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 16,
+ "fragment" : "rand('1')"
+ } ]
+}
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/table-valued-functions.sql.out b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/table-valued-functions.sql.out
new file mode 100644
index 000000000000..0d5675fa6fde
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/table-valued-functions.sql.out
@@ -0,0 +1,1017 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+select * from dummy(3)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVABLE_TABLE_VALUED_FUNCTION",
+ "messageParameters" : {
+ "name" : "`dummy`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 22,
+ "fragment" : "dummy(3)"
+ } ]
+}
+
+
+-- !query
+select * from range(6 + cos(3))
+-- !query schema
+struct
+-- !query output
+0
+1
+2
+3
+4
+
+
+-- !query
+select * from range(5, 10)
+-- !query schema
+struct
+-- !query output
+5
+6
+7
+8
+9
+
+
+-- !query
+select * from range(0, 10, 2)
+-- !query schema
+struct
+-- !query output
+0
+2
+4
+6
+8
+
+
+-- !query
+select * from range(0, 10, 1, 200)
+-- !query schema
+struct
+-- !query output
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+
+
+-- !query
+select * from range(1, 1, 1, 1, 1)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "5",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "[1, 2, 3, 4]",
+ "functionName" : "`range`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 34,
+ "fragment" : "range(1, 1, 1, 1, 1)"
+ } ]
+}
+
+
+-- !query
+select * from range(1, null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "functionName" : "`range`",
+ "inputSql" : "\"NULL\"",
+ "inputType" : "\"VOID\"",
+ "paramIndex" : "2",
+ "requiredType" : "\"BIGINT\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 28,
+ "fragment" : "range(1, null)"
+ } ]
+}
+
+
+-- !query
+select * from range(array(1, 2, 3))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "functionName" : "`range`",
+ "inputSql" : "\"array(1, 2, 3)\"",
+ "inputType" : "\"ARRAY\"",
+ "paramIndex" : "2",
+ "requiredType" : "\"BIGINT\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 35,
+ "fragment" : "range(array(1, 2, 3))"
+ } ]
+}
+
+
+-- !query
+select * from range(0, 5, 0)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "FAILED_FUNCTION_CALL",
+ "sqlState" : "38000",
+ "messageParameters" : {
+ "funcName" : "`range`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 28,
+ "fragment" : "range(0, 5, 0)"
+ } ]
+}
+
+
+-- !query
+select * from RaNgE(2)
+-- !query schema
+struct
+-- !query output
+0
+1
+
+
+-- !query
+select i from range(0, 2) t(i)
+-- !query schema
+struct
+-- !query output
+0
+1
+
+
+-- !query
+select * from explode(array(1, 2))
+-- !query schema
+struct
+-- !query output
+1
+2
+
+
+-- !query
+select * from explode(map('a', 1, 'b', 2))
+-- !query schema
+struct
+-- !query output
+a 1
+b 2
+
+
+-- !query
+select * from explode(array())
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+select * from explode(map())
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+select * from explode(array(1, 2)) t(c1)
+-- !query schema
+struct
+-- !query output
+1
+2
+
+
+-- !query
+select * from explode(map('a', 1, 'b', 2)) t(k, v)
+-- !query schema
+struct
+-- !query output
+a 1
+b 2
+
+
+-- !query
+select * from explode(array(rand(0)))
+-- !query schema
+struct
+-- !query output
+0.5488135024422883
+
+
+-- !query
+select * from explode(null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"NULL\"",
+ "inputType" : "\"VOID\"",
+ "paramIndex" : "1",
+ "requiredType" : "(\"ARRAY\" or \"MAP\")",
+ "sqlExpr" : "\"explode(NULL)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 27,
+ "fragment" : "explode(null)"
+ } ]
+}
+
+
+-- !query
+select * from explode(null) t(c1)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"NULL\"",
+ "inputType" : "\"VOID\"",
+ "paramIndex" : "1",
+ "requiredType" : "(\"ARRAY\" or \"MAP\")",
+ "sqlExpr" : "\"explode(NULL)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 33,
+ "fragment" : "explode(null) t(c1)"
+ } ]
+}
+
+
+-- !query
+select * from explode(1)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"1\"",
+ "inputType" : "\"INT\"",
+ "paramIndex" : "1",
+ "requiredType" : "(\"ARRAY\" or \"MAP\")",
+ "sqlExpr" : "\"explode(1)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 24,
+ "fragment" : "explode(1)"
+ } ]
+}
+
+
+-- !query
+select * from explode(1, 2)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "2",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "1",
+ "functionName" : "`explode`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 27,
+ "fragment" : "explode(1, 2)"
+ } ]
+}
+
+
+-- !query
+select * from explode(explode(array(1)))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS",
+ "sqlState" : "0A000",
+ "messageParameters" : {
+ "expression" : "\"explode(explode(array(1)))\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 40,
+ "fragment" : "explode(explode(array(1)))"
+ } ]
+}
+
+
+-- !query
+select * from explode(array(1, 2)) t(c1, c2)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "NUM_TABLE_VALUE_ALIASES_MISMATCH",
+ "messageParameters" : {
+ "aliasesNum" : "2",
+ "funcName" : "`explode`",
+ "outColsNum" : "1"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 44,
+ "fragment" : "explode(array(1, 2)) t(c1, c2)"
+ } ]
+}
+
+
+-- !query
+select * from explode_outer(array(1, 2))
+-- !query schema
+struct
+-- !query output
+1
+2
+
+
+-- !query
+select * from explode_outer(map('a', 1, 'b', 2))
+-- !query schema
+struct
+-- !query output
+a 1
+b 2
+
+
+-- !query
+select * from explode_outer(array())
+-- !query schema
+struct
+-- !query output
+NULL
+
+
+-- !query
+select * from explode_outer(map())
+-- !query schema
+struct
+-- !query output
+NULL NULL
+
+
+-- !query
+select * from range(2) join explode(array(1, 2))
+-- !query schema
+struct
+-- !query output
+0 1
+0 2
+1 1
+1 2
+
+
+-- !query
+select * from range(2) join explode_outer(array())
+-- !query schema
+struct
+-- !query output
+0 NULL
+1 NULL
+
+
+-- !query
+select * from inline(array(struct(1, 'a'), struct(2, 'b')))
+-- !query schema
+struct
+-- !query output
+1 a
+2 b
+
+
+-- !query
+select * from inline(array(struct(1, 'a'), struct(2, 'b'))) t(x, y)
+-- !query schema
+struct
+-- !query output
+1 a
+2 b
+
+
+-- !query
+select * from inline(array_remove(array(struct(1, 'a')), struct(1, 'a')))
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+select * from inline(null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"NULL\"",
+ "inputType" : "\"VOID\"",
+ "paramIndex" : "1",
+ "requiredType" : "\"ARRAY\"",
+ "sqlExpr" : "\"inline(NULL)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 26,
+ "fragment" : "inline(null)"
+ } ]
+}
+
+
+-- !query
+select * from inline(array(struct(1, 2), struct(2, 3))) t(a, b, c)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "NUM_TABLE_VALUE_ALIASES_MISMATCH",
+ "messageParameters" : {
+ "aliasesNum" : "3",
+ "funcName" : "`inline`",
+ "outColsNum" : "2"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 66,
+ "fragment" : "inline(array(struct(1, 2), struct(2, 3))) t(a, b, c)"
+ } ]
+}
+
+
+-- !query
+select * from inline_outer(array(struct(1, 'a'), struct(2, 'b')))
+-- !query schema
+struct
+-- !query output
+1 a
+2 b
+
+
+-- !query
+select * from inline_outer(array_remove(array(struct(1, 'a')), struct(1, 'a')))
+-- !query schema
+struct
+-- !query output
+NULL NULL
+
+
+-- !query
+select * from posexplode(array())
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+select * from posexplode(array(1, 2))
+-- !query schema
+struct
+-- !query output
+0 1
+1 2
+
+
+-- !query
+select * from posexplode(array(1, 2)) t(pos, x)
+-- !query schema
+struct
+-- !query output
+0 1
+1 2
+
+
+-- !query
+select * from posexplode(map())
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+select * from posexplode(map('a', 1, 'b', 2))
+-- !query schema
+struct
+-- !query output
+0 a 1
+1 b 2
+
+
+-- !query
+select * from posexplode(map('a', 1, 'b', 2)) t(pos, k, v)
+-- !query schema
+struct
+-- !query output
+0 a 1
+1 b 2
+
+
+-- !query
+select * from posexplode(1)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"1\"",
+ "inputType" : "\"INT\"",
+ "paramIndex" : "1",
+ "requiredType" : "(\"ARRAY\" or \"MAP\")",
+ "sqlExpr" : "\"posexplode(1)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 27,
+ "fragment" : "posexplode(1)"
+ } ]
+}
+
+
+-- !query
+select * from posexplode(1, 2)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "2",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "1",
+ "functionName" : "`posexplode`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 30,
+ "fragment" : "posexplode(1, 2)"
+ } ]
+}
+
+
+-- !query
+select * from posexplode(explode(array(1)))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS",
+ "sqlState" : "0A000",
+ "messageParameters" : {
+ "expression" : "\"posexplode(explode(array(1)))\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 43,
+ "fragment" : "posexplode(explode(array(1)))"
+ } ]
+}
+
+
+-- !query
+select * from posexplode(array(1, 2)) t(x)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "NUM_TABLE_VALUE_ALIASES_MISMATCH",
+ "messageParameters" : {
+ "aliasesNum" : "1",
+ "funcName" : "`posexplode`",
+ "outColsNum" : "2"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 42,
+ "fragment" : "posexplode(array(1, 2)) t(x)"
+ } ]
+}
+
+
+-- !query
+select * from posexplode_outer(array())
+-- !query schema
+struct
+-- !query output
+NULL NULL
+
+
+-- !query
+select * from posexplode_outer(array(1, 2))
+-- !query schema
+struct
+-- !query output
+0 1
+1 2
+
+
+-- !query
+select * from posexplode_outer(map())
+-- !query schema
+struct
+-- !query output
+NULL NULL NULL
+
+
+-- !query
+select * from posexplode_outer(map('a', 1, 'b', 2))
+-- !query schema
+struct
+-- !query output
+0 a 1
+1 b 2
+
+
+-- !query
+select * from json_tuple('{"a": 1, "b": 2}', 'a', 'b')
+-- !query schema
+struct
+-- !query output
+1 2
+
+
+-- !query
+select * from json_tuple('{"a": 1, "b": 2}', 'a', 'c')
+-- !query schema
+struct
+-- !query output
+1 NULL
+
+
+-- !query
+select * from json_tuple('{"a": 1, "b": 2}', 'a', 'a')
+-- !query schema
+struct
+-- !query output
+1 1
+
+
+-- !query
+select * from json_tuple('{"a": 1, "b": 2}', 'a', 'b') AS t(x, y)
+-- !query schema
+struct
+-- !query output
+1 2
+
+
+-- !query
+select * from json_tuple('{"a": bad, "b": string}', 'a', 'b')
+-- !query schema
+struct
+-- !query output
+NULL NULL
+
+
+-- !query
+select * from json_tuple()
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "0",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "> 1",
+ "functionName" : "`json_tuple`"
+ }
+}
+
+
+-- !query
+select * from json_tuple('{"a": 1}')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "1",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "> 1",
+ "functionName" : "`json_tuple`"
+ }
+}
+
+
+-- !query
+select * from json_tuple('{"a": 1}', 1)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.NON_STRING_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "funcName" : "`json_tuple`",
+ "sqlExpr" : "\"json_tuple({\"a\": 1}, 1)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 39,
+ "fragment" : "json_tuple('{\"a\": 1}', 1)"
+ } ]
+}
+
+
+-- !query
+select * from json_tuple('{"a": 1}', null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.NON_STRING_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "funcName" : "`json_tuple`",
+ "sqlExpr" : "\"json_tuple({\"a\": 1}, NULL)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 42,
+ "fragment" : "json_tuple('{\"a\": 1}', null)"
+ } ]
+}
+
+
+-- !query
+select * from json_tuple('{"a": 1, "b": 2}', 'a', 'b') AS t(x)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "NUM_TABLE_VALUE_ALIASES_MISMATCH",
+ "messageParameters" : {
+ "aliasesNum" : "1",
+ "funcName" : "`json_tuple`",
+ "outColsNum" : "2"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 62,
+ "fragment" : "json_tuple('{\"a\": 1, \"b\": 2}', 'a', 'b') AS t(x)"
+ } ]
+}
+
+
+-- !query
+select * from stack(1, 1, 2, 3)
+-- !query schema
+struct
+-- !query output
+1 2 3
+
+
+-- !query
+select * from stack(2, 1, 2, 3)
+-- !query schema
+struct
+-- !query output
+1 2
+3 NULL
+
+
+-- !query
+select * from stack(3, 1, 2, 3) t(x)
+-- !query schema
+struct
+-- !query output
+1
+2
+3
+
+
+-- !query
+select * from stack(4, 1, 2, 3) t(x)
+-- !query schema
+struct
+-- !query output
+1
+2
+3
+NULL
+
+
+-- !query
+select * from stack(2, 1, 1.1, 'a', 2, 2.2, 'b') t(a, b, c)
+-- !query schema
+struct
+-- !query output
+1 1.1 a
+2 2.2 b
+
+
+-- !query
+select * from stack(2, 1, 1.1, null, 2, null, 'b') t(a, b, c)
+-- !query schema
+struct
+-- !query output
+1 1.1 NULL
+2 NULL b
+
+
+-- !query
+select * from stack()
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
+ "messageParameters" : {
+ "actualNum" : "0",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "> 1",
+ "functionName" : "`stack`"
+ }
+}
+
+
+-- !query
+select * from stack(2, 1, 2, 3) t(a, b, c)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "NUM_TABLE_VALUE_ALIASES_MISMATCH",
+ "messageParameters" : {
+ "aliasesNum" : "3",
+ "funcName" : "`stack`",
+ "outColsNum" : "2"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 42,
+ "fragment" : "stack(2, 1, 2, 3) t(a, b, c)"
+ } ]
+}
+
+
+-- !query
+select * from stack(2, 1, '1.1', 'a', 2, 2.2, 'b')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.STACK_COLUMN_DIFF_TYPES",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "columnIndex" : "1",
+ "leftParamIndex" : "2",
+ "leftType" : "\"STRING\"",
+ "rightParamIndex" : "5",
+ "rightType" : "\"DECIMAL(2,1)\"",
+ "sqlExpr" : "\"stack(2, 1, 1.1, a, 2, 2.2, b)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 50,
+ "fragment" : "stack(2, 1, '1.1', 'a', 2, 2.2, 'b')"
+ } ]
+}
+
+
+-- !query
+select * from stack(2, explode(array(1, 2, 3)))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS",
+ "sqlState" : "0A000",
+ "messageParameters" : {
+ "expression" : "\"stack(2, explode(array(1, 2, 3)))\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 15,
+ "stopIndex" : 47,
+ "fragment" : "stack(2, explode(array(1, 2, 3)))"
+ } ]
+}
diff --git a/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/udf/udf-group-by.sql.out b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/udf/udf-group-by.sql.out
new file mode 100644
index 000000000000..d3735acf0f08
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-clickhouse/sql-tests/results/udf/udf-group-by.sql.out
@@ -0,0 +1,689 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
+AS testData(a, b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT udf(a), udf(COUNT(b)) FROM testData
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "MISSING_GROUP_BY",
+ "sqlState" : "42803",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 42,
+ "fragment" : "SELECT udf(a), udf(COUNT(b)) FROM testData"
+ } ]
+}
+
+
+-- !query
+SELECT COUNT(udf(a)), udf(COUNT(b)) FROM testData
+-- !query schema
+struct
+-- !query output
+7 7
+
+
+-- !query
+SELECT udf(a), COUNT(udf(b)) FROM testData GROUP BY a
+-- !query schema
+struct
+-- !query output
+1 2
+2 2
+3 2
+NULL 1
+
+
+-- !query
+SELECT udf(a), udf(COUNT(udf(b))) FROM testData GROUP BY b
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "MISSING_AGGREGATION",
+ "sqlState" : "42803",
+ "messageParameters" : {
+ "expression" : "\"a\"",
+ "expressionAnyValue" : "\"any_value(a)\""
+ }
+}
+
+
+-- !query
+SELECT COUNT(udf(a)), COUNT(udf(b)) FROM testData GROUP BY udf(a)
+-- !query schema
+struct
+-- !query output
+0 1
+2 2
+2 2
+3 2
+
+
+-- !query
+SELECT 'foo', COUNT(udf(a)) FROM testData GROUP BY 1
+-- !query schema
+struct
+-- !query output
+foo 7
+
+
+-- !query
+SELECT 'foo' FROM testData WHERE a = 0 GROUP BY udf(1)
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+SELECT 'foo', udf(APPROX_COUNT_DISTINCT(udf(a))) FROM testData WHERE a = 0 GROUP BY udf(1)
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+SELECT 'foo', MAX(STRUCT(udf(a))) FROM testData WHERE a = 0 GROUP BY udf(1)
+-- !query schema
+struct>
+-- !query output
+
+
+
+-- !query
+SELECT udf(a + b), udf(COUNT(b)) FROM testData GROUP BY a + b
+-- !query schema
+struct
+-- !query output
+2 1
+3 2
+4 2
+5 1
+NULL 1
+
+
+-- !query
+SELECT udf(a + 2), udf(COUNT(b)) FROM testData GROUP BY a + 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "MISSING_AGGREGATION",
+ "sqlState" : "42803",
+ "messageParameters" : {
+ "expression" : "\"a\"",
+ "expressionAnyValue" : "\"any_value(a)\""
+ }
+}
+
+
+-- !query
+SELECT udf(a + 1) + 1, udf(COUNT(b)) FROM testData GROUP BY udf(a + 1)
+-- !query schema
+struct<(udf((a + 1)) + 1):int,udf(count(b)):bigint>
+-- !query output
+3 2
+4 2
+5 2
+NULL 1
+
+
+-- !query
+SELECT SKEWNESS(udf(a)), udf(KURTOSIS(a)), udf(MIN(a)), MAX(udf(a)), udf(AVG(udf(a))), udf(VARIANCE(a)), STDDEV(udf(a)), udf(SUM(a)), udf(COUNT(a))
+FROM testData
+-- !query schema
+struct
+-- !query output
+-0.27238010581457284 -1.5069204152249138 1 3 2.142857142857143 0.8095238095238096 0.8997354108424375 15 7
+
+
+-- !query
+SELECT COUNT(DISTINCT udf(b)), udf(COUNT(DISTINCT b, c)) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY udf(a)
+-- !query schema
+struct
+-- !query output
+1 1
+
+
+-- !query
+SELECT udf(a) AS k, COUNT(udf(b)) FROM testData GROUP BY k
+-- !query schema
+struct
+-- !query output
+1 2
+2 2
+3 2
+NULL 1
+
+
+-- !query
+SELECT a AS k, udf(COUNT(b)) FROM testData GROUP BY k HAVING k > 1
+-- !query schema
+struct
+-- !query output
+2 2
+3 2
+
+
+-- !query
+SELECT udf(COUNT(b)) AS k FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_AGGREGATE",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "sqlExpr" : "CAST(udf(cast(count(b) as string)) AS BIGINT)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 20,
+ "fragment" : "udf(COUNT(b))"
+ } ]
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
+(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT k AS a, udf(COUNT(udf(v))) FROM testDataHasSameNameWithAlias GROUP BY udf(a)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "MISSING_AGGREGATION",
+ "sqlState" : "42803",
+ "messageParameters" : {
+ "expression" : "\"k\"",
+ "expressionAnyValue" : "\"any_value(k)\""
+ }
+}
+
+
+-- !query
+set spark.sql.groupByAliases=false
+-- !query schema
+struct
+-- !query output
+spark.sql.groupByAliases false
+
+
+-- !query
+SELECT a AS k, udf(COUNT(udf(b))) FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`k`",
+ "proposal" : "`testdata`.`a`, `testdata`.`b`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 58,
+ "stopIndex" : 58,
+ "fragment" : "k"
+ } ]
+}
+
+
+-- !query
+SELECT udf(a), COUNT(udf(1)) FROM testData WHERE false GROUP BY udf(a)
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+SELECT udf(COUNT(1)) FROM testData WHERE false
+-- !query schema
+struct
+-- !query output
+0
+
+
+-- !query
+SELECT 1 FROM (SELECT udf(COUNT(1)) FROM testData WHERE false) t
+-- !query schema
+struct<1:int>
+-- !query output
+1
+
+
+-- !query
+SELECT 1 from (
+ SELECT 1 AS z,
+ udf(MIN(a.x))
+ FROM (select 1 as x) a
+ WHERE false
+) b
+where b.z != b.z
+-- !query schema
+struct<1:int>
+-- !query output
+
+
+
+-- !query
+SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*)
+ FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y)
+-- !query schema
+struct
+-- !query output
+0.9999999999999999 0.9999999999999999 3
+
+
+-- !query
+SELECT udf(1) FROM range(10) HAVING true
+-- !query schema
+struct
+-- !query output
+1
+
+
+-- !query
+SELECT udf(udf(1)) FROM range(10) HAVING MAX(id) > 0
+-- !query schema
+struct
+-- !query output
+1
+
+
+-- !query
+SELECT udf(id) FROM range(10) HAVING id > 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "MISSING_GROUP_BY",
+ "sqlState" : "42803",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 43,
+ "fragment" : "SELECT udf(id) FROM range(10) HAVING id > 0"
+ } ]
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
+ (1, true), (1, false),
+ (2, true),
+ (3, false), (3, null),
+ (4, null), (4, null),
+ (5, null), (5, true), (5, false) AS test_agg(k, v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT udf(every(v)), udf(some(v)), any(v) FROM test_agg WHERE 1 = 0
+-- !query schema
+struct
+-- !query output
+NULL NULL NULL
+
+
+-- !query
+SELECT udf(every(udf(v))), some(v), any(v) FROM test_agg WHERE k = 4
+-- !query schema
+struct
+-- !query output
+NULL NULL NULL
+
+
+-- !query
+SELECT every(v), udf(some(v)), any(v) FROM test_agg WHERE k = 5
+-- !query schema
+struct
+-- !query output
+false true true
+
+
+-- !query
+SELECT udf(k), every(v), udf(some(v)), any(v) FROM test_agg GROUP BY udf(k)
+-- !query schema
+struct
+-- !query output
+1 false true true
+2 true true true
+3 false false false
+4 NULL NULL NULL
+5 false true true
+
+
+-- !query
+SELECT udf(k), every(v) FROM test_agg GROUP BY k HAVING every(v) = false
+-- !query schema
+struct
+-- !query output
+1 false
+3 false
+5 false
+
+
+-- !query
+SELECT udf(k), udf(every(v)) FROM test_agg GROUP BY udf(k) HAVING every(v) IS NULL
+-- !query schema
+struct
+-- !query output
+4 NULL
+
+
+-- !query
+SELECT udf(k),
+ udf(Every(v)) AS every
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Any(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY udf(k)
+-- !query schema
+struct
+-- !query output
+2 true
+
+
+-- !query
+SELECT udf(udf(k)),
+ Every(v) AS every
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Every(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY udf(udf(k))
+-- !query schema
+struct
+-- !query output
+
+
+
+-- !query
+SELECT every(udf(1))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"udf(1)\"",
+ "inputType" : "\"INT\"",
+ "paramIndex" : "1",
+ "requiredType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"every(udf(1))\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 20,
+ "fragment" : "every(udf(1))"
+ } ]
+}
+
+
+-- !query
+SELECT some(udf(1S))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"udf(1)\"",
+ "inputType" : "\"SMALLINT\"",
+ "paramIndex" : "1",
+ "requiredType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"some(udf(1))\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 20,
+ "fragment" : "some(udf(1S))"
+ } ]
+}
+
+
+-- !query
+SELECT any(udf(1L))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"udf(1)\"",
+ "inputType" : "\"BIGINT\"",
+ "paramIndex" : "1",
+ "requiredType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"any(udf(1))\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 19,
+ "fragment" : "any(udf(1L))"
+ } ]
+}
+
+
+-- !query
+SELECT udf(every("true"))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"true\"",
+ "inputType" : "\"STRING\"",
+ "paramIndex" : "1",
+ "requiredType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"every(true)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 12,
+ "stopIndex" : 24,
+ "fragment" : "every(\"true\")"
+ } ]
+}
+
+
+-- !query
+SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
+-- !query schema
+struct
+-- !query output
+1 false false
+1 true false
+2 true true
+3 NULL NULL
+3 false false
+4 NULL NULL
+4 NULL NULL
+5 NULL NULL
+5 false false
+5 true false
+
+
+-- !query
+SELECT k, udf(udf(v)), some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
+-- !query schema
+struct
+-- !query output
+1 false false
+1 true true
+2 true true
+3 NULL NULL
+3 false false
+4 NULL NULL
+4 NULL NULL
+5 NULL NULL
+5 false false
+5 true true
+
+
+-- !query
+SELECT udf(udf(k)), v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
+-- !query schema
+struct
+-- !query output
+1 false false
+1 true true
+2 true true
+3 NULL NULL
+3 false false
+4 NULL NULL
+4 NULL NULL
+5 NULL NULL
+5 false false
+5 true true
+
+
+-- !query
+SELECT udf(count(*)) FROM test_agg HAVING count(*) > 1L
+-- !query schema
+struct
+-- !query output
+10
+
+
+-- !query
+SELECT k, udf(max(v)) FROM test_agg GROUP BY k HAVING max(v) = true
+-- !query schema
+struct
+-- !query output
+1 true
+2 true
+5 true
+
+
+-- !query
+SELECT * FROM (SELECT udf(COUNT(*)) AS cnt FROM test_agg) WHERE cnt > 1L
+-- !query schema
+struct
+-- !query output
+10
+
+
+-- !query
+SELECT udf(count(*)) FROM test_agg WHERE count(*) > 1L
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_WHERE_CONDITION",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "condition" : "\"(count(1) > 1)\"",
+ "expressionList" : "count(1)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 54,
+ "fragment" : "SELECT udf(count(*)) FROM test_agg WHERE count(*) > 1L"
+ } ]
+}
+
+
+-- !query
+SELECT udf(count(*)) FROM test_agg WHERE count(*) + 1L > 1L
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_WHERE_CONDITION",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "condition" : "\"((count(1) + 1) > 1)\"",
+ "expressionList" : "count(1)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 59,
+ "fragment" : "SELECT udf(count(*)) FROM test_agg WHERE count(*) + 1L > 1L"
+ } ]
+}
+
+
+-- !query
+SELECT udf(count(*)) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_WHERE_CONDITION",
+ "sqlState" : "42903",
+ "messageParameters" : {
+ "condition" : "\"(((k = 1) OR (k = 2)) OR (((count(1) + 1) > 1) OR (max(k) > 1)))\"",
+ "expressionList" : "count(1), max(test_agg.k)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 91,
+ "fragment" : "SELECT udf(count(*)) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1"
+ } ]
+}
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/array.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/array.sql
new file mode 100644
index 000000000000..865dc8bac4ea
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/array.sql
@@ -0,0 +1,183 @@
+-- test cases for array functions
+
+create temporary view data as select * from values
+ ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))),
+ ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223)))
+ as data(a, b, c);
+
+select * from data;
+
+-- index into array
+select a, b[0], b[0] + b[1] from data;
+
+-- index into array of arrays
+select a, c[0][0] + c[0][0 + 1] from data;
+
+
+create temporary view primitive_arrays as select * from values (
+ array(true),
+ array(2Y, 1Y),
+ array(2S, 1S),
+ array(2, 1),
+ array(2L, 1L),
+ array(9223372036854775809, 9223372036854775808),
+ array(2.0D, 1.0D),
+ array(float(2.0), float(1.0)),
+ array(date '2016-03-14', date '2016-03-13'),
+ array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000')
+) as primitive_arrays(
+ boolean_array,
+ tinyint_array,
+ smallint_array,
+ int_array,
+ bigint_array,
+ decimal_array,
+ double_array,
+ float_array,
+ date_array,
+ timestamp_array
+);
+
+select * from primitive_arrays;
+
+-- array_contains on all primitive types: result should alternate between true and false
+select
+ array_contains(boolean_array, true), array_contains(boolean_array, false),
+ array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y),
+ array_contains(smallint_array, 2S), array_contains(smallint_array, 0S),
+ array_contains(int_array, 2), array_contains(int_array, 0),
+ array_contains(bigint_array, 2L), array_contains(bigint_array, 0L),
+ array_contains(decimal_array, 9223372036854775809), array_contains(decimal_array, 1),
+ array_contains(double_array, 2.0D), array_contains(double_array, 0.0D),
+ array_contains(float_array, float(2.0)), array_contains(float_array, float(0.0)),
+ array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'),
+ array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000')
+from primitive_arrays;
+
+-- array_contains on nested arrays
+select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data;
+
+-- sort_array
+select
+ sort_array(boolean_array),
+ sort_array(tinyint_array),
+ sort_array(smallint_array),
+ sort_array(int_array),
+ sort_array(bigint_array),
+ sort_array(decimal_array),
+ sort_array(double_array),
+ sort_array(float_array),
+ sort_array(date_array),
+ sort_array(timestamp_array)
+from primitive_arrays;
+
+-- sort_array with an invalid string literal for the argument of sort order.
+select sort_array(array('b', 'd'), '1');
+
+-- sort_array with an invalid null literal casted as boolean for the argument of sort order.
+select sort_array(array('b', 'd'), cast(NULL as boolean));
+
+-- size
+select
+ size(boolean_array),
+ size(tinyint_array),
+ size(smallint_array),
+ size(int_array),
+ size(bigint_array),
+ size(decimal_array),
+ size(double_array),
+ size(float_array),
+ size(date_array),
+ size(timestamp_array)
+from primitive_arrays;
+
+-- index out of range for array elements
+select element_at(array(1, 2, 3), 5);
+select element_at(array(1, 2, 3), -5);
+select element_at(array(1, 2, 3), 0);
+
+select elt(4, '123', '456');
+select elt(0, '123', '456');
+select elt(-1, '123', '456');
+select elt(null, '123', '456');
+select elt(null, '123', null);
+select elt(1, '123', null);
+select elt(2, '123', null);
+
+select array(1, 2, 3)[5];
+select array(1, 2, 3)[-1];
+
+-- array_size
+select array_size(array());
+select array_size(array(true));
+select array_size(array(2, 1));
+select array_size(NULL);
+select array_size(map('a', 1, 'b', 2));
+
+-- size(arrays_zip)
+select size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)));
+select size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)));
+select size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10)));
+
+-- isnotnull(arrays_zip)
+select isnotnull(arrays_zip(array(), array(4), array(7, 8, 9, 10)));
+select isnotnull(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)));
+select isnotnull(arrays_zip(array(1, 2, 3), NULL, array(4), array(7, 8, 9, 10)));
+
+-- function get()
+select get(array(1, 2, 3), 0);
+select get(array(1, 2, 3), 3);
+select get(array(1, 2, 3), null);
+select get(array(1, 2, 3), -1);
+
+-- function array_insert()
+select array_insert(array(1, 2, 3), 3, 4);
+select array_insert(array(2, 3, 4), 0, 1);
+select array_insert(array(2, 3, 4), 1, 1);
+select array_insert(array(1, 3, 4), -2, 2);
+select array_insert(array(1, 2, 3), 3, "4");
+select array_insert(cast(NULL as ARRAY<INT>), 1, 1);
+select array_insert(array(1, 2, 3, NULL), cast(NULL as INT), 4);
+select array_insert(array(1, 2, 3, NULL), 4, cast(NULL as INT));
+select array_insert(array(2, 3, NULL, 4), 5, 5);
+select array_insert(array(2, 3, NULL, 4), -5, 1);
+select array_insert(array(1), 2, cast(2 as tinyint));
+
+set spark.sql.legacy.negativeIndexInArrayInsert=true;
+select array_insert(array(1, 3, 4), -2, 2);
+select array_insert(array(2, 3, NULL, 4), -5, 1);
+set spark.sql.legacy.negativeIndexInArrayInsert=false;
+
+-- function array_compact
+select array_compact(id) from values (1) as t(id);
+select array_compact(array("1", null, "2", null));
+select array_compact(array("a", "b", "c"));
+select array_compact(array(1D, null, 2D, null));
+select array_compact(array(array(1, 2, 3, null), null, array(4, null, 6)));
+select array_compact(array(null));
+
+-- function array_append
+select array_append(array(1, 2, 3), 4);
+select array_append(array('a', 'b', 'c'), 'd');
+select array_append(array(1, 2, 3, NULL), NULL);
+select array_append(array('a', 'b', 'c', NULL), NULL);
+select array_append(CAST(null AS ARRAY<String>), 'a');
+select array_append(CAST(null AS ARRAY<String>), CAST(null as String));
+select array_append(array(), 1);
+select array_append(CAST(array() AS ARRAY<String>), CAST(NULL AS String));
+select array_append(array(CAST(NULL AS String)), CAST(NULL AS String));
+
+-- function array_prepend
+select array_prepend(array(1, 2, 3), 4);
+select array_prepend(array('a', 'b', 'c'), 'd');
+select array_prepend(array(1, 2, 3, NULL), NULL);
+select array_prepend(array('a', 'b', 'c', NULL), NULL);
+select array_prepend(CAST(null AS ARRAY<String>), 'a');
+select array_prepend(CAST(null AS ARRAY<String>), CAST(null as String));
+select array_prepend(array(), 1);
+select array_prepend(CAST(array() AS ARRAY<String>), CAST(NULL AS String));
+select array_prepend(array(CAST(NULL AS String)), CAST(NULL AS String));
+
+-- SPARK-45599: Confirm 0.0, -0.0, and NaN are handled appropriately.
+select array_union(array(0.0, -0.0, DOUBLE("NaN")), array(0.0, -0.0, DOUBLE("NaN")));
+select array_distinct(array(0.0, -0.0, -0.0, DOUBLE("NaN"), DOUBLE("NaN")));
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary.sql
new file mode 100644
index 000000000000..8da97e466341
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary.sql
@@ -0,0 +1,8 @@
+--SET spark.sql.binaryOutputStyle=UTF-8
+
+SELECT X'';
+SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333';
+SELECT CAST('Spark' as BINARY);
+SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY));
+SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'));
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', binary('Kindergarten Cop')));
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_base64.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_base64.sql
new file mode 100644
index 000000000000..853eedd51773
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_base64.sql
@@ -0,0 +1,3 @@
+--IMPORT binary.sql
+
+--SET spark.sql.binaryOutputStyle=BASE64
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_basic.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_basic.sql
new file mode 100644
index 000000000000..1a5b64bdf7e0
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_basic.sql
@@ -0,0 +1,4 @@
+--IMPORT binary.sql
+
+--SET spark.sql.binaryOutputStyle=BASIC
+
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_hex.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_hex.sql
new file mode 100644
index 000000000000..7863da737a72
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_hex.sql
@@ -0,0 +1,3 @@
+--IMPORT binary.sql
+
+--SET spark.sql.binaryOutputStyle=HEX
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_hex_discrete.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_hex_discrete.sql
new file mode 100644
index 000000000000..282a7634cbc5
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/binary_hex_discrete.sql
@@ -0,0 +1,3 @@
+--IMPORT binary.sql
+
+--SET spark.sql.binaryOutputStyle=HEX_DISCRETE
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/bitwise.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/bitwise.sql
new file mode 100644
index 000000000000..e080fdd32a4a
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/bitwise.sql
@@ -0,0 +1,93 @@
+-- test cases for bitwise functions
+
+-- null
+select bit_count(null);
+
+-- boolean
+select bit_count(true);
+select bit_count(false);
+
+-- byte/tinyint
+select bit_count(cast(1 as tinyint));
+select bit_count(cast(2 as tinyint));
+select bit_count(cast(3 as tinyint));
+
+-- short/smallint
+select bit_count(1S);
+select bit_count(2S);
+select bit_count(3S);
+
+-- int
+select bit_count(1);
+select bit_count(2);
+select bit_count(3);
+
+-- long/bigint
+select bit_count(1L);
+select bit_count(2L);
+select bit_count(3L);
+
+-- negative num
+select bit_count(-1L);
+
+-- edge value
+select bit_count(9223372036854775807L);
+select bit_count(-9223372036854775808L);
+
+-- other illegal arguments
+select bit_count("bit count");
+select bit_count('a');
+
+-- test for bit_xor
+--
+CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES
+ (1, 1, 1, 1L),
+ (2, 3, 4, null),
+ (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4);
+
+-- empty case
+SELECT BIT_XOR(b3) AS n1 FROM bitwise_test where 1 = 0;
+
+-- null case
+SELECT BIT_XOR(b4) AS n1 FROM bitwise_test where b4 is null;
+
+-- the suffix numbers show the expected answer
+SELECT
+ BIT_XOR(cast(b1 as tinyint)) AS a4,
+ BIT_XOR(cast(b2 as smallint)) AS b5,
+ BIT_XOR(b3) AS c2,
+ BIT_XOR(b4) AS d2,
+ BIT_XOR(distinct b4) AS e2
+FROM bitwise_test;
+
+-- group by
+SELECT bit_xor(b3) FROM bitwise_test GROUP BY b1 & 1;
+
+--having
+SELECT b1, bit_xor(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7;
+
+-- window
+SELECT b1, b2, bit_xor(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test;
+
+-- getbit
+select getbit(11L, 3), getbit(11L, 2), getbit(11L, 1), getbit(11L, 0);
+select getbit(11L, 2 + 1), getbit(11L, 3 - 1), getbit(10L + 1, 1 * 1), getbit(cast(11L / 1 AS long), 1 - 1);
+select getbit(11L, 63);
+select getbit(11L, -1);
+select getbit(11L, 64);
+
+SELECT 20181117 >> 2;
+SELECT 20181117 << 2;
+SELECT 20181117 >>> 2;
+SELECT 20181117 > > 2;
+SELECT 20181117 < < 2;
+SELECT 20181117 > >> 2;
+SELECT 20181117 <<< 2;
+SELECT 20181117 >>>> 2;
+select cast(null as array<array<int>>), 20181117 >> 2;
+select cast(null as array<array<int>>), 20181117 >>> 2;
+select cast(null as map<string, array<int>>), 20181117 >> 2;
+
+select 1 << 1 + 2 as plus_over_shift; -- if correct, the result is 8. otherwise, 4
+select 2 >> 1 << 1 as left_to_right; -- if correct, the result is 2. otherwise, 0
+select 1 & 2 >> 1 as shift_over_ampersand; -- if correct, the result is 1. otherwise, 0
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cast.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cast.sql
new file mode 100644
index 000000000000..2bf53f9730b3
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cast.sql
@@ -0,0 +1,179 @@
+-- cast string representing a valid fractional number to integral should truncate the number
+SELECT CAST('1.23' AS int);
+SELECT CAST('1.23' AS long);
+SELECT CAST('-4.56' AS int);
+SELECT CAST('-4.56' AS long);
+
+-- cast string which are not numbers to numeric types
+SELECT CAST('abc' AS int);
+SELECT CAST('abc' AS long);
+SELECT CAST('abc' AS float);
+SELECT CAST('abc' AS double);
+
+-- cast string representing a very large number to integral should return null
+SELECT CAST('1234567890123' AS int);
+SELECT CAST('12345678901234567890123' AS long);
+
+-- cast empty string to integral should return null
+SELECT CAST('' AS int);
+SELECT CAST('' AS long);
+SELECT CAST('' AS float);
+SELECT CAST('' AS double);
+
+-- cast null to integral should return null
+SELECT CAST(NULL AS int);
+SELECT CAST(NULL AS long);
+
+-- cast invalid decimal string to numeric types
+SELECT CAST('123.a' AS int);
+SELECT CAST('123.a' AS long);
+SELECT CAST('123.a' AS float);
+SELECT CAST('123.a' AS double);
+
+-- '-2147483648' is the smallest int value
+SELECT CAST('-2147483648' AS int);
+SELECT CAST('-2147483649' AS int);
+
+-- '2147483647' is the largest int value
+SELECT CAST('2147483647' AS int);
+SELECT CAST('2147483648' AS int);
+
+-- '-9223372036854775808' is the smallest long value
+SELECT CAST('-9223372036854775808' AS long);
+SELECT CAST('-9223372036854775809' AS long);
+
+-- '9223372036854775807' is the largest long value
+SELECT CAST('9223372036854775807' AS long);
+SELECT CAST('9223372036854775808' AS long);
+
+-- cast string to its binary representation
+SELECT HEX(CAST('abc' AS binary));
+
+-- cast integral values to their corresponding binary representation
+SELECT HEX(CAST(CAST(123 AS byte) AS binary));
+SELECT HEX(CAST(CAST(-123 AS byte) AS binary));
+SELECT HEX(CAST(123S AS binary));
+SELECT HEX(CAST(-123S AS binary));
+SELECT HEX(CAST(123 AS binary));
+SELECT HEX(CAST(-123 AS binary));
+SELECT HEX(CAST(123L AS binary));
+SELECT HEX(CAST(-123L AS binary));
+
+DESC FUNCTION boolean;
+DESC FUNCTION EXTENDED boolean;
+-- TODO: migrate all cast tests here.
+
+-- cast string to interval and interval to string
+SELECT CAST('interval 3 month 1 hour' AS interval);
+SELECT CAST("interval '3-1' year to month" AS interval year to month);
+SELECT CAST("interval '3 00:00:01' day to second" AS interval day to second);
+SELECT CAST(interval 3 month 1 hour AS string);
+SELECT CAST(interval 3 year 1 month AS string);
+SELECT CAST(interval 3 day 1 second AS string);
+
+-- trim string before cast to numeric
+select cast(' 1' as tinyint);
+select cast(' 1\t' as tinyint);
+select cast(' 1' as smallint);
+select cast(' 1' as INT);
+select cast(' 1' as bigint);
+select cast(' 1' as float);
+select cast(' 1 ' as DOUBLE);
+select cast('1.0 ' as DEC);
+select cast('1中文' as tinyint);
+select cast('1中文' as smallint);
+select cast('1中文' as INT);
+select cast('中文1' as bigint);
+select cast('1中文' as bigint);
+
+-- trim string before cast to boolean
+select cast('\t\t true \n\r ' as boolean);
+select cast('\t\n false \t\r' as boolean);
+select cast('\t\n xyz \t\r' as boolean);
+
+select cast('23.45' as decimal(4, 2));
+select cast('123.45' as decimal(4, 2));
+select cast('xyz' as decimal(4, 2));
+
+select cast('2022-01-01' as date);
+select cast('a' as date);
+select cast('2022-01-01 00:00:00' as timestamp);
+select cast('a' as timestamp);
+select cast('2022-01-01 00:00:00' as timestamp_ntz);
+select cast('a' as timestamp_ntz);
+
+select cast(cast('inf' as double) as timestamp);
+select cast(cast('inf' as float) as timestamp);
+
+-- cast ANSI intervals to integrals
+select cast(interval '1' year as tinyint);
+select cast(interval '-10-2' year to month as smallint);
+select cast(interval '1000' month as int);
+select cast(interval -'10.123456' second as tinyint);
+select cast(interval '23:59:59' hour to second as smallint);
+select cast(interval -'1 02:03:04.123' day to second as int);
+select cast(interval '10' day as bigint);
+
+select cast(interval '-1000' month as tinyint);
+select cast(interval '1000000' second as smallint);
+
+-- cast integrals to ANSI intervals
+select cast(1Y as interval year);
+select cast(-122S as interval year to month);
+select cast(ym as interval year to month) from values(-122S) as t(ym);
+select cast(1000 as interval month);
+select cast(-10L as interval second);
+select cast(100Y as interval hour to second);
+select cast(dt as interval hour to second) from values(100Y) as t(dt);
+select cast(-1000S as interval day to second);
+select cast(10 as interval day);
+
+select cast(2147483647 as interval year);
+select cast(-9223372036854775808L as interval day);
+
+-- cast ANSI intervals to decimals
+select cast(interval '-1' year as decimal(10, 0));
+select cast(interval '1.000001' second as decimal(10, 6));
+select cast(interval '08:11:10.001' hour to second as decimal(10, 4));
+select cast(interval '1 01:02:03.1' day to second as decimal(8, 1));
+select cast(interval '10.123' second as decimal(4, 2));
+select cast(interval '10.005' second as decimal(4, 2));
+select cast(interval '10.123' second as decimal(5, 2));
+select cast(interval '10.123' second as decimal(1, 0));
+
+-- cast decimals to ANSI intervals
+select cast(10.123456BD as interval day to second);
+select cast(80.654321BD as interval hour to minute);
+select cast(-10.123456BD as interval year to month);
+select cast(10.654321BD as interval month);
+
+-- cast double colon syntax tests
+SELECT '1.23' :: int;
+SELECT 'abc' :: int;
+SELECT '12345678901234567890123' :: long;
+SELECT '' :: int;
+SELECT NULL :: int;
+SELECT '123.a' :: int;
+SELECT '-2147483648' :: int;
+SELECT HEX('abc' :: binary);
+SELECT HEX((123 :: byte) :: binary);
+SELECT 'interval 3 month 1 hour' :: interval;
+SELECT interval 3 day 1 second :: string;
+select ' 1 ' :: DOUBLE;
+select '1.0 ' :: DEC;
+select '\t\t true \n\r ' :: boolean;
+select '2022-01-01 00:00:00' :: timestamp;
+select interval '-10-2' year to month :: smallint;
+select -10L :: interval second;
+select interval '08:11:10.001' hour to second :: decimal(10, 4);
+select 10.123456BD :: interval day to second;
+
+SELECT '1.23' :: int :: long;
+SELECT '2147483648' :: long :: int;
+SELECT CAST('2147483648' :: long AS int);
+SELECT map(1, '123', 2, '456')[1] :: int;
+
+-- cast double colon syntax negative tests
+SELECT '2147483648' :: BINT;
+SELECT '2147483648' :: SELECT;
+SELECT FALSE IS NOT NULL :: string;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/ceil-floor-with-scale-param.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/ceil-floor-with-scale-param.sql
new file mode 100644
index 000000000000..c05429b3ef77
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/ceil-floor-with-scale-param.sql
@@ -0,0 +1,31 @@
+-- Tests different scenarios of ceil and floor functions with scale parameters
+SELECT CEIL(2.5, 0);
+SELECT CEIL(3.5, 0);
+SELECT CEIL(-2.5, 0);
+SELECT CEIL(-3.5, 0);
+SELECT CEIL(-0.35, 1);
+SELECT CEIL(-35, -1);
+SELECT CEIL(-0.1, 0);
+SELECT CEIL(5, 0);
+SELECT CEIL(3.14115, -3);
+SELECT CEIL(9.9, 0);
+SELECT CEIL(CAST(99 AS DECIMAL(2, 0)), -1);
+SELECT CEIL(2.5, null);
+SELECT CEIL(2.5, 'a');
+SELECT CEIL(2.5, 0, 0);
+
+-- Same inputs with floor function
+SELECT FLOOR(2.5, 0);
+SELECT FLOOR(3.5, 0);
+SELECT FLOOR(-2.5, 0);
+SELECT FLOOR(-3.5, 0);
+SELECT FLOOR(-0.35, 1);
+SELECT FLOOR(-35, -1);
+SELECT FLOOR(-0.1, 0);
+SELECT FLOOR(5, 0);
+SELECT FLOOR(3.14115, -3);
+SELECT FLOOR(-9.9, 0);
+SELECT FLOOR(CAST(-99 AS DECIMAL(2, 0)), -1);
+SELECT FLOOR(2.5, null);
+SELECT FLOOR(2.5, 'a');
+SELECT FLOOR(2.5, 0, 0);
\ No newline at end of file
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/change-column.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/change-column.sql
new file mode 100644
index 000000000000..2b57891cfcbc
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/change-column.sql
@@ -0,0 +1,49 @@
+-- Create the origin table
+CREATE TABLE test_change(a INT, b STRING, c INT) using parquet;
+DESC test_change;
+
+-- ALTER TABLE CHANGE COLUMN must change either type or comment
+ALTER TABLE test_change CHANGE a;
+DESC test_change;
+
+-- Change column name (not supported on v1 table)
+ALTER TABLE test_change RENAME COLUMN a TO a1;
+DESC test_change;
+
+-- Change column dataType (not supported yet)
+ALTER TABLE test_change CHANGE a TYPE STRING;
+DESC test_change;
+
+-- Change column position (not supported yet)
+ALTER TABLE test_change CHANGE a AFTER b;
+ALTER TABLE test_change CHANGE b FIRST;
+DESC test_change;
+
+-- Change column comment
+ALTER TABLE test_change CHANGE a COMMENT 'this is column a';
+ALTER TABLE test_change CHANGE b COMMENT '#*02?`';
+ALTER TABLE test_change CHANGE c COMMENT '';
+DESC test_change;
+
+-- Don't change anything.
+ALTER TABLE test_change CHANGE a TYPE INT;
+ALTER TABLE test_change CHANGE a COMMENT 'this is column a';
+DESC test_change;
+
+-- Change a invalid column
+ALTER TABLE test_change CHANGE invalid_col TYPE INT;
+DESC test_change;
+
+-- Check case insensitivity.
+ALTER TABLE test_change CHANGE A COMMENT 'case insensitivity';
+DESC test_change;
+
+-- Change column can't apply to a temporary/global_temporary view
+CREATE TEMPORARY VIEW temp_view(a, b) AS SELECT 1, "one";
+ALTER TABLE temp_view CHANGE a TYPE INT;
+CREATE GLOBAL TEMPORARY VIEW global_temp_view(a, b) AS SELECT 1, "one";
+ALTER TABLE global_temp.global_temp_view CHANGE a TYPE INT;
+
+-- DROP TEST TABLE
+DROP TABLE test_change;
+DROP VIEW global_temp.global_temp_view;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/charvarchar.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/charvarchar.sql
new file mode 100644
index 000000000000..f5b20f99c8a0
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/charvarchar.sql
@@ -0,0 +1,128 @@
+create table char_tbl(c char(5), v varchar(6)) using parquet;
+desc formatted char_tbl;
+desc formatted char_tbl c;
+show create table char_tbl;
+
+create table char_tbl2 using parquet as select * from char_tbl;
+show create table char_tbl2;
+desc formatted char_tbl2;
+desc formatted char_tbl2 c;
+
+create table char_tbl3 like char_tbl;
+desc formatted char_tbl3;
+desc formatted char_tbl3 c;
+show create table char_tbl3;
+
+create view char_view as select * from char_tbl;
+desc formatted char_view;
+desc formatted char_view c;
+show create table char_view;
+
+alter table char_tbl rename to char_tbl1;
+desc formatted char_tbl1;
+
+alter table char_tbl1 change column c type char(6);
+alter table char_tbl1 change column c type char(5);
+desc formatted char_tbl1;
+
+alter table char_tbl1 add columns (d char(5));
+desc formatted char_tbl1;
+
+alter view char_view as select * from char_tbl2;
+desc formatted char_view;
+
+alter table char_tbl1 SET TBLPROPERTIES('yes'='no');
+desc formatted char_tbl1;
+
+alter view char_view SET TBLPROPERTIES('yes'='no');
+desc formatted char_view;
+
+alter table char_tbl1 UNSET TBLPROPERTIES('yes');
+desc formatted char_tbl1;
+
+alter view char_view UNSET TBLPROPERTIES('yes');
+desc formatted char_view;
+
+alter table char_tbl1 SET SERDEPROPERTIES('yes'='no');
+desc formatted char_tbl1;
+
+create table char_part(c1 char(5), c2 char(2), v1 varchar(6), v2 varchar(2)) using parquet partitioned by (v2, c2);
+desc formatted char_part;
+
+alter table char_part change column c1 comment 'char comment';
+alter table char_part change column v1 comment 'varchar comment';
+alter table char_part add partition (v2='ke', c2='nt') location 'loc1';
+desc formatted char_part;
+
+alter table char_part partition (v2='ke') rename to partition (v2='nt');
+desc formatted char_part;
+
+alter table char_part partition (v2='ke', c2='nt') set location 'loc2';
+desc formatted char_part;
+
+MSCK REPAIR TABLE char_part;
+desc formatted char_part;
+
+create temporary view str_view as select c, v from values
+ (null, null),
+ (null, 'S'),
+ ('N', 'N '),
+ ('Ne', 'Sp'),
+ ('Net ', 'Spa '),
+ ('NetE', 'Spar'),
+ ('NetEa ', 'Spark '),
+ ('NetEas ', 'Spark'),
+ ('NetEase', 'Spark-') t(c, v);
+
+create table char_tbl4(c7 char(7), c8 char(8), v varchar(6), s string) using parquet;
+insert into char_tbl4 select c, c, v, c from str_view;
+
+select c7, c8, v, s from char_tbl4;
+select c7, c8, v, s from char_tbl4 where c7 = c8;
+select c7, c8, v, s from char_tbl4 where c7 = v;
+select c7, c8, v, s from char_tbl4 where c7 = s;
+select c7, c8, v, s from char_tbl4 where c7 = 'NetEase ';
+select c7, c8, v, s from char_tbl4 where v = 'Spark ';
+select c7, c8, v, s from char_tbl4 order by c7;
+select c7, c8, v, s from char_tbl4 order by v;
+
+select ascii(c7), ascii(c8), ascii(v), ascii(s) from char_tbl4;
+select base64(c7), base64(c8), base64(v), ascii(s) from char_tbl4;
+select bit_length(c7), bit_length(c8), bit_length(v), bit_length(s) from char_tbl4;
+select char_length(c7), char_length(c8), char_length(v), char_length(s) from char_tbl4;
+select octet_length(c7), octet_length(c8), octet_length(v), octet_length(s) from char_tbl4;
+select concat_ws('|', c7, c8), concat_ws('|', c7, v), concat_ws('|', c7, s), concat_ws('|', v, s) from char_tbl4;
+select concat(c7, c8), concat(c7, v), concat(c7, s), concat(v, s) from char_tbl4;
+select like(c7, 'Ne _'), like(c8, 'Ne _') from char_tbl4;
+select like(v, 'Spark_') from char_tbl4;
+select c7 = c8, upper(c7) = upper(c8), lower(c7) = lower(c8) from char_tbl4 where s = 'NetEase';
+select c7 = s, upper(c7) = upper(s), lower(c7) = lower(s) from char_tbl4 where s = 'NetEase';
+select c7 = 'NetEase', upper(c7) = upper('NetEase'), lower(c7) = lower('NetEase') from char_tbl4 where s = 'NetEase';
+select printf('Hey, %s%s%s%s', c7, c8, v, s) from char_tbl4;
+select repeat(c7, 2), repeat(c8, 2), repeat(v, 2), repeat(s, 2) from char_tbl4;
+select replace(c7, 'Net', 'Apache'), replace(c8, 'Net', 'Apache'), replace(v, 'Spark', 'Kyuubi'), replace(s, 'Net', 'Apache') from char_tbl4;
+select rpad(c7, 10), rpad(c8, 5), rpad(v, 5), rpad(s, 5) from char_tbl4;
+select rtrim(c7), rtrim(c8), rtrim(v), rtrim(s) from char_tbl4;
+select split(c7, 'e'), split(c8, 'e'), split(v, 'a'), split(s, 'e') from char_tbl4;
+select substring(c7, 2), substring(c8, 2), substring(v, 3), substring(s, 2) from char_tbl4;
+select left(c7, 2), left(c8, 2), left(v, 3), left(s, 2) from char_tbl4;
+select right(c7, 2), right(c8, 2), right(v, 3), right(s, 2) from char_tbl4;
+set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation,org.apache.spark.sql.catalyst.optimizer.NullPropagation;
+select typeof(c7), typeof(c8), typeof(v), typeof(s) from char_tbl4 limit 1;
+set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation,org.apache.spark.sql.catalyst.optimizer.ConstantFolding,org.apache.spark.sql.catalyst.optimizer.NullPropagation;
+select cast(c7 as char(1)), cast(c8 as char(10)), cast(v as char(1)), cast(v as varchar(1)), cast(s as char(5)) from char_tbl4;
+
+-- char_tbl has renamed to char_tbl1
+drop table char_tbl1;
+drop table char_tbl2;
+drop table char_tbl3;
+drop table char_tbl4;
+
+-- ascii value for Latin-1 Supplement characters
+select ascii('§'), ascii('÷'), ascii('×10');
+select chr(167), chr(247), chr(215);
+
+-- to_varchar is an alias for to_char
+SELECT to_varchar(78.12, '$99.99');
+SELECT to_varchar(111.11, '99.9');
+SELECT to_varchar(12454.8, '99,999.9S');
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/collations.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/collations.sql
new file mode 100644
index 000000000000..17815ed5dde6
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/collations.sql
@@ -0,0 +1,527 @@
+-- test cases for collation support
+
+-- Create a test table with data
+create table t1(utf8_binary string collate utf8_binary, utf8_lcase string collate utf8_lcase) using parquet;
+insert into t1 values('aaa', 'aaa');
+insert into t1 values('AAA', 'AAA');
+insert into t1 values('bbb', 'bbb');
+insert into t1 values('BBB', 'BBB');
+
+-- describe
+describe table t1;
+
+-- group by and count utf8_binary
+select count(*) from t1 group by utf8_binary;
+
+-- group by and count utf8_lcase
+select count(*) from t1 group by utf8_lcase;
+
+-- filter equal utf8_binary
+select * from t1 where utf8_binary = 'aaa';
+
+-- filter equal utf8_lcase
+select * from t1 where utf8_lcase = 'aaa' collate utf8_lcase;
+
+-- filter less than utf8_binary
+select * from t1 where utf8_binary < 'bbb';
+
+-- filter less than utf8_lcase
+select * from t1 where utf8_lcase < 'bbb' collate utf8_lcase;
+
+-- inner join
+select l.utf8_binary, r.utf8_lcase from t1 l join t1 r on l.utf8_lcase = r.utf8_lcase;
+
+-- create second table for anti-join
+create table t2(utf8_binary string collate utf8_binary, utf8_lcase string collate utf8_lcase) using parquet;
+insert into t2 values('aaa', 'aaa');
+insert into t2 values('bbb', 'bbb');
+
+-- anti-join on lcase
+select * from t1 anti join t2 on t1.utf8_lcase = t2.utf8_lcase;
+
+drop table t2;
+drop table t1;
+
+-- set operations
+select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except select col1 collate utf8_lcase from values ('aaa'), ('bbb');
+select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except all select col1 collate utf8_lcase from values ('aaa'), ('bbb');
+select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union select col1 collate utf8_lcase from values ('aaa'), ('bbb');
+select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union all select col1 collate utf8_lcase from values ('aaa'), ('bbb');
+select col1 collate utf8_lcase from values ('aaa'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') intersect select col1 collate utf8_lcase from values ('aaa'), ('bbb');
+
+-- set operations with conflicting collations
+select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except select col1 collate unicode_ci from values ('aaa'), ('bbb');
+select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except all select col1 collate unicode_ci from values ('aaa'), ('bbb');
+select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union select col1 collate unicode_ci from values ('aaa'), ('bbb');
+select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union all select col1 collate unicode_ci from values ('aaa'), ('bbb');
+select col1 collate utf8_lcase from values ('aaa'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') intersect select col1 collate unicode_ci from values ('aaa'), ('bbb');
+
+-- create table with struct field
+create table t1 (c1 struct<utf8_binary: string collate utf8_binary, utf8_lcase: string collate utf8_lcase>) USING PARQUET;
+
+insert into t1 values (named_struct('utf8_binary', 'aaa', 'utf8_lcase', 'aaa'));
+insert into t1 values (named_struct('utf8_binary', 'AAA', 'utf8_lcase', 'AAA'));
+
+-- aggregate against nested field utf8_binary
+select count(*) from t1 group by c1.utf8_binary;
+
+-- aggregate against nested field utf8_lcase
+select count(*) from t1 group by c1.utf8_lcase;
+
+drop table t1;
+
+-- array function tests
+select array_contains(ARRAY('aaa' collate utf8_lcase),'AAA' collate utf8_lcase);
+select array_position(ARRAY('aaa' collate utf8_lcase, 'bbb' collate utf8_lcase),'BBB' collate utf8_lcase);
+
+-- utility
+select nullif('aaa' COLLATE utf8_lcase, 'AAA' COLLATE utf8_lcase);
+select least('aaa' COLLATE utf8_lcase, 'AAA' collate utf8_lcase, 'a' collate utf8_lcase);
+
+-- array operations
+select arrays_overlap(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase));
+select array_distinct(array('aaa' collate utf8_lcase, 'AAA' collate utf8_lcase));
+select array_union(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase));
+select array_intersect(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase));
+select array_except(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase));
+
+-- ICU collations (all statements return true)
+select 'a' collate unicode < 'A';
+select 'a' collate unicode_ci = 'A';
+select 'a' collate unicode_ai = 'å';
+select 'a' collate unicode_ci_ai = 'Å';
+select 'a' collate en < 'A';
+select 'a' collate en_ci = 'A';
+select 'a' collate en_ai = 'å';
+select 'a' collate en_ci_ai = 'Å';
+select 'Kypper' collate sv < 'Köpfe';
+select 'Kypper' collate de > 'Köpfe';
+select 'I' collate tr_ci = 'ı';
+
+-- create table for str_to_map
+create table t4 (text string collate utf8_binary, pairDelim string collate utf8_lcase, keyValueDelim string collate utf8_binary) using parquet;
+
+insert into t4 values('a:1,b:2,c:3', ',', ':');
+
+select str_to_map(text, pairDelim, keyValueDelim) from t4;
+select str_to_map(text collate utf8_binary, pairDelim collate utf8_lcase, keyValueDelim collate utf8_binary) from t4;
+select str_to_map(text collate utf8_binary, pairDelim collate utf8_binary, keyValueDelim collate utf8_binary) from t4;
+select str_to_map(text collate unicode_ai, pairDelim collate unicode_ai, keyValueDelim collate unicode_ai) from t4;
+
+drop table t4;
+
+create table t5(s string, utf8_binary string collate utf8_binary, utf8_lcase string collate utf8_lcase) using parquet;
+insert into t5 values ('Spark', 'Spark', 'SQL');
+insert into t5 values ('aaAaAAaA', 'aaAaAAaA', 'aaAaAAaA');
+insert into t5 values ('aaAaAAaA', 'aaAaAAaA', 'aaAaaAaA');
+insert into t5 values ('aaAaAAaA', 'aaAaAAaA', 'aaAaaAaAaaAaaAaAaaAaaAaA');
+insert into t5 values ('bbAbaAbA', 'bbAbAAbA', 'a');
+insert into t5 values ('İo', 'İo', 'İo');
+insert into t5 values ('İo', 'İo', 'İo ');
+insert into t5 values ('İo', 'İo ', 'İo');
+insert into t5 values ('İo', 'İo', 'i̇o');
+insert into t5 values ('efd2', 'efd2', 'efd2');
+insert into t5 values ('Hello, world! Nice day.', 'Hello, world! Nice day.', 'Hello, world! Nice day.');
+insert into t5 values ('Something else. Nothing here.', 'Something else. Nothing here.', 'Something else. Nothing here.');
+insert into t5 values ('kitten', 'kitten', 'sitTing');
+insert into t5 values ('abc', 'abc', 'abc');
+insert into t5 values ('abcdcba', 'abcdcba', 'aBcDCbA');
+
+create table t6(ascii long) using parquet;
+insert into t6 values (97);
+insert into t6 values (66);
+
+create table t7(ascii double) using parquet;
+insert into t7 values (97.52143);
+insert into t7 values (66.421);
+
+create table t8(format string collate utf8_binary, utf8_binary string collate utf8_binary, utf8_lcase string collate utf8_lcase) using parquet;
+insert into t8 values ('%s%s', 'abCdE', 'abCdE');
+
+create table t9(num long) using parquet;
+insert into t9 values (97);
+insert into t9 values (66);
+
+create table t10(utf8_binary string collate utf8_binary, utf8_lcase string collate utf8_lcase) using parquet;
+insert into t10 values ('aaAaAAaA', 'aaAaaAaA');
+insert into t10 values ('efd2', 'efd2');
+
+-- ConcatWs
+select concat_ws(' ', utf8_lcase, utf8_lcase) from t5;
+select concat_ws(' ', utf8_binary, utf8_lcase) from t5;
+select concat_ws(' ' collate utf8_binary, utf8_binary, 'SQL' collate utf8_lcase) from t5;
+select concat_ws(' ' collate utf8_lcase, utf8_binary, 'SQL' collate utf8_lcase) from t5;
+select concat_ws(',', utf8_lcase, 'word'), concat_ws(',', utf8_binary, 'word') from t5;
+select concat_ws(',', utf8_lcase, 'word' collate utf8_binary), concat_ws(',', utf8_binary, 'word' collate utf8_lcase) from t5;
+
+-- Elt
+select elt(2, s, utf8_binary) from t5;
+select elt(2, utf8_binary, utf8_lcase, s) from t5;
+select elt(1, utf8_binary collate utf8_binary, utf8_lcase collate utf8_lcase) from t5;
+select elt(1, utf8_binary collate utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select elt(1, utf8_binary collate utf8_binary, utf8_lcase) from t5;
+select elt(1, utf8_binary, 'word'), elt(1, utf8_lcase, 'word') from t5;
+select elt(1, utf8_binary, 'word' collate utf8_lcase), elt(1, utf8_lcase, 'word' collate utf8_binary) from t5;
+
+-- SplitPart
+select split_part(utf8_binary, utf8_lcase, 3) from t5;
+select split_part(s, utf8_binary, 1) from t5;
+select split_part(utf8_binary collate utf8_binary, s collate utf8_lcase, 1) from t5;
+select split_part(utf8_binary, utf8_lcase collate utf8_binary, 2) from t5;
+select split_part(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase, 2) from t5;
+select split_part(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai, 2) from t5;
+select split_part(utf8_binary, 'a', 3), split_part(utf8_lcase, 'a', 3) from t5;
+select split_part(utf8_binary, 'a' collate utf8_lcase, 3), split_part(utf8_lcase, 'a' collate utf8_binary, 3) from t5;
+select split_part(utf8_binary, 'a ' collate utf8_lcase_rtrim, 3), split_part(utf8_lcase, 'a' collate utf8_binary, 3) from t5;
+
+-- Contains
+select contains(utf8_binary, utf8_lcase) from t5;
+select contains(s, utf8_binary) from t5;
+select contains(utf8_binary collate utf8_binary, s collate utf8_lcase) from t5;
+select contains(utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select contains(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase) from t5;
+select contains(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai) from t5;
+select contains(utf8_binary, 'a'), contains(utf8_lcase, 'a') from t5;
+select contains(utf8_binary, 'AaAA' collate utf8_lcase), contains(utf8_lcase, 'AAa' collate utf8_binary) from t5;
+select contains(utf8_binary, 'AaAA ' collate utf8_lcase_rtrim), contains(utf8_lcase, 'AAa ' collate utf8_binary_rtrim) from t5;
+
+-- SubstringIndex
+select substring_index(utf8_binary, utf8_lcase, 2) from t5;
+select substring_index(s, utf8_binary,1) from t5;
+select substring_index(utf8_binary collate utf8_binary, s collate utf8_lcase, 3) from t5;
+select substring_index(utf8_binary, utf8_lcase collate utf8_binary, 2) from t5;
+select substring_index(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase, 2) from t5;
+select substring_index(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai, 2) from t5;
+select substring_index(utf8_binary, 'a', 2), substring_index(utf8_lcase, 'a', 2) from t5;
+select substring_index(utf8_binary, 'AaAA' collate utf8_lcase, 2), substring_index(utf8_lcase, 'AAa' collate utf8_binary, 2) from t5;
+select substring_index(utf8_binary, 'AaAA ' collate utf8_lcase_rtrim, 2), substring_index(utf8_lcase, 'AAa' collate utf8_binary, 2) from t5;
+
+-- StringInStr
+select instr(utf8_binary, utf8_lcase) from t5;
+select instr(s, utf8_binary) from t5;
+select instr(utf8_binary collate utf8_binary, s collate utf8_lcase) from t5;
+select instr(utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select instr(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase) from t5;
+select instr(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai) from t5;
+select instr(utf8_binary, 'a'), instr(utf8_lcase, 'a') from t5;
+select instr(utf8_binary, 'AaAA' collate utf8_lcase), instr(utf8_lcase, 'AAa' collate utf8_binary) from t5;
+
+-- FindInSet
+select find_in_set(utf8_binary, utf8_lcase) from t5;
+select find_in_set(s, utf8_binary) from t5;
+select find_in_set(utf8_binary collate utf8_binary, s collate utf8_lcase) from t5;
+select find_in_set(utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select find_in_set(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase) from t5;
+select find_in_set(utf8_binary, 'aaAaaAaA,i̇o'), find_in_set(utf8_lcase, 'aaAaaAaA,i̇o') from t5;
+select find_in_set(utf8_binary, 'aaAaaAaA,i̇o' collate utf8_lcase), find_in_set(utf8_lcase, 'aaAaaAaA,i̇o' collate utf8_binary) from t5;
+select find_in_set(utf8_binary, 'aaAaaAaA,i̇o ' collate utf8_lcase_rtrim), find_in_set(utf8_lcase, 'aaAaaAaA,i̇o' collate utf8_binary) from t5;
+-- StartsWith
+select startswith(utf8_binary, utf8_lcase) from t5;
+select startswith(s, utf8_binary) from t5;
+select startswith(utf8_binary collate utf8_binary, s collate utf8_lcase) from t5;
+select startswith(utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select startswith(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase) from t5;
+select startswith(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai) from t5;
+select startswith(utf8_binary, 'aaAaaAaA'), startswith(utf8_lcase, 'aaAaaAaA') from t5;
+select startswith(utf8_binary, 'aaAaaAaA' collate utf8_lcase), startswith(utf8_lcase, 'aaAaaAaA' collate utf8_binary) from t5;
+select startswith(utf8_binary, 'aaAaaAaA ' collate utf8_lcase_rtrim), startswith(utf8_lcase, 'aaAaaAaA' collate utf8_binary) from t5;
+
+-- StringTranslate
+select translate(utf8_lcase, utf8_lcase, '12345') from t5;
+select translate(utf8_binary, utf8_lcase, '12345') from t5;
+select translate(utf8_binary, 'aBc' collate utf8_lcase, '12345' collate utf8_binary) from t5;
+select translate(utf8_binary, 'SQL' collate utf8_lcase, '12345' collate utf8_lcase) from t5;
+select translate(utf8_binary, 'SQL' collate unicode_ai, '12345' collate unicode_ai) from t5;
+select translate(utf8_lcase, 'aaAaaAaA', '12345'), translate(utf8_binary, 'aaAaaAaA', '12345') from t5;
+select translate(utf8_lcase, 'aBc' collate utf8_binary, '12345'), translate(utf8_binary, 'aBc' collate utf8_lcase, '12345') from t5;
+select translate(utf8_lcase, 'aBc ' collate utf8_binary_rtrim, '12345'), translate(utf8_binary, 'aBc' collate utf8_lcase, '12345') from t5;
+
+-- Replace
+select replace(utf8_binary, utf8_lcase, 'abc') from t5;
+select replace(s, utf8_binary, 'abc') from t5;
+select replace(utf8_binary collate utf8_binary, s collate utf8_lcase, 'abc') from t5;
+select replace(utf8_binary, utf8_lcase collate utf8_binary, 'abc') from t5;
+select replace(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase, 'abc') from t5;
+select replace(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai, 'abc') from t5;
+select replace(utf8_binary, 'aaAaaAaA', 'abc'), replace(utf8_lcase, 'aaAaaAaA', 'abc') from t5;
+select replace(utf8_binary, 'aaAaaAaA' collate utf8_lcase, 'abc'), replace(utf8_lcase, 'aaAaaAaA' collate utf8_binary, 'abc') from t5;
+select replace(utf8_binary, 'aaAaaAaA ' collate utf8_lcase_rtrim, 'abc'), replace(utf8_lcase, 'aaAaaAaA' collate utf8_binary, 'abc') from t5;
+
+-- EndsWith
+select endswith(utf8_binary, utf8_lcase) from t5;
+select endswith(s, utf8_binary) from t5;
+select endswith(utf8_binary collate utf8_binary, s collate utf8_lcase) from t5;
+select endswith(utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select endswith(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase) from t5;
+select endswith(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai) from t5;
+select endswith(utf8_binary, 'aaAaaAaA'), endswith(utf8_lcase, 'aaAaaAaA') from t5;
+select endswith(utf8_binary, 'aaAaaAaA' collate utf8_lcase), endswith(utf8_lcase, 'aaAaaAaA' collate utf8_binary) from t5;
+select endswith(utf8_binary, 'aaAaaAaA ' collate utf8_lcase_rtrim), endswith(utf8_lcase, 'aaAaaAaA' collate utf8_binary) from t5;
+
+-- StringRepeat
+select repeat(utf8_binary, 3), repeat(utf8_lcase, 2) from t5;
+select repeat(utf8_binary collate utf8_lcase, 3), repeat(utf8_lcase collate utf8_binary, 2) from t5;
+
+-- Ascii & UnBase64 string expressions
+select ascii(utf8_binary), ascii(utf8_lcase) from t5;
+select ascii(utf8_binary collate utf8_lcase), ascii(utf8_lcase collate utf8_binary) from t5;
+select unbase64(utf8_binary), unbase64(utf8_lcase) from t10;
+select unbase64(utf8_binary collate utf8_lcase), unbase64(utf8_lcase collate utf8_binary) from t10;
+
+-- Chr
+select chr(ascii) from t6;
+
+-- Base64, Decode
+select base64(utf8_binary), base64(utf8_lcase) from t5;
+select base64(utf8_binary collate utf8_lcase), base64(utf8_lcase collate utf8_binary) from t5;
+select decode(encode(utf8_binary, 'utf-8'), 'utf-8'), decode(encode(utf8_lcase, 'utf-8'), 'utf-8') from t5;
+select decode(encode(utf8_binary collate utf8_lcase, 'utf-8'), 'utf-8'), decode(encode(utf8_lcase collate utf8_binary, 'utf-8'), 'utf-8') from t5;
+
+-- FormatNumber
+select format_number(ascii, '###.###') from t7;
+select format_number(ascii, '###.###' collate utf8_lcase) from t7;
+
+-- Encode, ToBinary
+select encode(utf8_binary, 'utf-8'), encode(utf8_lcase, 'utf-8') from t5;
+select encode(utf8_binary collate utf8_lcase, 'utf-8'), encode(utf8_lcase collate utf8_binary, 'utf-8') from t5;
+select to_binary(utf8_binary, 'utf-8'), to_binary(utf8_lcase, 'utf-8') from t5;
+select to_binary(utf8_binary collate utf8_lcase, 'utf-8'), to_binary(utf8_lcase collate utf8_binary, 'utf-8') from t5;
+
+-- Sentences
+select sentences(utf8_binary), sentences(utf8_lcase) from t5;
+select sentences(utf8_binary collate utf8_lcase), sentences(utf8_lcase collate utf8_binary) from t5;
+
+-- Upper
+select upper(utf8_binary), upper(utf8_lcase) from t5;
+select upper(utf8_binary collate utf8_lcase), upper(utf8_lcase collate utf8_binary) from t5;
+
+-- Lower
+select lower(utf8_binary), lower(utf8_lcase) from t5;
+select lower(utf8_binary collate utf8_lcase), lower(utf8_lcase collate utf8_binary) from t5;
+
+-- InitCap
+select initcap(utf8_binary), initcap(utf8_lcase) from t5;
+select initcap(utf8_binary collate utf8_lcase), initcap(utf8_lcase collate utf8_binary) from t5;
+
+-- Overlay
+select overlay(utf8_binary, utf8_lcase, 2) from t5;
+select overlay(s, utf8_binary,1) from t5;
+select overlay(utf8_binary collate utf8_binary, s collate utf8_lcase, 3) from t5;
+select overlay(utf8_binary, utf8_lcase collate utf8_binary, 2) from t5;
+select overlay(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase, 2) from t5;
+select overlay(utf8_binary, 'a', 2), overlay(utf8_lcase, 'a', 2) from t5;
+select overlay(utf8_binary, 'AaAA' collate utf8_lcase, 2), overlay(utf8_lcase, 'AAa' collate utf8_binary, 2) from t5;
+
+-- FormatString
+select format_string(format, utf8_binary, utf8_lcase) from t8;
+select format_string(format collate utf8_lcase, utf8_lcase, utf8_binary collate utf8_lcase, 3), format_string(format, utf8_lcase collate utf8_binary, utf8_binary) from t8;
+select format_string(format, utf8_binary, utf8_lcase) from t8;
+
+-- SoundEx
+select soundex(utf8_binary), soundex(utf8_lcase) from t5;
+select soundex(utf8_binary collate utf8_lcase), soundex(utf8_lcase collate utf8_binary) from t5;
+
+-- Length, BitLength & OctetLength
+select length(utf8_binary), length(utf8_lcase) from t5;
+select length(utf8_binary collate utf8_lcase), length(utf8_lcase collate utf8_binary) from t5;
+select bit_length(utf8_binary), bit_length(utf8_lcase) from t5;
+select bit_length(utf8_binary collate utf8_lcase), bit_length(utf8_lcase collate utf8_binary) from t5;
+select octet_length(utf8_binary), octet_length(utf8_lcase) from t5;
+select octet_length(utf8_binary collate utf8_lcase), octet_length(utf8_lcase collate utf8_binary) from t5;
+select octet_length(utf8_binary collate utf8_lcase_rtrim), octet_length(utf8_lcase collate utf8_binary_rtrim) from t5;
+
+-- Luhncheck
+select luhn_check(num) from t9;
+
+-- Levenshtein
+select levenshtein(utf8_binary, utf8_lcase) from t5;
+select levenshtein(s, utf8_binary) from t5;
+select levenshtein(utf8_binary collate utf8_binary, s collate utf8_lcase) from t5;
+select levenshtein(utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select levenshtein(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase) from t5;
+select levenshtein(utf8_binary, 'a'), levenshtein(utf8_lcase, 'a') from t5;
+select levenshtein(utf8_binary, 'AaAA' collate utf8_lcase, 3), levenshtein(utf8_lcase, 'AAa' collate utf8_binary, 4) from t5;
+
+-- IsValidUTF8
+select is_valid_utf8(utf8_binary), is_valid_utf8(utf8_lcase) from t5;
+select is_valid_utf8(utf8_binary collate utf8_lcase), is_valid_utf8(utf8_lcase collate utf8_binary) from t5;
+select is_valid_utf8(utf8_binary collate utf8_lcase_rtrim), is_valid_utf8(utf8_lcase collate utf8_binary_rtrim) from t5;
+
+-- MakeValidUTF8
+select make_valid_utf8(utf8_binary), make_valid_utf8(utf8_lcase) from t5;
+select make_valid_utf8(utf8_binary collate utf8_lcase), make_valid_utf8(utf8_lcase collate utf8_binary) from t5;
+select make_valid_utf8(utf8_binary collate utf8_lcase_rtrim), make_valid_utf8(utf8_lcase collate utf8_binary_rtrim) from t5;
+
+-- ValidateUTF8
+select validate_utf8(utf8_binary), validate_utf8(utf8_lcase) from t5;
+select validate_utf8(utf8_binary collate utf8_lcase), validate_utf8(utf8_lcase collate utf8_binary) from t5;
+select validate_utf8(utf8_binary collate utf8_lcase_rtrim), validate_utf8(utf8_lcase collate utf8_binary_rtrim) from t5;
+
+-- TryValidateUTF8
+select try_validate_utf8(utf8_binary), try_validate_utf8(utf8_lcase) from t5;
+select try_validate_utf8(utf8_binary collate utf8_lcase), try_validate_utf8(utf8_lcase collate utf8_binary) from t5;
+select try_validate_utf8(utf8_binary collate utf8_lcase_rtrim), try_validate_utf8(utf8_lcase collate utf8_binary_rtrim) from t5;
+
+-- Left/Right/Substr
+select substr(utf8_binary, 2, 2), substr(utf8_lcase, 2, 2) from t5;
+select substr(utf8_binary collate utf8_lcase, 2, 2), substr(utf8_lcase collate utf8_binary, 2, 2) from t5;
+select right(utf8_binary, 2), right(utf8_lcase, 2) from t5;
+select right(utf8_binary collate utf8_lcase, 2), right(utf8_lcase collate utf8_binary, 2) from t5;
+select left(utf8_binary, '2' collate utf8_lcase), left(utf8_lcase, 2) from t5;
+select left(utf8_binary collate utf8_lcase, 2), left(utf8_lcase collate utf8_binary, 2) from t5;
+
+-- StringRPad
+select rpad(utf8_binary, 8, utf8_lcase) from t5;
+select rpad(s, 8, utf8_binary) from t5;
+select rpad(utf8_binary collate utf8_binary, 8, s collate utf8_lcase) from t5;
+select rpad(utf8_binary, 8, utf8_lcase collate utf8_binary) from t5;
+select rpad(utf8_binary collate utf8_lcase, 8, utf8_lcase collate utf8_lcase) from t5;
+select lpad(utf8_binary collate utf8_binary_rtrim, 8, utf8_lcase collate utf8_binary_rtrim) from t5;
+select rpad(utf8_binary, 8, 'a'), rpad(utf8_lcase, 8, 'a') from t5;
+select rpad(utf8_binary, 8, 'AaAA' collate utf8_lcase), rpad(utf8_lcase, 8, 'AAa' collate utf8_binary) from t5;
+
+-- StringLPad
+select lpad(utf8_binary, 8, utf8_lcase) from t5;
+select lpad(s, 8, utf8_binary) from t5;
+select lpad(utf8_binary collate utf8_binary, 8, s collate utf8_lcase) from t5;
+select lpad(utf8_binary, 8, utf8_lcase collate utf8_binary) from t5;
+select lpad(utf8_binary collate utf8_lcase, 8, utf8_lcase collate utf8_lcase) from t5;
+select lpad(utf8_binary collate utf8_binary_rtrim, 8, utf8_lcase collate utf8_binary_rtrim) from t5;
+select lpad(utf8_binary, 8, 'a'), lpad(utf8_lcase, 8, 'a') from t5;
+select lpad(utf8_binary, 8, 'AaAA' collate utf8_lcase), lpad(utf8_lcase, 8, 'AAa' collate utf8_binary) from t5;
+
+-- Locate
+select locate(utf8_binary, utf8_lcase) from t5;
+select locate(s, utf8_binary) from t5;
+select locate(utf8_binary collate utf8_binary, s collate utf8_lcase) from t5;
+select locate(utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select locate(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase, 3) from t5;
+select locate(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai, 3) from t5;
+select locate(utf8_binary, 'a'), locate(utf8_lcase, 'a') from t5;
+select locate(utf8_binary, 'AaAA' collate utf8_lcase, 4), locate(utf8_lcase, 'AAa' collate utf8_binary, 4) from t5;
+select locate(utf8_binary, 'AaAA ' collate utf8_binary_rtrim, 4), locate(utf8_lcase, 'AAa ' collate utf8_binary, 4) from t5;
+
+-- StringTrim
+select TRIM(utf8_binary, utf8_lcase) from t5;
+select TRIM(s, utf8_binary) from t5;
+select TRIM(utf8_binary collate utf8_binary, s collate utf8_lcase) from t5;
+select TRIM(utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select TRIM(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase) from t5;
+select TRIM(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai) from t5;
+select TRIM(utf8_binary collate utf8_binary_rtrim, utf8_lcase collate utf8_binary_rtrim) from t5;
+select TRIM('ABc', utf8_binary), TRIM('ABc', utf8_lcase) from t5;
+select TRIM('ABc' collate utf8_lcase, utf8_binary), TRIM('AAa' collate utf8_binary, utf8_lcase) from t5;
+-- StringTrimBoth
+select BTRIM(utf8_binary, utf8_lcase) from t5;
+select BTRIM(s, utf8_binary) from t5;
+select BTRIM(utf8_binary collate utf8_binary, s collate utf8_lcase) from t5;
+select BTRIM(utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select BTRIM(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase) from t5;
+select BTRIM(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai) from t5;
+select BTRIM(utf8_binary collate utf8_binary_rtrim, utf8_lcase collate utf8_binary_rtrim) from t5;
+select BTRIM('ABc', utf8_binary), BTRIM('ABc', utf8_lcase) from t5;
+select BTRIM('ABc' collate utf8_lcase, utf8_binary), BTRIM('AAa' collate utf8_binary, utf8_lcase) from t5;
+-- StringTrimLeft
+select LTRIM(utf8_binary, utf8_lcase) from t5;
+select LTRIM(s, utf8_binary) from t5;
+select LTRIM(utf8_binary collate utf8_binary, s collate utf8_lcase) from t5;
+select LTRIM(utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select LTRIM(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase) from t5;
+select LTRIM(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai) from t5;
+select LTRIM(utf8_binary collate utf8_binary_rtrim, utf8_lcase collate utf8_binary_rtrim) from t5;
+select LTRIM('ABc', utf8_binary), LTRIM('ABc', utf8_lcase) from t5;
+select LTRIM('ABc' collate utf8_lcase, utf8_binary), LTRIM('AAa' collate utf8_binary, utf8_lcase) from t5;
+-- StringTrimRight
+select RTRIM(utf8_binary, utf8_lcase) from t5;
+select RTRIM(s, utf8_binary) from t5;
+select RTRIM(utf8_binary collate utf8_binary, s collate utf8_lcase) from t5;
+select RTRIM(utf8_binary, utf8_lcase collate utf8_binary) from t5;
+select RTRIM(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase) from t5;
+select RTRIM(utf8_binary collate unicode_ai, utf8_lcase collate unicode_ai) from t5;
+select RTRIM(utf8_binary collate utf8_binary_rtrim, utf8_lcase collate utf8_binary_rtrim) from t5;
+select RTRIM('ABc', utf8_binary), RTRIM('ABc', utf8_lcase) from t5;
+select RTRIM('ABc' collate utf8_lcase, utf8_binary), RTRIM('AAa' collate utf8_binary, utf8_lcase) from t5;
+
+-- Implicit aliases to collated expression trees are correctly generated
+
+-- Simple select
+select concat_ws(' ', utf8_lcase, utf8_lcase) from t5;
+
+-- Select by implicit alias
+select `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+);
+
+-- Select by star
+select * from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+);
+
+-- Select by qualified star
+select subq1.* from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+) AS subq1;
+
+-- Implicit alias in CTE output
+with cte as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+select * from cte;
+
+-- Implicit alias in EXISTS subquery output
+select * from values (1) where exists (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+);
+
+-- Implicit alias in scalar subquery output
+select (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5 limit 1
+);
+
+-- Scalar subquery with CTE with implicit alias
+select (
+ with cte as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ )
+ select * from cte limit 1
+);
+
+-- Outer reference to implicit alias
+select * from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5 limit 1
+)
+where (
+ `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` == 'aaa'
+);
+
+-- Implicit alias reference in Sort
+select lower(`concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)`) from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ group by 1
+ order by 1
+);
+
+-- Implicit alias from aggregate in Sort
+select lower(`concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)`) from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ group by 1
+ order by max(concat_ws(' ', utf8_lcase, utf8_lcase))
+);
+
+-- Implicit alias in view schema
+create temporary view v1 as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+);
+
+select * from v1;
+
+select `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` from v1;
+
+drop view v1;
+
+drop table t5;
+drop table t6;
+drop table t7;
+drop table t8;
+drop table t9;
+drop table t10;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/column-resolution-aggregate.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/column-resolution-aggregate.sql
new file mode 100644
index 000000000000..4f879fc809d9
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/column-resolution-aggregate.sql
@@ -0,0 +1,33 @@
+-- Tests covering column resolution priority in Aggregate.
+
+CREATE TEMPORARY VIEW v1 AS VALUES (1, 1, 1), (2, 2, 1) AS t(a, b, k);
+CREATE TEMPORARY VIEW v2 AS VALUES (1, 1, 1), (2, 2, 1) AS t(x, y, all);
+
+-- Relation output columns have higher priority than lateral column alias. This query
+-- should fail as `b` is not in GROUP BY.
+SELECT max(a) AS b, b FROM v1 GROUP BY k;
+
+-- Lateral column alias has higher priority than outer reference.
+SELECT a FROM v1 WHERE (12, 13) IN (SELECT max(x + 10) AS a, a + 1 FROM v2);
+
+-- Relation output columns have higher priority than GROUP BY alias. This query should
+-- fail as `a` is not in GROUP BY.
+SELECT a AS k FROM v1 GROUP BY k;
+
+-- Relation output columns have higher priority than GROUP BY ALL. This query should
+-- fail as `x` is not in GROUP BY.
+SELECT x FROM v2 GROUP BY all;
+
+-- GROUP BY alias has higher priority than GROUP BY ALL, this query fails as `b` is not in GROUP BY.
+SELECT a AS all, b FROM v1 GROUP BY all;
+
+-- GROUP BY alias/ALL does not support lateral column alias.
+SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY k, col;
+SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY all;
+
+-- GROUP BY alias still works if it does not directly reference lateral column alias.
+SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY lca;
+
+-- GROUP BY ALL has higher priority than outer reference. This query should run as `a` and `b` are
+-- in GROUP BY due to the GROUP BY ALL resolution.
+SELECT * FROM v2 WHERE EXISTS (SELECT a, b FROM v1 GROUP BY all);
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/column-resolution-sort.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/column-resolution-sort.sql
new file mode 100644
index 000000000000..2c5b9f9e9dfc
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/column-resolution-sort.sql
@@ -0,0 +1,20 @@
+--SET spark.sql.leafNodeDefaultParallelism=1
+-- Tests covering column resolution priority in Sort.
+
+CREATE TEMPORARY VIEW v1 AS VALUES (1, 2, 2), (2, 1, 1) AS t(a, b, k);
+CREATE TEMPORARY VIEW v2 AS VALUES (1, 2, 2), (2, 1, 1) AS t(a, b, all);
+
+-- Relation output columns have higher priority than missing reference.
+-- Query will fail if we order by the column `v1.b`, as it's not in GROUP BY.
+-- Actually results are [1, 2] as we order by `max(a) AS b`.
+SELECT max(a) AS b FROM v1 GROUP BY k ORDER BY b;
+
+-- Missing reference has higher priority than ORDER BY ALL.
+-- Results will be [1, 2] if we order by `max(a)`.
+-- Actually results are [2, 1] as we order by the grouping column `v2.all`.
+SELECT max(a) FROM v2 GROUP BY all ORDER BY all;
+
+-- ORDER BY ALL has higher priority than outer reference.
+-- Results will be [1, 1] if we order by outer reference 'v2.all'.
+-- Actually results are [2, 2] as we order by column `v1.b`
+SELECT (SELECT b FROM v1 ORDER BY all LIMIT 1) FROM v2;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/columnresolution-negative.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/columnresolution-negative.sql
new file mode 100644
index 000000000000..d100023b4ee1
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/columnresolution-negative.sql
@@ -0,0 +1,50 @@
+-- Negative testcases for column resolution
+CREATE DATABASE mydb1;
+USE mydb1;
+CREATE TABLE t1 USING parquet AS SELECT 1 AS i1;
+
+CREATE DATABASE mydb2;
+USE mydb2;
+CREATE TABLE t1 USING parquet AS SELECT 20 AS i1;
+
+-- Negative tests: column resolution scenarios with ambiguous cases in join queries
+SET spark.sql.crossJoin.enabled = true;
+USE mydb1;
+SELECT i1 FROM t1, mydb1.t1;
+SELECT t1.i1 FROM t1, mydb1.t1;
+SELECT mydb1.t1.i1 FROM t1, mydb1.t1;
+SELECT i1 FROM t1, mydb2.t1;
+SELECT t1.i1 FROM t1, mydb2.t1;
+USE mydb2;
+SELECT i1 FROM t1, mydb1.t1;
+SELECT t1.i1 FROM t1, mydb1.t1;
+SELECT i1 FROM t1, mydb2.t1;
+SELECT t1.i1 FROM t1, mydb2.t1;
+SELECT db1.t1.i1 FROM t1, mydb2.t1;
+SET spark.sql.crossJoin.enabled = false;
+
+-- Negative tests
+USE mydb1;
+SELECT mydb1.t1 FROM t1;
+SELECT t1.x.y.* FROM t1;
+SELECT t1 FROM mydb1.t1;
+USE mydb2;
+SELECT mydb1.t1.i1 FROM t1;
+
+-- Negative tests: view cannot resolve column after incompatible schema change
+USE mydb1;
+CREATE VIEW v1 AS SELECT * FROM t1;
+DROP TABLE t1;
+CREATE TABLE t1 USING parquet AS SELECT 1 AS i2;
+SELECT * FROM v1;
+
+-- Negative tests: temp view cannot resolve column after incompatible schema change
+USE mydb2;
+CREATE TEMP VIEW v2 AS SELECT * FROM t1;
+DROP TABLE t1;
+CREATE TABLE t1 USING parquet AS SELECT 1 AS i2;
+SELECT * FROM v2;
+
+-- reset
+DROP DATABASE mydb1 CASCADE;
+DROP DATABASE mydb2 CASCADE;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/columnresolution-views.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/columnresolution-views.sql
new file mode 100644
index 000000000000..83c32a5bf243
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/columnresolution-views.sql
@@ -0,0 +1,23 @@
+-- Tests for qualified column names for the view code-path
+-- Test scenario with Temporary view
+CREATE OR REPLACE TEMPORARY VIEW view1 AS SELECT 2 AS i1;
+SELECT view1.* FROM view1;
+SELECT * FROM view1;
+SELECT view1.i1 FROM view1;
+SELECT i1 FROM view1;
+SELECT a.i1 FROM view1 AS a;
+SELECT i1 FROM view1 AS a;
+-- cleanup
+DROP VIEW view1;
+
+-- Test scenario with Global Temp view
+CREATE OR REPLACE GLOBAL TEMPORARY VIEW view1 as SELECT 1 as i1;
+SELECT * FROM global_temp.view1;
+SELECT global_temp.view1.* FROM global_temp.view1;
+SELECT i1 FROM global_temp.view1;
+SELECT global_temp.view1.i1 FROM global_temp.view1;
+SELECT view1.i1 FROM global_temp.view1;
+SELECT a.i1 FROM global_temp.view1 AS a;
+SELECT i1 FROM global_temp.view1 AS a;
+-- cleanup
+DROP VIEW global_temp.view1;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/columnresolution.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/columnresolution.sql
new file mode 100644
index 000000000000..d001185a7393
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/columnresolution.sql
@@ -0,0 +1,78 @@
+-- Tests covering different scenarios with qualified column names
+-- Scenario: column resolution scenarios with datasource table
+CREATE DATABASE mydb1;
+USE mydb1;
+CREATE TABLE t1 USING parquet AS SELECT 1 AS i1;
+
+CREATE DATABASE mydb2;
+USE mydb2;
+CREATE TABLE t1 USING parquet AS SELECT 20 AS i1;
+
+USE mydb1;
+SELECT i1 FROM t1;
+SELECT i1 FROM mydb1.t1;
+SELECT t1.i1 FROM t1;
+SELECT t1.i1 FROM mydb1.t1;
+
+SELECT mydb1.t1.i1 FROM t1;
+SELECT mydb1.t1.i1 FROM mydb1.t1;
+
+USE mydb2;
+SELECT i1 FROM t1;
+SELECT i1 FROM mydb1.t1;
+SELECT t1.i1 FROM t1;
+SELECT t1.i1 FROM mydb1.t1;
+SELECT mydb1.t1.i1 FROM mydb1.t1;
+
+-- Scenario: resolve fully qualified table name in star expansion
+USE mydb1;
+SELECT t1.* FROM t1;
+SELECT mydb1.t1.* FROM mydb1.t1;
+SELECT t1.* FROM mydb1.t1;
+USE mydb2;
+SELECT t1.* FROM t1;
+SELECT mydb1.t1.* FROM mydb1.t1;
+SELECT t1.* FROM mydb1.t1;
+SELECT a.* FROM mydb1.t1 AS a;
+
+-- Scenario: resolve in case of subquery
+
+USE mydb1;
+CREATE TABLE t3 USING parquet AS SELECT * FROM VALUES (4,1), (3,1) AS t3(c1, c2);
+CREATE TABLE t4 USING parquet AS SELECT * FROM VALUES (4,1), (2,1) AS t4(c2, c3);
+
+SELECT * FROM t3 WHERE c1 IN (SELECT c2 FROM t4 WHERE t4.c3 = t3.c2);
+
+SELECT * FROM mydb1.t3 WHERE c1 IN
+ (SELECT mydb1.t4.c2 FROM mydb1.t4 WHERE mydb1.t4.c3 = mydb1.t3.c2);
+
+-- Scenario: column resolution scenarios in join queries
+SET spark.sql.crossJoin.enabled = true;
+
+SELECT mydb1.t1.i1 FROM t1, mydb2.t1;
+
+SELECT mydb1.t1.i1 FROM mydb1.t1, mydb2.t1;
+
+USE mydb2;
+SELECT mydb1.t1.i1 FROM t1, mydb1.t1;
+SET spark.sql.crossJoin.enabled = false;
+
+-- Scenario: Table with struct column
+USE mydb1;
+CREATE TABLE t5(i1 INT, t5 STRUCT) USING parquet;
+INSERT INTO t5 VALUES(1, (2, 3));
+SELECT t5.i1 FROM t5;
+SELECT t5.t5.i1 FROM t5;
+SELECT t5.t5.i1 FROM mydb1.t5;
+SELECT t5.i1 FROM mydb1.t5;
+SELECT t5.* FROM mydb1.t5;
+SELECT t5.t5.* FROM mydb1.t5;
+SELECT mydb1.t5.t5.i1 FROM mydb1.t5;
+SELECT mydb1.t5.t5.i2 FROM mydb1.t5;
+SELECT mydb1.t5.* FROM mydb1.t5;
+SELECT mydb1.t5.* FROM t5;
+
+-- Cleanup and Reset
+USE default;
+DROP DATABASE mydb1 CASCADE;
+DROP DATABASE mydb2 CASCADE;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/comments.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/comments.sql
new file mode 100644
index 000000000000..da5e57a94292
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/comments.sql
@@ -0,0 +1,119 @@
+-- Test comments.
+
+-- the first case of bracketed comment
+--QUERY-DELIMITER-START
+/* This is the first example of bracketed comment.
+SELECT 'ommented out content' AS first;
+*/
+SELECT 'selected content' AS first;
+--QUERY-DELIMITER-END
+
+-- the second case of bracketed comment
+--QUERY-DELIMITER-START
+/* This is the second example of bracketed comment.
+SELECT '/', 'ommented out content' AS second;
+*/
+SELECT '/', 'selected content' AS second;
+--QUERY-DELIMITER-END
+
+-- the third case of bracketed comment
+--QUERY-DELIMITER-START
+/* This is the third example of bracketed comment.
+ *SELECT '*', 'ommented out content' AS third;
+ */
+SELECT '*', 'selected content' AS third;
+--QUERY-DELIMITER-END
+
+-- the first case of empty bracketed comment
+--QUERY-DELIMITER-START
+/**/
+SELECT 'selected content' AS fourth;
+--QUERY-DELIMITER-END
+
+-- the first case of nested bracketed comment
+--QUERY-DELIMITER-START
+/* This is the first example of nested bracketed comment.
+/* I am a nested bracketed comment.*/
+*/
+SELECT 'selected content' AS fifth;
+--QUERY-DELIMITER-END
+
+-- the second case of nested bracketed comment
+--QUERY-DELIMITER-START
+/* This is the second example of nested bracketed comment.
+/* I am a nested bracketed comment.
+ */
+ */
+SELECT 'selected content' AS sixth;
+--QUERY-DELIMITER-END
+
+-- the third case of nested bracketed comment
+--QUERY-DELIMITER-START
+/*
+ * This is the third example of nested bracketed comment.
+ /*
+ * I am a nested bracketed comment.
+ */
+ */
+SELECT 'selected content' AS seventh;
+--QUERY-DELIMITER-END
+
+-- the fourth case of nested bracketed comment
+--QUERY-DELIMITER-START
+/*
+ * This is the fourth example of nested bracketed comment.
+SELECT /* I am a nested bracketed comment.*/ * FROM testData;
+ */
+SELECT 'selected content' AS eighth;
+--QUERY-DELIMITER-END
+
+-- the fifth case of nested bracketed comment
+--QUERY-DELIMITER-START
+SELECT /*
+ * This is the fifth example of nested bracketed comment.
+/* I am a second level of nested bracketed comment.
+/* I am a third level of nested bracketed comment.
+Other information of third level.
+SELECT 'ommented out content' AS ninth;
+*/
+Other information of second level.
+*/
+Other information of first level.
+*/
+'selected content' AS ninth;
+--QUERY-DELIMITER-END
+
+-- the first case of empty nested bracketed comment
+--QUERY-DELIMITER-START
+/*/**/*/
+SELECT 'selected content' AS tenth;
+--QUERY-DELIMITER-END
+
+-- the first case of unclosed bracketed comment
+--QUERY-DELIMITER-START
+/*abc*/
+select 1 as a
+/*
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+;
+--QUERY-DELIMITER-END
+
+-- the second case of unclosed bracketed comment
+--QUERY-DELIMITER-START
+/*abc*/
+select 1 as a
+/*
+
+2 as b
+/*abc*/
+, 3 as c
+
+/**/
+select 4 as d
+;
+--QUERY-DELIMITER-END
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/comparator.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/comparator.sql
new file mode 100644
index 000000000000..70af4f75ac43
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/comparator.sql
@@ -0,0 +1,13 @@
+-- binary type
+select x'00' < x'0f';
+select x'00' < x'ff';
+
+-- trim string to numeric
+select '1 ' = 1Y;
+select '\t1 ' = 1Y;
+select '1 ' = 1S;
+select '1 ' = 1;
+select ' 1' = 1L;
+select ' 1' = cast(1.0 as float);
+select ' 1.0 ' = 1.0D;
+select ' 1.0 ' = 1.0BD;
\ No newline at end of file
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/conditional-functions.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/conditional-functions.sql
new file mode 100644
index 000000000000..c7a4b055f024
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/conditional-functions.sql
@@ -0,0 +1,38 @@
+-- Tests for conditional functions
+
+CREATE TABLE conditional_t USING PARQUET AS SELECT c1, c2 FROM VALUES(1d, 0),(2d, 1),(null, 1),(CAST('NaN' AS DOUBLE), 0) AS t(c1, c2);
+
+SELECT nanvl(c2, c1/c2 + c1/c2) FROM conditional_t;
+SELECT nanvl(c2, 1/0) FROM conditional_t;
+SELECT nanvl(1-0, 1/0) FROM conditional_t;
+
+SELECT if(c2 >= 0, 1-0, 1/0) from conditional_t;
+SELECT if(1 == 1, 1, 1/0);
+SELECT if(1 != 1, 1/0, 1);
+
+SELECT coalesce(c2, 1/0) from conditional_t;
+SELECT coalesce(1, 1/0);
+SELECT coalesce(null, 1, 1/0);
+
+SELECT case when c2 >= 0 then 1 else 1/0 end from conditional_t;
+SELECT case when 1 < 2 then 1 else 1/0 end;
+SELECT case when 1 > 2 then 1/0 else 1 end;
+
+SELECT nullifzero(0),
+ nullifzero(cast(0 as tinyint)),
+ nullifzero(cast(0 as bigint)),
+ nullifzero('0'),
+ nullifzero(0.0),
+ nullifzero(1),
+ nullifzero(null);
+
+SELECT nullifzero('abc');
+
+SELECT zeroifnull(null),
+ zeroifnull(1),
+ zeroifnull(cast(1 as tinyint)),
+ zeroifnull(cast(1 as bigint));
+
+SELECT zeroifnull('abc');
+
+DROP TABLE conditional_t;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/count.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/count.sql
new file mode 100644
index 000000000000..644e808cc04c
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/count.sql
@@ -0,0 +1,55 @@
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null)
+AS testData(a, b);
+
+-- count with single expression
+SELECT
+ count(*), count(1), count(null), count(a), count(b), count(a + b), count((a, b))
+FROM testData;
+
+-- distinct count with single expression
+SELECT
+ count(DISTINCT 1),
+ count(DISTINCT null),
+ count(DISTINCT a),
+ count(DISTINCT b),
+ count(DISTINCT (a + b)),
+ count(DISTINCT (a, b))
+FROM testData;
+
+-- count with multiple expressions
+SELECT count(a, b), count(b, a), count(testData.*, testData.*) FROM testData;
+
+-- distinct count with multiple expressions
+SELECT
+ count(DISTINCT a, b), count(DISTINCT b, a), count(DISTINCT *), count(DISTINCT testData.*, testData.*)
+FROM testData;
+
+-- distinct count with multiple literals
+SELECT count(DISTINCT 3,2);
+SELECT count(DISTINCT 2), count(DISTINCT 2,3);
+SELECT count(DISTINCT 2), count(DISTINCT 3,2);
+SELECT count(DISTINCT a), count(DISTINCT 2,3) FROM testData;
+SELECT count(DISTINCT a), count(DISTINCT 3,2) FROM testData;
+SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 2,3) FROM testData;
+SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 3,2) FROM testData;
+SELECT count(distinct 0.8), percentile_approx(distinct a, 0.8) FROM testData;
+
+-- legacy behavior: allow calling function count without parameters
+set spark.sql.legacy.allowParameterlessCount=true;
+set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation,org.apache.spark.sql.catalyst.optimizer.ConstantFolding;
+SELECT count() FROM testData;
+
+-- count without expressions
+set spark.sql.legacy.allowParameterlessCount=false;
+set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation,org.apache.spark.sql.catalyst.optimizer.ConstantFolding,org.apache.spark.sql.catalyst.optimizer.NullPropagation;
+SELECT count() FROM testData;
+
+-- legacy behavior: allow count(testData.*)
+set spark.sql.legacy.allowStarWithSingleTableIdentifierInCount=true;
+SELECT count(testData.*) FROM testData;
+
+-- count with a single tblName.* as parameter
+set spark.sql.legacy.allowStarWithSingleTableIdentifierInCount=false;
+SELECT count(testData.*) FROM testData;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cross-join.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cross-join.sql
new file mode 100644
index 000000000000..b64197e2bc70
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cross-join.sql
@@ -0,0 +1,36 @@
+-- Cross join detection and error checking is done in JoinSuite since explain output is
+-- used in the error message and the ids are not stable. Only positive cases are checked here.
+
+create temporary view nt1 as select * from values
+ ("one", 1),
+ ("two", 2),
+ ("three", 3)
+ as nt1(k, v1);
+
+create temporary view nt2 as select * from values
+ ("one", 1),
+ ("two", 22),
+ ("one", 5)
+ as nt2(k, v2);
+
+-- Cross joins with and without predicates
+SELECT * FROM nt1 cross join nt2;
+SELECT * FROM nt1 cross join nt2 where nt1.k = nt2.k;
+SELECT * FROM nt1 cross join nt2 on (nt1.k = nt2.k);
+SELECT * FROM nt1 cross join nt2 where nt1.v1 = 1 and nt2.v2 = 22;
+
+SELECT a.key, b.key FROM
+(SELECT k key FROM nt1 WHERE v1 < 2) a
+CROSS JOIN
+(SELECT k key FROM nt2 WHERE v2 = 22) b;
+
+-- Join reordering
+create temporary view A(a, va) as select * from nt1;
+create temporary view B(b, vb) as select * from nt1;
+create temporary view C(c, vc) as select * from nt1;
+create temporary view D(d, vd) as select * from nt1;
+
+-- Allowed since cross join with C is explicit
+select * from ((A join B on (a = b)) cross join C) join D on (a = d);
+-- Cross joins with non-equal predicates
+SELECT * FROM nt1 CROSS JOIN nt2 ON (nt1.k > nt2.k);
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/csv-functions.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/csv-functions.sql
new file mode 100644
index 000000000000..01d436534a10
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/csv-functions.sql
@@ -0,0 +1,24 @@
+-- from_csv
+select from_csv('1, 3.14', 'a INT, f FLOAT');
+select from_csv('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'));
+-- Check if errors handled
+select from_csv('1', 1);
+select from_csv('1', 'a InvalidType');
+select from_csv('1', 'Array');
+select from_csv('1', 'a INT', named_struct('mode', 'PERMISSIVE'));
+select from_csv('1', 'a INT', map('mode', 1));
+select from_csv();
+-- infer schema of json literal
+select from_csv('1,abc', schema_of_csv('1,abc'));
+select schema_of_csv('1|abc', map('delimiter', '|'));
+select schema_of_csv(null);
+CREATE TEMPORARY VIEW csvTable(csvField, a) AS SELECT * FROM VALUES ('1,abc', 'a');
+SELECT schema_of_csv(csvField) FROM csvTable;
+-- Clean up
+DROP VIEW IF EXISTS csvTable;
+-- to_csv
+select to_csv(named_struct('a', 1, 'b', 2));
+select to_csv(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy'));
+-- Check if errors handled
+select to_csv(named_struct('a', 1, 'b', 2), named_struct('mode', 'PERMISSIVE'));
+select to_csv(named_struct('a', 1, 'b', 2), map('mode', 1));
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-command.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-command.sql
new file mode 100644
index 000000000000..ee90c2de49eb
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-command.sql
@@ -0,0 +1,33 @@
+-- WITH inside CTE
+CREATE TABLE cte_tbl USING csv AS WITH s AS (SELECT 42 AS col) SELECT * FROM s;
+
+SELECT * FROM cte_tbl;
+
+-- WITH inside CREATE VIEW
+CREATE TEMPORARY VIEW cte_view AS WITH s AS (SELECT 42 AS col) SELECT * FROM s;
+
+SELECT * FROM cte_view;
+
+-- INSERT inside WITH
+WITH s AS (SELECT 43 AS col)
+INSERT INTO cte_tbl SELECT * FROM S;
+
+SELECT * FROM cte_tbl;
+
+-- WITH inside INSERT
+INSERT INTO cte_tbl WITH s AS (SELECT 44 AS col) SELECT * FROM s;
+
+SELECT * FROM cte_tbl;
+
+CREATE TABLE cte_tbl2 (col INT) USING csv;
+-- Multi-INSERT
+WITH s AS (SELECT 45 AS col)
+FROM s
+INSERT INTO cte_tbl SELECT col
+INSERT INTO cte_tbl2 SELECT col;
+
+SELECT * FROM cte_tbl;
+SELECT * FROM cte_tbl2;
+
+DROP TABLE cte_tbl;
+DROP TABLE cte_tbl2;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-legacy.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-legacy.sql
new file mode 100644
index 000000000000..29dee1a3afd3
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-legacy.sql
@@ -0,0 +1,2 @@
+--SET spark.sql.legacy.ctePrecedencePolicy = legacy
+--IMPORT cte-nested.sql
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-nested.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-nested.sql
new file mode 100644
index 000000000000..3b2ba1fcdd66
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-nested.sql
@@ -0,0 +1,228 @@
+-- CTE in CTE definition
+WITH t as (
+ WITH t2 AS (SELECT 1)
+ SELECT * FROM t2
+)
+SELECT * FROM t;
+
+-- CTE in subquery
+SELECT max(c) FROM (
+ WITH t(c) AS (SELECT 1)
+ SELECT * FROM t
+);
+
+-- CTE in subquery expression
+SELECT (
+ WITH t AS (SELECT 1)
+ SELECT * FROM t
+);
+
+-- un-referenced CTE in subquery expression: outer reference in CTE relation
+SELECT (
+ WITH unreferenced AS (SELECT id)
+ SELECT 1
+) FROM range(1);
+
+-- un-referenced CTE in subquery expression: outer reference in CTE main query
+SELECT (
+ WITH unreferenced AS (SELECT 1)
+ SELECT id
+) FROM range(1);
+
+-- Make sure CTE in subquery is scoped to that subquery rather than global
+-- the 2nd half of the union should fail because the cte is scoped to the first half
+SELECT * FROM
+ (
+ WITH cte AS (SELECT * FROM range(10))
+ SELECT * FROM cte WHERE id = 8
+ ) a
+UNION
+SELECT * FROM cte;
+
+-- CTE in CTE definition shadows outer
+WITH
+ t AS (SELECT 1),
+ t2 AS (
+ WITH t AS (SELECT 2)
+ SELECT * FROM t
+ )
+SELECT * FROM t2;
+
+-- CTE in CTE definition shadows outer 2
+WITH
+ t(c) AS (SELECT 1),
+ t2 AS (
+ SELECT (
+ SELECT max(c) FROM (
+ WITH t(c) AS (SELECT 2)
+ SELECT * FROM t
+ )
+ )
+ )
+SELECT * FROM t2;
+
+-- CTE in CTE definition shadows outer 3
+WITH
+ t AS (SELECT 1),
+ t2 AS (
+ WITH t AS (SELECT 2),
+ t2 AS (
+ WITH t AS (SELECT 3)
+ SELECT * FROM t
+ )
+ SELECT * FROM t2
+ )
+SELECT * FROM t2;
+
+-- CTE in subquery shadows outer
+WITH t(c) AS (SELECT 1)
+SELECT max(c) FROM (
+ WITH t(c) AS (SELECT 2)
+ SELECT * FROM t
+);
+
+-- CTE in subquery shadows outer 2
+WITH t(c) AS (SELECT 1)
+SELECT sum(c) FROM (
+ SELECT max(c) AS c FROM (
+ WITH t(c) AS (SELECT 2)
+ SELECT * FROM t
+ )
+);
+
+-- CTE in subquery shadows outer 3
+WITH t(c) AS (SELECT 1)
+SELECT sum(c) FROM (
+ WITH t(c) AS (SELECT 2)
+ SELECT max(c) AS c FROM (
+ WITH t(c) AS (SELECT 3)
+ SELECT * FROM t
+ )
+);
+
+-- CTE in subquery expression shadows outer
+WITH t AS (SELECT 1)
+SELECT (
+ WITH t AS (SELECT 2)
+ SELECT * FROM t
+);
+
+-- CTE in subquery expression shadows outer 2
+WITH t AS (SELECT 1)
+SELECT (
+ SELECT (
+ WITH t AS (SELECT 2)
+ SELECT * FROM t
+ )
+);
+
+-- CTE in subquery expression shadows outer 3
+WITH t AS (SELECT 1)
+SELECT (
+ WITH t AS (SELECT 2)
+ SELECT (
+ WITH t AS (SELECT 3)
+ SELECT * FROM t
+ )
+);
+
+-- CTE in subquery expression shadows outer 4
+WITH t(c) AS (SELECT 1)
+SELECT * FROM t
+WHERE c IN (
+ WITH t(c) AS (SELECT 2)
+ SELECT * FROM t
+);
+
+-- forward name conflict is not a real conflict
+WITH
+ t AS (
+ WITH t2 AS (SELECT 1)
+ SELECT * FROM t2
+ ),
+ t2 AS (SELECT 2)
+SELECT * FROM t;
+
+-- case insensitive name conflicts: in other CTE relations
+WITH
+ abc AS (SELECT 1),
+ t AS (
+ WITH aBc AS (SELECT 2)
+ SELECT * FROM aBC
+ )
+SELECT * FROM t;
+
+-- case insensitive name conflicts: in subquery expressions
+WITH abc AS (SELECT 1)
+SELECT (
+ WITH aBc AS (SELECT 2)
+ SELECT * FROM aBC
+);
+
+-- SPARK-38404: CTE in CTE definition references outer
+WITH
+ t1 AS (SELECT 1),
+ t2 AS (
+ WITH t3 AS (
+ SELECT * FROM t1
+ )
+ SELECT * FROM t3
+ )
+SELECT * FROM t2;
+
+-- CTE nested in CTE main body FROM clause references outer CTE def
+WITH cte_outer AS (
+ SELECT 1
+)
+SELECT * FROM (
+ WITH cte_inner AS (
+ SELECT * FROM cte_outer
+ )
+ SELECT * FROM cte_inner
+);
+
+-- CTE double nested in CTE main body FROM clause references outer CTE def
+WITH cte_outer AS (
+ SELECT 1
+)
+SELECT * FROM (
+ WITH cte_inner AS (
+ SELECT * FROM (
+ WITH cte_inner_inner AS (
+ SELECT * FROM cte_outer
+ )
+ SELECT * FROM cte_inner_inner
+ )
+ )
+ SELECT * FROM cte_inner
+);
+
+-- Invalid reference to invisible CTE def nested CTE def
+WITH cte_outer AS (
+ WITH cte_invisible_inner AS (
+ SELECT 1
+ )
+ SELECT * FROM cte_invisible_inner
+)
+SELECT * FROM (
+ WITH cte_inner AS (
+ SELECT * FROM cte_invisible_inner
+ )
+ SELECT * FROM cte_inner
+);
+
+-- Invalid reference to invisible CTE def nested CTE def (in FROM)
+WITH cte_outer AS (
+ SELECT * FROM (
+ WITH cte_invisible_inner AS (
+ SELECT 1
+ )
+ SELECT * FROM cte_invisible_inner
+ )
+)
+SELECT * FROM (
+ WITH cte_inner AS (
+ SELECT * FROM cte_invisible_inner
+ )
+ SELECT * FROM cte_inner
+);
\ No newline at end of file
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-nonlegacy.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-nonlegacy.sql
new file mode 100644
index 000000000000..3316f6740281
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte-nonlegacy.sql
@@ -0,0 +1,2 @@
+--SET spark.sql.legacy.ctePrecedencePolicy = EXCEPTION
+--IMPORT cte-nested.sql
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte.sql
new file mode 100644
index 000000000000..ecf760ecac70
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/cte.sql
@@ -0,0 +1,184 @@
+create temporary view t as select * from values 0, 1, 2 as t(id);
+create temporary view t2 as select * from values 0, 1 as t(id);
+create temporary view t3 as select * from t;
+
+-- WITH clause should not fall into infinite loop by referencing self
+WITH s AS (SELECT 1 FROM s) SELECT * FROM s;
+
+WITH r AS (SELECT (SELECT * FROM r))
+SELECT * FROM r;
+
+-- WITH clause should reference the base table
+WITH t AS (SELECT 1 FROM t) SELECT * FROM t;
+
+-- Table `t` referenced by a view should take precedence over the top CTE `t`
+WITH t AS (SELECT 1) SELECT * FROM t3;
+
+-- WITH clause should not allow cross reference
+WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2;
+
+-- WITH clause should reference the previous CTE
+WITH t1 AS (SELECT * FROM t2), t2 AS (SELECT 2 FROM t1) SELECT * FROM t1 cross join t2;
+
+-- SPARK-18609 CTE with self-join
+WITH CTE1 AS (
+ SELECT b.id AS id
+ FROM T2 a
+ CROSS JOIN (SELECT id AS id FROM T2) b
+)
+SELECT t1.id AS c1,
+ t2.id AS c2
+FROM CTE1 t1
+ CROSS JOIN CTE1 t2;
+
+-- CTE with column alias
+WITH t(x) AS (SELECT 1)
+SELECT * FROM t WHERE x = 1;
+
+-- CTE with multiple column aliases
+WITH t(x, y) AS (SELECT 1, 2)
+SELECT * FROM t WHERE x = 1 AND y = 2;
+
+-- CTE with duplicate column aliases
+WITH t(x, x) AS (SELECT 1, 2)
+SELECT * FROM t;
+
+-- CTE with empty column alias list is not allowed
+WITH t() AS (SELECT 1)
+SELECT * FROM t;
+
+-- CTEs with duplicate names are not allowed
+WITH
+ t(x) AS (SELECT 1),
+ t(x) AS (SELECT 2)
+SELECT * FROM t;
+
+-- invalid CTE relation should fail the query even if it's not referenced
+WITH t AS (SELECT 1 FROM non_existing_table)
+SELECT 2;
+
+-- The following tests are ported from Postgres
+-- Multiple uses are evaluated only once
+SELECT count(*) FROM (
+ WITH q1(x) AS (SELECT random() FROM range(1, 5))
+ SELECT * FROM q1
+ UNION
+ SELECT * FROM q1
+) ss;
+
+-- Deeply nested
+WITH w1(c1) AS
+ (WITH w2(c2) AS
+ (WITH w3(c3) AS
+ (WITH w4(c4) AS
+ (WITH w5(c5) AS
+ (WITH w6(c6) AS
+ (WITH w7(c7) AS
+ (WITH w8(c8) AS
+ (SELECT 1)
+ SELECT * FROM w8)
+ SELECT * FROM w7)
+ SELECT * FROM w6)
+ SELECT * FROM w5)
+ SELECT * FROM w4)
+ SELECT * FROM w3)
+ SELECT * FROM w2)
+SELECT * FROM w1;
+
+-- CTE referencing an outer-level variable, should fail
+SELECT ( WITH cte(foo) AS ( VALUES(id) )
+ SELECT (SELECT foo FROM cte) )
+FROM t;
+
+-- CTE name collision with subquery name
+WITH same_name AS (SELECT 42)
+SELECT * FROM same_name, (SELECT 10) AS same_name;
+
+-- CTE name collision with subquery name, should fail
+WITH same_name(x) AS (SELECT 42)
+SELECT same_name.x FROM (SELECT 10) AS same_name(x), same_name;
+
+set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation,org.apache.spark.sql.catalyst.optimizer.NullPropagation;
+-- Test behavior with an unknown-type literal in the WITH
+WITH q AS (SELECT 'foo' AS x)
+SELECT x, typeof(x) FROM q;
+
+set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation,org.apache.spark.sql.catalyst.optimizer.ConstantFolding,org.apache.spark.sql.catalyst.optimizer.NullPropagation;
+-- The following tests are ported from ZetaSQL
+-- Alias inside the with hides the underlying column name, should fail
+with cte as (select id as id_alias from t)
+select id from cte;
+
+-- Reference of later WITH, should fail.
+with r1 as (select * from r2),
+ r2 as (select 1)
+select 2;
+
+-- WITH in a table subquery
+SELECT * FROM
+ (WITH q AS (select 1 x) SELECT x+1 AS y FROM q);
+
+-- WITH in an expression subquery
+select (with q as (select 1 x) select * from q);
+
+-- WITH in an IN subquery
+select 1 in (with q as (select 1) select * from q);
+
+-- WITH alias referenced outside its scope, should fail
+SELECT * FROM
+ (WITH q AS (select 1 x) SELECT x+1 AS y FROM q),
+ q;
+
+-- References to CTEs of the same name should be resolved properly
+WITH T1 as (select 1 a)
+select *
+from
+ T1 x,
+ (WITH T1 as (select 2 b) select * from T1) y,
+ T1 z;
+
+-- References to CTEs of the same name should be resolved properly
+WITH TTtt as (select 1 a),
+ `tTTt_2` as (select 2 a)
+select *
+from
+ (WITH TtTt as (select 3 c) select * from ttTT, `tttT_2`);
+
+-- Correlated CTE subquery
+select
+ (WITH q AS (select T.x) select * from q)
+from (select 1 x, 2 y) T;
+
+-- The main query inside WITH can be correlated.
+select
+ (WITH q AS (select 3 z) select x + t.y + z from q)
+from (select 1 x, 2 y) T;
+
+-- A WITH subquery alias is visible inside a WITH clause subquery.
+WITH q1 as (select 1 x)
+select * from
+ (with q2 as (select * from q1) select * from q2);
+
+-- A WITH subquery alias is visible inside a WITH clause subquery, and they have the same name.
+WITH q1 as (select 1 x)
+select * from
+ (with q1 as (select x+1 from q1) select * from q1);
+
+-- The following tests are ported from DuckDB
+-- Duplicate CTE alias, should fail
+with cte1 as (select 42), cte1 as (select 42) select * FROM cte1;
+
+-- Refer to CTE in subquery
+with cte1 as (Select id as j from t)
+select * from cte1 where j = (select max(j) from cte1 as cte2);
+
+-- Nested CTE views that re-use CTE aliases
+with cte AS (SELECT * FROM va) SELECT * FROM cte;
+
+-- Self-refer to non-existent cte, should fail.
+with cte as (select * from cte) select * from cte;
+
+-- Clean up
+DROP VIEW IF EXISTS t;
+DROP VIEW IF EXISTS t2;
+DROP VIEW IF EXISTS t3;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/current_database_catalog.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/current_database_catalog.sql
new file mode 100644
index 000000000000..d7aed6afaa17
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/current_database_catalog.sql
@@ -0,0 +1,2 @@
+-- get current_database/current_schema and current_catalog
+select current_database(), current_schema(), current_catalog();
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/date.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/date.sql
new file mode 100644
index 000000000000..c1ae432d6316
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/date.sql
@@ -0,0 +1,168 @@
+-- date literals, functions and operations
+
+create temporary view date_view as select '2011-11-11' date_str, '1' int_str;
+
+select date '2019-01-01\t';
+select date '2020-01-01中文';
+
+select make_date(2019, 1, 1), make_date(12, 12, 12);
+-- invalid month
+select make_date(2000, 13, 1);
+-- invalid day
+select make_date(2000, 1, 33);
+
+-- invalid: year field must have at least 4 digits
+select date'015';
+-- invalid: month field can have at most 2 digits
+select date'2021-4294967297-11';
+
+select current_date = current_date;
+-- under ANSI mode, `current_date` can't be a function name.
+select current_date() = current_date();
+select curdate(1);
+
+-- conversions between date and unix_date (number of days from epoch)
+select DATE_FROM_UNIX_DATE(0), DATE_FROM_UNIX_DATE(1000), DATE_FROM_UNIX_DATE(null);
+select UNIX_DATE(DATE('1970-01-01')), UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null);
+
+select to_date(null), to_date('2016-12-31'), to_date('2016-12-31', 'yyyy-MM-dd');
+
+-- `dayofweek` accepts both date and timestamp ltz/ntz inputs.
+select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null),
+ dayofweek('1582-10-15 13:10:15'), dayofweek(timestamp_ltz'1582-10-15 13:10:15'), dayofweek(timestamp_ntz'1582-10-15 13:10:15');
+
+-- `weekday` accepts both date and timestamp ltz/ntz inputs.
+select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), weekday(null),
+ weekday('1582-10-15 13:10:15'), weekday(timestamp_ltz'1582-10-15 13:10:15'), weekday(timestamp_ntz'1582-10-15 13:10:15');
+
+-- `year` accepts both date and timestamp ltz/ntz inputs.
+select year('1500-01-01'), year('1582-10-15 13:10:15'), year(timestamp_ltz'1582-10-15 13:10:15'), year(timestamp_ntz'1582-10-15 13:10:15');
+
+-- `month` accepts both date and timestamp ltz/ntz inputs.
+select month('1500-01-01'), month('1582-10-15 13:10:15'), month(timestamp_ltz'1582-10-15 13:10:15'), month(timestamp_ntz'1582-10-15 13:10:15');
+
+-- `dayOfYear` accepts both date and timestamp ltz/ntz inputs.
+select dayOfYear('1500-01-01'), dayOfYear('1582-10-15 13:10:15'), dayOfYear(timestamp_ltz'1582-10-15 13:10:15'), dayOfYear(timestamp_ntz'1582-10-15 13:10:15');
+
+-- next_day
+select next_day("2015-07-23", "Mon");
+select next_day("2015-07-23", "xx");
+select next_day("2015-07-23 12:12:12", "Mon");
+-- next_day does not accept timestamp ltz/ntz input
+select next_day(timestamp_ltz"2015-07-23 12:12:12", "Mon");
+select next_day(timestamp_ntz"2015-07-23 12:12:12", "Mon");
+select next_day("xx", "Mon");
+select next_day(null, "Mon");
+select next_day(null, "xx");
+
+-- date add
+select date_add(date'2011-11-11', 1);
+select date_add('2011-11-11', 1);
+select date_add('2011-11-11', 1Y);
+select date_add('2011-11-11', 1S);
+-- invalid cases: the second parameter can only be byte/short/int
+select date_add('2011-11-11', 1L);
+select date_add('2011-11-11', 1.0);
+select date_add('2011-11-11', 1E1);
+-- the second parameter can be a string literal if it can be parsed to int
+select date_add('2011-11-11', '1');
+select date_add('2011-11-11', '1.2');
+-- null input leads to null result.
+select date_add(null, 1);
+select date_add(date'2011-11-11', null);
+-- `date_add` accepts both date and timestamp ltz/ntz inputs (non-ANSI mode).
+select date_add(timestamp_ltz'2011-11-11 12:12:12', 1);
+select date_add(timestamp_ntz'2011-11-11 12:12:12', 1);
+
+-- date sub
+select date_sub(date'2011-11-11', 1);
+select date_sub('2011-11-11', 1);
+select date_sub('2011-11-11', 1Y);
+select date_sub('2011-11-11', 1S);
+-- invalid cases: the second parameter can only be byte/short/int
+select date_sub('2011-11-11', 1L);
+select date_sub('2011-11-11', 1.0);
+select date_sub('2011-11-11', 1E1);
+-- the second parameter can be a string literal if it can be parsed to int
+select date_sub(date'2011-11-11', '1');
+select date_sub(date'2011-11-11', '1.2');
+-- null input leads to null result.
+select date_sub(null, 1);
+select date_sub(date'2011-11-11', null);
+-- `date_sub` accepts both date and timestamp ltz/ntz inputs (non-ANSI mode).
+select date_sub(timestamp_ltz'2011-11-11 12:12:12', 1);
+select date_sub(timestamp_ntz'2011-11-11 12:12:12', 1);
+
+-- date add/sub with non-literal string column
+select date_add('2011-11-11', int_str) from date_view;
+select date_sub('2011-11-11', int_str) from date_view;
+select date_add(date_str, 1) from date_view;
+select date_sub(date_str, 1) from date_view;
+
+-- date +/- number
+select date '2011-11-11' + 1E1;
+select date '2001-09-28' + 7Y;
+select 7S + date '2001-09-28';
+select date '2001-10-01' - 7;
+
+-- date - date
+select date '2001-10-01' - date '2001-09-28';
+-- if one side is string/null literal, promote it to date type.
+select date '2001-10-01' - '2001-09-28';
+select '2001-10-01' - date '2001-09-28';
+select date '2001-09-28' - null;
+select null - date '2019-10-06';
+-- invalid: non-literal string column
+select date_str - date '2001-09-28' from date_view;
+select date '2001-09-28' - date_str from date_view;
+
+-- invalid: date + string literal
+select date'2011-11-11' + '1';
+select '1' + date'2011-11-11';
+
+-- null result: date + null
+select date'2011-11-11' + null;
+select null + date'2011-11-11';
+
+-- date +/- interval and interval + date
+select date '2012-01-01' - interval '2-2' year to month,
+ date '2011-11-11' - interval '2' day,
+ date '2012-01-01' + interval '-2-2' year to month,
+ date '2011-11-11' + interval '-2' month,
+ - interval '2-2' year to month + date '2012-01-01',
+ interval '-2' day + date '2011-11-11';
+
+-- Unsupported narrow text style
+select to_date('26/October/2015', 'dd/MMMMM/yyyy');
+select from_json('{"d":"26/October/2015"}', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy'));
+select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy'));
+
+-- Add a number of units to a timestamp or a date
+select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123');
+select date_add(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456');
+select dateadd(SECOND, 58, timestamp'2022-02-25 01:02:03');
+select date_add(MINUTE, -100, date'2022-02-25');
+select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03');
+select date_add(DAY, 367, date'2022-02-25');
+select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03');
+select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03');
+select dateadd(QUARTER, 5, date'2022-02-25');
+select date_add(YEAR, 1, date'2022-02-25');
+
+select dateadd('MICROSECOND', 1001, timestamp'2022-02-25 01:02:03.123');
+select date_add('QUARTER', 5, date'2022-02-25');
+
+-- Get the difference between timestamps or dates in the specified units
+select datediff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001');
+select date_diff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455');
+select datediff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01');
+select date_diff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00');
+select datediff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03');
+select date_diff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00');
+select datediff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03');
+select date_diff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03');
+select datediff(QUARTER, date'2022-02-25', date'2023-05-25');
+select date_diff(YEAR, date'2022-02-25', date'2023-02-25');
+
+select date_diff('MILLISECOND', timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455');
+select datediff('YEAR', date'2022-02-25', date'2023-02-25');
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-formatting-invalid.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-formatting-invalid.sql
new file mode 100644
index 000000000000..11bba00e91ab
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-formatting-invalid.sql
@@ -0,0 +1,53 @@
+--- TESTS FOR DATETIME FORMATTING FUNCTIONS WITH INVALID PATTERNS ---
+
+-- separating this from datetime-formatting.sql, because the text form
+-- for patterns with 5 letters in SimpleDateFormat varies across different JDKs
+select date_format('2018-11-17 13:33:33.333', 'GGGGG');
+-- pattern letter count can not be greater than 6
+select date_format('2018-11-17 13:33:33.333', 'yyyyyyy');
+-- q/L in JDK 8 will fail when the count is more than 2
+select date_format('2018-11-17 13:33:33.333', 'qqqqq');
+select date_format('2018-11-17 13:33:33.333', 'QQQQQ');
+select date_format('2018-11-17 13:33:33.333', 'MMMMM');
+select date_format('2018-11-17 13:33:33.333', 'LLLLL');
+
+select date_format('2018-11-17 13:33:33.333', 'EEEEE');
+select date_format('2018-11-17 13:33:33.333', 'FF');
+select date_format('2018-11-17 13:33:33.333', 'ddd');
+-- DD is invalid if the day-of-year exceeds 100, but it becomes valid in Java 11
+-- select date_format('2018-11-17 13:33:33.333', 'DD');
+select date_format('2018-11-17 13:33:33.333', 'DDDD');
+select date_format('2018-11-17 13:33:33.333', 'HHH');
+select date_format('2018-11-17 13:33:33.333', 'hhh');
+select date_format('2018-11-17 13:33:33.333', 'kkk');
+select date_format('2018-11-17 13:33:33.333', 'KKK');
+select date_format('2018-11-17 13:33:33.333', 'mmm');
+select date_format('2018-11-17 13:33:33.333', 'sss');
+select date_format('2018-11-17 13:33:33.333', 'SSSSSSSSSS');
+select date_format('2018-11-17 13:33:33.333', 'aa');
+select date_format('2018-11-17 13:33:33.333', 'V');
+select date_format('2018-11-17 13:33:33.333', 'zzzzz');
+select date_format('2018-11-17 13:33:33.333', 'XXXXXX');
+select date_format('2018-11-17 13:33:33.333', 'ZZZZZZ');
+select date_format('2018-11-17 13:33:33.333', 'OO');
+select date_format('2018-11-17 13:33:33.333', 'xxxxxx');
+
+select date_format('2018-11-17 13:33:33.333', 'A');
+select date_format('2018-11-17 13:33:33.333', 'n');
+select date_format('2018-11-17 13:33:33.333', 'N');
+select date_format('2018-11-17 13:33:33.333', 'p');
+
+-- disabled week-based patterns
+select date_format('2018-11-17 13:33:33.333', 'Y');
+select date_format('2018-11-17 13:33:33.333', 'w');
+select date_format('2018-11-17 13:33:33.333', 'W');
+select date_format('2018-11-17 13:33:33.333', 'u');
+select date_format('2018-11-17 13:33:33.333', 'e');
+select date_format('2018-11-17 13:33:33.333', 'c');
+
+-- others
+select date_format('2018-11-17 13:33:33.333', 'B');
+select date_format('2018-11-17 13:33:33.333', 'C');
+select date_format('2018-11-17 13:33:33.333', 'I');
+
+
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-formatting-legacy.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-formatting-legacy.sql
new file mode 100644
index 000000000000..19cab61a7ee5
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-formatting-legacy.sql
@@ -0,0 +1,2 @@
+--SET spark.sql.legacy.timeParserPolicy=LEGACY
+--IMPORT datetime-formatting.sql
\ No newline at end of file
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-formatting.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-formatting.sql
new file mode 100644
index 000000000000..3618bb5c399f
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-formatting.sql
@@ -0,0 +1,73 @@
+--- TESTS FOR DATETIME FORMATTING FUNCTIONS ---
+
+create temporary view v as select col from values
+ (timestamp '1582-06-01 11:33:33.123UTC+080000'),
+ (timestamp '1970-01-01 00:00:00.000Europe/Paris'),
+ (timestamp '1970-12-31 23:59:59.999Asia/Srednekolymsk'),
+ (timestamp '1996-04-01 00:33:33.123Australia/Darwin'),
+ (timestamp '2018-11-17 13:33:33.123Z'),
+ (timestamp '2020-01-01 01:33:33.123Asia/Shanghai'),
+ (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col);
+
+select col, date_format(col, 'G GG GGG GGGG'), to_char(col, 'G GG GGG GGGG'), to_varchar(col, 'G GG GGG GGGG') from v;
+
+select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy'), to_char(col, 'y yy yyy yyyy yyyyy yyyyyy'), to_varchar(col, 'y yy yyy yyyy yyyyy yyyyyy') from v;
+
+select col, date_format(col, 'q qq'), to_char(col, 'q qq'), to_varchar(col, 'q qq') from v;
+
+select col, date_format(col, 'Q QQ QQQ QQQQ'), to_char(col, 'Q QQ QQQ QQQQ'), to_varchar(col, 'Q QQ QQQ QQQQ') from v;
+
+select col, date_format(col, 'M MM MMM MMMM'), to_char(col, 'M MM MMM MMMM'), to_varchar(col, 'M MM MMM MMMM') from v;
+
+select col, date_format(col, 'L LL'), to_char(col, 'L LL'), to_varchar(col, 'L LL') from v;
+
+select col, date_format(col, 'E EE EEE EEEE'), to_char(col, 'E EE EEE EEEE'), to_varchar(col, 'E EE EEE EEEE') from v;
+
+select col, date_format(col, 'F'), to_char(col, 'F'), to_varchar(col, 'F') from v;
+
+select col, date_format(col, 'd dd'), to_char(col, 'd dd'), to_varchar(col, 'd dd') from v;
+
+select col, date_format(col, 'DD'), to_char(col, 'DD'), to_varchar(col, 'DD') from v where col = timestamp '2100-01-01 01:33:33.123America/Los_Angeles';
+select col, date_format(col, 'D DDD'), to_char(col, 'D DDD'), to_varchar(col, 'D DDD') from v;
+
+select col, date_format(col, 'H HH'), to_char(col, 'H HH'), to_varchar(col, 'H HH') from v;
+
+select col, date_format(col, 'h hh'), to_char(col, 'h hh'), to_varchar(col, 'h hh') from v;
+
+select col, date_format(col, 'k kk'), to_char(col, 'k kk'), to_varchar(col, 'k kk') from v;
+
+select col, date_format(col, 'K KK'), to_char(col, 'K KK'), to_varchar(col, 'K KK') from v;
+
+select col, date_format(col, 'm mm'), to_char(col, 'm mm'), to_varchar(col, 'm mm') from v;
+
+select col, date_format(col, 's ss'), to_char(col, 's ss'), to_varchar(col, 's ss') from v;
+
+select col, date_format(col, 'S SS SSS SSSS SSSSS SSSSSS SSSSSSS SSSSSSSS SSSSSSSSS'), to_char(col, 'S SS SSS SSSS SSSSS SSSSSS SSSSSSS SSSSSSSS SSSSSSSSS'), to_varchar(col, 'S SS SSS SSSS SSSSS SSSSSS SSSSSSS SSSSSSSS SSSSSSSSS') from v;
+
+select col, date_format(col, 'a'), to_char(col, 'a'), to_varchar(col, 'a') from v;
+
+select col, date_format(col, 'VV'), to_char(col, 'VV'), to_varchar(col, 'VV') from v;
+
+select col, date_format(col, 'z zz zzz zzzz'), to_char(col, 'z zz zzz zzzz'), to_varchar(col, 'z zz zzz zzzz') from v;
+
+select col, date_format(col, 'X XX XXX'), to_char(col, 'X XX XXX'), to_varchar(col, 'X XX XXX') from v;
+select col, date_format(col, 'XXXX XXXXX'), to_char(col, 'XXXX XXXXX'), to_varchar(col, 'XXXX XXXXX') from v;
+
+select col, date_format(col, 'Z ZZ ZZZ ZZZZ ZZZZZ'), to_char(col, 'Z ZZ ZZZ ZZZZ ZZZZZ'), to_varchar(col, 'Z ZZ ZZZ ZZZZ ZZZZZ') from v;
+
+select col, date_format(col, 'O OOOO'), to_char(col, 'O OOOO'), to_varchar(col, 'O OOOO') from v;
+
+select col, date_format(col, 'x xx xxx xxxx xxxx xxxxx'), to_char(col, 'x xx xxx xxxx xxxx xxxxx'), to_varchar(col, 'x xx xxx xxxx xxxx xxxxx') from v;
+
+-- optional pattern, but the results won't be optional for formatting
+select col, date_format(col, '[yyyy-MM-dd HH:mm:ss]'), to_char(col, '[yyyy-MM-dd HH:mm:ss]'), to_varchar(col, '[yyyy-MM-dd HH:mm:ss]') from v;
+
+-- literals
+select col, date_format(col, "姚123'GyYqQMLwWuEFDdhHmsSaVzZxXOV'"), to_char(col, "姚123'GyYqQMLwWuEFDdhHmsSaVzZxXOV'"), to_varchar(col, "姚123'GyYqQMLwWuEFDdhHmsSaVzZxXOV'") from v;
+select col, date_format(col, "''"), to_char(col, "''"), to_varchar(col, "''") from v;
+select col, date_format(col, ''), to_char(col, ''), to_varchar(col, '') from v;
+
+-- different datetime types
+select date_format(date'2023-08-18', 'yyyy-MM-dd'), to_char(date'2023-08-18', 'yyyy-MM-dd'), to_varchar(date'2023-08-18', 'yyyy-MM-dd');
+select date_format(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'), to_char(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'), to_varchar(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
+select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'), to_char(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'), to_varchar(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS');
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-legacy.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-legacy.sql
new file mode 100644
index 000000000000..22b29f3451ff
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-legacy.sql
@@ -0,0 +1,3 @@
+--SET spark.sql.legacy.timeParserPolicy=LEGACY
+--IMPORT date.sql
+--IMPORT timestamp.sql
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-parsing-invalid.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-parsing-invalid.sql
new file mode 100644
index 000000000000..1f5a9b117ca2
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-parsing-invalid.sql
@@ -0,0 +1,39 @@
+--- TESTS FOR DATETIME PARSING FUNCTIONS WITH INVALID VALUES ---
+
+-- parsing invalid value with pattern 'y'
+select to_timestamp('294248', 'y'); -- out of year value range [0, 294247]
+select to_timestamp('1', 'yy'); -- the number of digits must be 2 for 'yy'.
+select to_timestamp('-12', 'yy'); -- out of year value range [0, 99] for reduced two digit form
+select to_timestamp('123', 'yy'); -- the number of digits must be 2 for 'yy'.
+select to_timestamp('1', 'yyy'); -- the number of digits must be in [3, 6] for 'yyy'
+
+select to_timestamp('1234567', 'yyyyyyy'); -- the length of 'y' pattern must be less than 7
+
+-- parsing invalid values with pattern 'D'
+select to_timestamp('366', 'D');
+select to_timestamp('9', 'DD');
+select to_timestamp('366', 'DD');
+select to_timestamp('9', 'DDD');
+select to_timestamp('99', 'DDD');
+select to_timestamp('30-365', 'dd-DDD');
+select to_timestamp('11-365', 'MM-DDD');
+select to_timestamp('2019-366', 'yyyy-DDD');
+select to_timestamp('12-30-365', 'MM-dd-DDD');
+select to_timestamp('2020-01-365', 'yyyy-dd-DDD');
+select to_timestamp('2020-10-350', 'yyyy-MM-DDD');
+select to_timestamp('2020-11-31-366', 'yyyy-MM-dd-DDD');
+-- add a special case to test csv, because the legacy formatter it uses is lenient, so Spark should
+-- throw SparkUpgradeException
+select from_csv('2018-366', 'date Date', map('dateFormat', 'yyyy-DDD'));
+
+-- Datetime types parse error
+select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS");
+select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS");
+select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS");
+select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS");
+select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS");
+select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS");
+select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS");
+select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS");
+select cast("Unparseable" as timestamp);
+select cast("Unparseable" as date);
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-parsing-legacy.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-parsing-legacy.sql
new file mode 100644
index 000000000000..ee1afe502ab7
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-parsing-legacy.sql
@@ -0,0 +1,2 @@
+--SET spark.sql.legacy.timeParserPolicy=LEGACY
+--IMPORT datetime-parsing.sql
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-parsing.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-parsing.sql
new file mode 100644
index 000000000000..f87ae556867b
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-parsing.sql
@@ -0,0 +1,45 @@
+--- TESTS FOR DATETIME PARSING FUNCTIONS ---
+
+-- parsing with pattern 'y'.
+-- the range of valid year is [-290307, 294247],
+-- but particularly, some thrift clients use java.sql.Timestamp to parse timestamps, which allows
+-- only positive year values less than or equal to 9999. So the cases below only use [1, 9999] to pass
+-- ThriftServerQueryTestSuite
+select to_timestamp('1', 'y');
+select to_timestamp('009999', 'y');
+
+-- reduced two digit form is used, the range of valid year is 20-[01, 99]
+select to_timestamp('00', 'yy');
+select to_timestamp('99', 'yy');
+
+-- the range of valid year is [-290307, 294247], the number of digits must be in [3, 6] for 'yyy'
+select to_timestamp('001', 'yyy');
+select to_timestamp('009999', 'yyy');
+
+-- the range of valid year is [-9999, 9999], the number of digits must be 4 for 'yyyy'.
+select to_timestamp('0001', 'yyyy');
+select to_timestamp('9999', 'yyyy');
+
+-- the range of valid year is [-99999, 99999], the number of digits must be 5 for 'yyyyy'.
+select to_timestamp('00001', 'yyyyy');
+select to_timestamp('09999', 'yyyyy');
+
+-- the range of valid year is [-290307, 294247], the number of digits must be 6 for 'yyyyyy'.
+select to_timestamp('000001', 'yyyyyy');
+select to_timestamp('009999', 'yyyyyy');
+
+-- parsing with pattern 'D'
+select to_timestamp('9', 'D');
+select to_timestamp('300', 'D');
+select to_timestamp('09', 'DD');
+select to_timestamp('99', 'DD');
+select to_timestamp('100', 'DD');
+select to_timestamp('009', 'DDD');
+select to_timestamp('365', 'DDD');
+select to_timestamp('31-365', 'dd-DDD');
+select to_timestamp('12-365', 'MM-DDD');
+select to_timestamp('2020-365', 'yyyy-DDD');
+select to_timestamp('12-31-365', 'MM-dd-DDD');
+select to_timestamp('2020-30-365', 'yyyy-dd-DDD');
+select to_timestamp('2020-12-350', 'yyyy-MM-DDD');
+select to_timestamp('2020-12-31-366', 'yyyy-MM-dd-DDD');
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-special.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-special.sql
new file mode 100644
index 000000000000..2699274bb7c9
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/datetime-special.sql
@@ -0,0 +1,10 @@
+-- special date and timestamp values that are not allowed in the SQL standard
+-- these tests are put in this separated file because they don't work in JDBC environment
+
+-- date with year outside [0000-9999]
+select date'999999-03-18', date'-0001-1-28', date'0015';
+select make_date(999999, 3, 18), make_date(-1, 1, 28);
+
+-- timestamp with year outside [0000-9999]
+select timestamp'-1969-12-31 16:00:00', timestamp'-0015-03-18 16:00:00', timestamp'-000001', timestamp'99999-03-18T12:03:17';
+select make_timestamp(-1969, 12, 31, 16, 0, 0.0), make_timestamp(-15, 3, 18, 16, 0, 0.0), make_timestamp(99999, 3, 18, 12, 3, 17.0);
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/decimalArithmeticOperations.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/decimalArithmeticOperations.sql
new file mode 100644
index 000000000000..13bbd9d81b79
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/decimalArithmeticOperations.sql
@@ -0,0 +1,125 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+CREATE TEMPORARY VIEW t AS SELECT 1.0 as a, 0.0 as b;
+
+-- division, remainder and pmod by 0 return NULL
+select a / b from t;
+select a % b from t;
+select pmod(a, b) from t;
+
+-- tests for decimals handling in operations
+create table decimals_test(id int, a decimal(38,18), b decimal(38,18)) using parquet;
+
+insert into decimals_test values(1, 100.0, 999.0), (2, 12345.123, 12345.123),
+ (3, 0.1234567891011, 1234.1), (4, 123456789123456789.0, 1.123456789123456789);
+
+-- test decimal operations
+select id, a+b, a-b, a*b, a/b from decimals_test order by id;
+
+-- test operations between decimals and constants
+select id, a*10, b/10 from decimals_test order by id;
+
+-- test operations on constants
+select 10.3 * 3.0;
+select 10.3000 * 3.0;
+select 10.30000 * 30.0;
+select 10.300000000000000000 * 3.000000000000000000;
+select 10.300000000000000000 * 3.0000000000000000000;
+select 2.35E10 * 1.0;
+
+-- arithmetic operations causing an overflow return NULL
+select (5e36BD + 0.1) + 5e36BD;
+select (-4e36BD - 0.1) - 7e36BD;
+select 12345678901234567890.0 * 12345678901234567890.0;
+select 1e35BD / 0.1;
+select 1.2345678901234567890E30BD * 1.2345678901234567890E25BD;
+
+-- arithmetic operations causing a precision loss are truncated
+select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345;
+select 123456789123456789.1234567890 * 1.123456789123456789;
+select 12345678912345.123456789123 / 0.000000012345678;
+
+-- union decimal type
+
+SELECT CAST(20 AS DECIMAL(4, 1))
+UNION ALL
+SELECT CAST(10 AS DECIMAL(3, 1)) + CAST(90 AS DECIMAL(3, 1));
+
+SELECT CAST(20 AS DECIMAL(4, 1))
+UNION ALL
+SELECT CAST(10 AS DECIMAL(3, 1)) - CAST(-90 AS DECIMAL(3, 1));
+
+SELECT CAST(20 AS DECIMAL(4, 1))
+UNION ALL
+SELECT CAST(10 AS DECIMAL(3, 1)) * CAST(10 AS DECIMAL(3, 1));
+
+SELECT CAST(20 AS DECIMAL(4, 1))
+UNION ALL
+SELECT CAST(10 AS DECIMAL(3, 1)) / CAST(10 AS DECIMAL(3, 1));
+
+SELECT CAST(20 AS DECIMAL(4, 1))
+UNION ALL
+SELECT CAST(10 AS DECIMAL(10, 2)) % CAST(3 AS DECIMAL(5, 1));
+
+SELECT CAST(20 AS DECIMAL(4, 1))
+UNION ALL
+SELECT pmod(CAST(10 AS DECIMAL(10, 2)), CAST(3 AS DECIMAL(5, 1)));
+
+SELECT CAST(20 AS DECIMAL(4, 1))
+UNION ALL
+SELECT CAST(10 AS DECIMAL(10, 2)) div CAST(3 AS DECIMAL(5, 1));
+
+-- return NULL instead of rounding, according to old Spark versions' behavior
+set spark.sql.decimalOperations.allowPrecisionLoss=false;
+
+-- test decimal operations
+select /*+ COALESCE(1) */ id, a+b, a-b, a*b, a/b from decimals_test order by id;
+
+-- test operations between decimals and constants
+select id, a*10, b/10 from decimals_test order by id;
+
+-- test operations on constants
+select 10.3 * 3.0;
+select 10.3000 * 3.0;
+select 10.30000 * 30.0;
+select 10.300000000000000000 * 3.000000000000000000;
+select 10.300000000000000000 * 3.0000000000000000000;
+select 2.35E10 * 1.0;
+
+-- arithmetic operations causing an overflow return NULL
+select (5e36BD + 0.1) + 5e36BD;
+select (-4e36BD - 0.1) - 7e36BD;
+select 12345678901234567890.0 * 12345678901234567890.0;
+select 1e35BD / 0.1;
+select 1.2345678901234567890E30BD * 1.2345678901234567890E25BD;
+
+-- arithmetic operations causing a precision loss return NULL
+select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345;
+select 123456789123456789.1234567890 * 1.123456789123456789;
+select 12345678912345.123456789123 / 0.000000012345678;
+
+select 1.0123456789012345678901234567890123456e36BD / 0.1;
+select 1.0123456789012345678901234567890123456e35BD / 1.0;
+select 1.0123456789012345678901234567890123456e34BD / 1.0;
+select 1.0123456789012345678901234567890123456e33BD / 1.0;
+select 1.0123456789012345678901234567890123456e32BD / 1.0;
+select 1.0123456789012345678901234567890123456e31BD / 1.0;
+select 1.0123456789012345678901234567890123456e31BD / 0.1;
+select 1.0123456789012345678901234567890123456e31BD / 10.0;
+
+drop table decimals_test;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe-part-after-analyze.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe-part-after-analyze.sql
new file mode 100644
index 000000000000..f4239da90627
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe-part-after-analyze.sql
@@ -0,0 +1,34 @@
+CREATE TABLE t (key STRING, value STRING, ds STRING, hr INT) USING parquet
+ PARTITIONED BY (ds, hr);
+
+INSERT INTO TABLE t PARTITION (ds='2017-08-01', hr=10)
+VALUES ('k1', 100), ('k2', 200), ('k3', 300);
+
+INSERT INTO TABLE t PARTITION (ds='2017-08-01', hr=11)
+VALUES ('k1', 101), ('k2', 201), ('k3', 301), ('k4', 401);
+
+INSERT INTO TABLE t PARTITION (ds='2017-09-01', hr=5)
+VALUES ('k1', 102), ('k2', 202);
+
+DESC EXTENDED t PARTITION (ds='2017-08-01', hr=10);
+
+-- Collect stats for a single partition
+ANALYZE TABLE t PARTITION (ds='2017-08-01', hr=10) COMPUTE STATISTICS;
+
+DESC EXTENDED t PARTITION (ds='2017-08-01', hr=10);
+
+-- Collect stats for 2 partitions
+ANALYZE TABLE t PARTITION (ds='2017-08-01') COMPUTE STATISTICS;
+
+DESC EXTENDED t PARTITION (ds='2017-08-01', hr=10);
+DESC EXTENDED t PARTITION (ds='2017-08-01', hr=11);
+
+-- Collect stats for all partitions
+ANALYZE TABLE t PARTITION (ds, hr) COMPUTE STATISTICS;
+
+DESC EXTENDED t PARTITION (ds='2017-08-01', hr=10);
+DESC EXTENDED t PARTITION (ds='2017-08-01', hr=11);
+DESC EXTENDED t PARTITION (ds='2017-09-01', hr=5);
+
+-- DROP TEST TABLES/VIEWS
+DROP TABLE t;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe-query.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe-query.sql
new file mode 100644
index 000000000000..f8fbb43ba2f8
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe-query.sql
@@ -0,0 +1,31 @@
+-- Test tables
+CREATE table desc_temp1 (key int COMMENT 'column_comment', val string) USING PARQUET;
+CREATE table desc_temp2 (key int, val string) USING PARQUET;
+
+-- Simple Describe query
+DESC SELECT key, key + 1 as plusone FROM desc_temp1;
+DESC QUERY SELECT * FROM desc_temp2;
+DESC SELECT key, COUNT(*) as count FROM desc_temp1 group by key;
+DESC SELECT 10.00D as col1;
+DESC QUERY SELECT key FROM desc_temp1 UNION ALL select CAST(1 AS DOUBLE);
+DESC QUERY VALUES(1.00D, 'hello') as tab1(col1, col2);
+DESC QUERY FROM desc_temp1 a SELECT *;
+DESC WITH s AS (SELECT 'hello' as col1) SELECT * FROM s;
+DESCRIBE QUERY WITH s AS (SELECT * from desc_temp1) SELECT * FROM s;
+DESCRIBE SELECT * FROM (FROM desc_temp2 select * select *);
+
+-- Error cases.
+DESCRIBE INSERT INTO desc_temp1 values (1, 'val1');
+DESCRIBE INSERT INTO desc_temp1 SELECT * FROM desc_temp2;
+DESCRIBE
+ FROM desc_temp1 a
+ insert into desc_temp1 select *
+ insert into desc_temp2 select *;
+
+-- Explain
+EXPLAIN DESC QUERY SELECT * FROM desc_temp2 WHERE key > 0;
+EXPLAIN EXTENDED DESC WITH s AS (SELECT 'hello' as col1) SELECT * FROM s;
+
+-- cleanup
+DROP TABLE desc_temp1;
+DROP TABLE desc_temp2;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe-table-after-alter-table.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe-table-after-alter-table.sql
new file mode 100644
index 000000000000..69bff6656c43
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe-table-after-alter-table.sql
@@ -0,0 +1,29 @@
+CREATE TABLE table_with_comment (a STRING, b INT, c STRING, d STRING) USING parquet COMMENT 'added';
+
+DESC FORMATTED table_with_comment;
+
+-- ALTER TABLE BY MODIFYING COMMENT
+ALTER TABLE table_with_comment SET TBLPROPERTIES("comment"= "modified comment", "type"= "parquet");
+
+DESC FORMATTED table_with_comment;
+
+-- DROP TEST TABLE
+DROP TABLE table_with_comment;
+
+-- CREATE TABLE WITHOUT COMMENT
+CREATE TABLE table_comment (a STRING, b INT) USING parquet;
+
+DESC FORMATTED table_comment;
+
+-- ALTER TABLE BY ADDING COMMENT
+ALTER TABLE table_comment SET TBLPROPERTIES(comment = "added comment");
+
+DESC formatted table_comment;
+
+-- ALTER UNSET PROPERTIES COMMENT
+ALTER TABLE table_comment UNSET TBLPROPERTIES IF EXISTS ('comment');
+
+DESC FORMATTED table_comment;
+
+-- DROP TEST TABLE
+DROP TABLE table_comment;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe.sql
new file mode 100644
index 000000000000..dbe5bc840bce
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/describe.sql
@@ -0,0 +1,145 @@
+CREATE TABLE t (a STRING, b INT, c STRING, d STRING) USING parquet
+ OPTIONS (a '1', b '2', password 'password')
+ PARTITIONED BY (c, d) CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS
+ COMMENT 'table_comment'
+ TBLPROPERTIES (t 'test', password 'password');
+
+CREATE TEMPORARY VIEW temp_v AS SELECT * FROM t;
+
+CREATE TEMPORARY VIEW temp_Data_Source_View
+ USING org.apache.spark.sql.sources.DDLScanSource
+ OPTIONS (
+ From '1',
+ To '10',
+ Table 'test1');
+
+CREATE VIEW v AS SELECT * FROM t;
+
+ALTER TABLE t SET TBLPROPERTIES (e = '3');
+
+ALTER TABLE t ADD PARTITION (c='Us', d=1);
+
+DESCRIBE t;
+
+DESCRIBE EXTENDED t AS JSON;
+
+-- AnalysisException: describe table as json must be extended
+DESCRIBE t AS JSON;
+
+-- AnalysisException: describe col as json unsupported
+DESC FORMATTED t a AS JSON;
+
+DESC default.t;
+
+DESC TABLE t;
+
+DESC FORMATTED t;
+
+DESC EXTENDED t;
+
+ALTER TABLE t UNSET TBLPROPERTIES (e);
+
+DESC EXTENDED t;
+
+ALTER TABLE t UNSET TBLPROPERTIES (comment);
+
+DESC EXTENDED t;
+
+DESC t PARTITION (c='Us', d=1);
+
+DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON;
+
+DESC EXTENDED t PARTITION (c='Us', d=1);
+
+DESC FORMATTED t PARTITION (c='Us', d=1);
+
+DESC EXTENDED t PARTITION (C='Us', D=1);
+
+-- NoSuchPartitionException: Partition not found in table
+DESC t PARTITION (c='Us', d=2);
+
+-- AnalysisException: Partition spec is invalid
+DESC t PARTITION (c='Us');
+
+-- ParseException: PARTITION specification is incomplete
+DESC t PARTITION (c='Us', d);
+
+-- DESC Temp View
+
+DESC temp_v;
+
+DESC TABLE temp_v;
+
+DESC FORMATTED temp_v;
+
+DESC EXTENDED temp_v;
+
+DESC temp_Data_Source_View;
+
+-- AnalysisException DESC PARTITION is not allowed on a temporary view
+DESC temp_v PARTITION (c='Us', d=1);
+
+-- DESC Persistent View
+
+DESC v;
+
+DESC TABLE v;
+
+DESC FORMATTED v;
+
+DESC EXTENDED v;
+
+-- AnalysisException DESC PARTITION is not allowed on a view
+DESC v PARTITION (c='Us', d=1);
+
+-- Explain Describe Table
+EXPLAIN DESC t;
+EXPLAIN DESC EXTENDED t;
+EXPLAIN EXTENDED DESC t;
+EXPLAIN DESCRIBE t b;
+EXPLAIN DESCRIBE t PARTITION (c='Us', d=2);
+EXPLAIN DESCRIBE EXTENDED t PARTITION (c='Us', d=2) AS JSON;
+
+-- Show column default values
+CREATE TABLE d (a STRING DEFAULT 'default-value', b INT DEFAULT 42) USING parquet COMMENT 'table_comment';
+
+DESC d;
+
+DESC EXTENDED d;
+
+DESC TABLE EXTENDED d;
+
+DESC FORMATTED d;
+
+-- Show column default values with newlines in the string
+CREATE TABLE e (a STRING DEFAULT CONCAT('a\n b\n ', 'c\n d'), b INT DEFAULT 42) USING parquet COMMENT 'table_comment';
+
+DESC e;
+
+DESC EXTENDED e;
+
+DESC TABLE EXTENDED e;
+
+DESC FORMATTED e;
+
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C;
+
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33');
+
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON;
+
+-- DROP TEST TABLES/VIEWS
+
+DROP VIEW temp_v;
+
+DROP VIEW temp_Data_Source_View;
+
+DROP VIEW v;
+
+DROP TABLE t;
+
+DROP TABLE d;
+
+DROP TABLE e;
+
+DROP TABLE f;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/double-quoted-identifiers-enabled.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/double-quoted-identifiers-enabled.sql
new file mode 100644
index 000000000000..9547d011c76e
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/double-quoted-identifiers-enabled.sql
@@ -0,0 +1,3 @@
+--SET spark.sql.ansi.doubleQuotedIdentifiers=true
+--IMPORT double-quoted-identifiers.sql
+
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/double-quoted-identifiers.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/double-quoted-identifiers.sql
new file mode 100644
index 000000000000..ffb52b403346
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/double-quoted-identifiers.sql
@@ -0,0 +1,57 @@
+-- All these should error out in the parser
+SELECT 1 FROM "not_exist";
+
+USE SCHEMA "not_exist";
+
+ALTER TABLE "not_exist" ADD COLUMN not_exist int;
+
+ALTER TABLE not_exist ADD COLUMN "not_exist" int;
+
+SELECT 1 AS "not_exist" FROM not_exist;
+
+SELECT 1 FROM not_exist AS X("hello");
+
+SELECT "not_exist"();
+
+SELECT "not_exist".not_exist();
+
+-- All these should error out in analysis
+SELECT 1 FROM `hello`;
+
+USE SCHEMA `not_exist`;
+
+ALTER TABLE `not_exist` ADD COLUMN not_exist int;
+
+ALTER TABLE not_exist ADD COLUMN `not_exist` int;
+
+SELECT 1 AS `not_exist` FROM `not_exist`;
+
+SELECT 1 FROM not_exist AS X(`hello`);
+
+SELECT `not_exist`();
+
+SELECT `not_exist`.not_exist();
+
+-- Strings in various situations all work
+SELECT "hello";
+
+CREATE TEMPORARY VIEW v(c1 COMMENT "hello") AS SELECT 1;
+DROP VIEW v;
+
+SELECT INTERVAL "1" YEAR;
+
+-- Single ticks still work
+SELECT 'hello';
+
+CREATE TEMPORARY VIEW v(c1 COMMENT 'hello') AS SELECT 1;
+DROP VIEW v;
+
+SELECT INTERVAL '1' YEAR;
+
+-- A whole scenario
+CREATE SCHEMA "myschema";
+CREATE TEMPORARY VIEW "myview"("c1") AS
+ WITH "v"("a") AS (SELECT 1) SELECT "a" FROM "v";
+SELECT "a1" AS "a2" FROM "myview" AS "atab"("a1");
+DROP TABLE "myview";
+DROP SCHEMA "myschema";
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/except-all.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/except-all.sql
new file mode 100644
index 000000000000..e28f0721a644
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/except-all.sql
@@ -0,0 +1,160 @@
+CREATE TEMPORARY VIEW tab1 AS SELECT * FROM VALUES
+ (0), (1), (2), (2), (2), (2), (3), (null), (null) AS tab1(c1);
+CREATE TEMPORARY VIEW tab2 AS SELECT * FROM VALUES
+ (1), (2), (2), (3), (5), (5), (null) AS tab2(c1);
+CREATE TEMPORARY VIEW tab3 AS SELECT * FROM VALUES
+ (1, 2),
+ (1, 2),
+ (1, 3),
+ (2, 3),
+ (2, 2)
+ AS tab3(k, v);
+CREATE TEMPORARY VIEW tab4 AS SELECT * FROM VALUES
+ (1, 2),
+ (2, 3),
+ (2, 2),
+ (2, 2),
+ (2, 20)
+ AS tab4(k, v);
+
+-- Basic EXCEPT ALL
+SELECT * FROM tab1
+EXCEPT ALL
+SELECT * FROM tab2;
+
+-- MINUS ALL (synonym for EXCEPT)
+SELECT * FROM tab1
+MINUS ALL
+SELECT * FROM tab2;
+
+-- EXCEPT ALL same table in both branches
+SELECT * FROM tab1
+EXCEPT ALL
+SELECT * FROM tab2 WHERE c1 IS NOT NULL;
+
+-- Empty left relation
+SELECT * FROM tab1 WHERE c1 > 5
+EXCEPT ALL
+SELECT * FROM tab2;
+
+-- Empty right relation
+SELECT * FROM tab1
+EXCEPT ALL
+SELECT * FROM tab2 WHERE c1 > 6;
+
+-- Type Coerced ExceptAll
+SELECT * FROM tab1
+EXCEPT ALL
+SELECT CAST(1 AS BIGINT);
+
+-- Error as types of two side are not compatible
+SELECT * FROM tab1
+EXCEPT ALL
+SELECT array(1);
+
+-- Basic
+SELECT * FROM tab3
+EXCEPT ALL
+SELECT * FROM tab4;
+
+-- Basic
+SELECT * FROM tab4
+EXCEPT ALL
+SELECT * FROM tab3;
+
+-- EXCEPT ALL + INTERSECT
+SELECT * FROM tab4
+EXCEPT ALL
+SELECT * FROM tab3
+INTERSECT DISTINCT
+SELECT * FROM tab4;
+
+-- EXCEPT ALL + EXCEPT
+SELECT * FROM tab4
+EXCEPT ALL
+SELECT * FROM tab3
+EXCEPT DISTINCT
+SELECT * FROM tab4;
+
+-- Chain of set operations
+SELECT * FROM tab3
+EXCEPT ALL
+SELECT * FROM tab4
+UNION ALL
+SELECT * FROM tab3
+EXCEPT DISTINCT
+SELECT * FROM tab4;
+
+-- Mismatch on number of columns across both branches
+SELECT k FROM tab3
+EXCEPT ALL
+SELECT k, v FROM tab4;
+
+-- Chain of set operations
+SELECT * FROM tab3
+EXCEPT ALL
+SELECT * FROM tab4
+UNION
+SELECT * FROM tab3
+EXCEPT DISTINCT
+SELECT * FROM tab4;
+
+-- Using MINUS ALL
+SELECT * FROM tab3
+MINUS ALL
+SELECT * FROM tab4
+UNION
+SELECT * FROM tab3
+MINUS DISTINCT
+SELECT * FROM tab4;
+
+-- Chain of set operations
+SELECT * FROM tab3
+EXCEPT ALL
+SELECT * FROM tab4
+EXCEPT DISTINCT
+SELECT * FROM tab3
+EXCEPT DISTINCT
+SELECT * FROM tab4;
+
+-- Join under except all. Should produce empty resultset since both left and right sets
+-- are same.
+SELECT *
+FROM (SELECT tab3.k,
+ tab4.v
+ FROM tab3
+ JOIN tab4
+ ON tab3.k = tab4.k)
+EXCEPT ALL
+SELECT *
+FROM (SELECT tab3.k,
+ tab4.v
+ FROM tab3
+ JOIN tab4
+ ON tab3.k = tab4.k);
+
+-- Join under except all (2)
+SELECT *
+FROM (SELECT tab3.k,
+ tab4.v
+ FROM tab3
+ JOIN tab4
+ ON tab3.k = tab4.k)
+EXCEPT ALL
+SELECT *
+FROM (SELECT tab4.v AS k,
+ tab3.k AS v
+ FROM tab3
+ JOIN tab4
+ ON tab3.k = tab4.k);
+
+-- Group by under ExceptAll
+SELECT v FROM tab3 GROUP BY v
+EXCEPT ALL
+SELECT k FROM tab4 GROUP BY k;
+
+-- Clean-up
+DROP VIEW IF EXISTS tab1;
+DROP VIEW IF EXISTS tab2;
+DROP VIEW IF EXISTS tab3;
+DROP VIEW IF EXISTS tab4;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/except.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/except.sql
new file mode 100644
index 000000000000..ffdf1f4f3d24
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/except.sql
@@ -0,0 +1,76 @@
+-- Tests different scenarios of except operation
+create temporary view t1 as select * from values
+ ("one", 1),
+ ("two", 2),
+ ("three", 3),
+ ("one", NULL)
+ as t1(k, v);
+
+create temporary view t2 as select * from values
+ ("one", 1),
+ ("two", 22),
+ ("one", 5),
+ ("one", NULL),
+ (NULL, 5)
+ as t2(k, v);
+
+
+-- Except operation that will be replaced by left anti join
+SELECT * FROM t1 EXCEPT SELECT * FROM t2;
+
+
+-- Except operation that will be replaced by Filter: SPARK-22181
+SELECT * FROM t1 EXCEPT SELECT * FROM t1 where v <> 1 and v <> 2;
+
+
+-- Except operation that will be replaced by Filter: SPARK-22181
+SELECT * FROM t1 where v <> 1 and v <> 22 EXCEPT SELECT * FROM t1 where v <> 2 and v >= 3;
+
+
+-- Except operation that will be replaced by Filter: SPARK-22181
+SELECT t1.* FROM t1, t2 where t1.k = t2.k
+EXCEPT
+SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != 'one';
+
+
+-- Except operation that will be replaced by left anti join
+SELECT * FROM t2 where v >= 1 and v <> 22 EXCEPT SELECT * FROM t1;
+
+
+-- Except operation that will be replaced by left anti join
+SELECT (SELECT min(k) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1
+MINUS
+SELECT (SELECT min(k) FROM t2) abs_min_t2 FROM t1 WHERE t1.k = 'one';
+
+
+-- Except operation that will be replaced by left anti join
+SELECT t1.k
+FROM t1
+WHERE t1.v <= (SELECT max(t2.v)
+ FROM t2
+ WHERE t2.k = t1.k)
+MINUS
+SELECT t1.k
+FROM t1
+WHERE t1.v >= (SELECT min(t2.v)
+ FROM t2
+ WHERE t2.k = t1.k);
+
+-- SPARK-32638: corrects references when adding aliases in WidenSetOperationTypes
+CREATE OR REPLACE TEMPORARY VIEW t3 AS VALUES (decimal(1)) tbl(v);
+SELECT t.v FROM (
+ SELECT v FROM t3
+ EXCEPT
+ SELECT v + v AS v FROM t3
+) t;
+
+SELECT SUM(t.v) FROM (
+ SELECT v FROM t3
+ EXCEPT
+ SELECT v + v AS v FROM t3
+) t;
+
+-- Clean-up
+DROP VIEW IF EXISTS t1;
+DROP VIEW IF EXISTS t2;
+DROP VIEW IF EXISTS t3;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/execute-immediate.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/execute-immediate.sql
new file mode 100644
index 000000000000..f7d27c6c0b03
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/execute-immediate.sql
@@ -0,0 +1,149 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES
+ (10, 'name1', named_struct('f1', 1, 's2', named_struct('f2', 101, 'f3', 'a'))),
+ (20, 'name2', named_struct('f1', 2, 's2', named_struct('f2', 202, 'f3', 'b'))),
+ (30, 'name3', named_struct('f1', 3, 's2', named_struct('f2', 303, 'f3', 'c'))),
+ (40, 'name4', named_struct('f1', 4, 's2', named_struct('f2', 404, 'f3', 'd'))),
+ (50, 'name5', named_struct('f1', 5, 's2', named_struct('f2', 505, 'f3', 'e'))),
+ (60, 'name6', named_struct('f1', 6, 's2', named_struct('f2', 606, 'f3', 'f'))),
+ (70, 'name7', named_struct('f1', 7, 's2', named_struct('f2', 707, 'f3', 'g')))
+AS tbl_view(id, name, data);
+CREATE TABLE x (id INT) USING csv;
+
+DECLARE sql_string STRING;
+SET VAR sql_string = 'SELECT * from tbl_view where name = \'name1\'';
+
+-- test commands
+EXECUTE IMMEDIATE 'SET spark.sql.ansi.enabled=true';
+EXECUTE IMMEDIATE 'CREATE TEMPORARY VIEW IDENTIFIER(:tblName) AS SELECT id, name FROM tbl_view' USING 'tbl_view_tmp' as tblName;
+EXECUTE IMMEDIATE 'SELECT * FROM tbl_view_tmp';
+
+EXECUTE IMMEDIATE 'REFRESH TABLE IDENTIFIER(:tblName)' USING 'x' as tblName;
+
+-- test execute immediate without parameters
+EXECUTE IMMEDIATE sql_string;
+EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = \'name1\'';
+
+-- test positional parameters
+SET VAR sql_string = 'SELECT * from tbl_view where name = ? or name = ?';
+DECLARE a STRING;
+SET VAR a = 'name1';
+EXECUTE IMMEDIATE sql_string USING 'name1', 'name3';
+EXECUTE IMMEDIATE sql_string USING a, 'name2';
+EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = ? or name = ?' USING 'name1', 'name3';
+EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = ? or name = ?' USING a, 'name2';
+EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = ? or name = ?' USING (a, 'name2');
+-- test positional command
+EXECUTE IMMEDIATE 'INSERT INTO x VALUES(?)' USING 1;
+SELECT * from x;
+
+-- test named parameters
+SET VAR sql_string = 'SELECT * from tbl_view where name = :first or id = :second';
+DECLARE b INT;
+SET VAR b = 40;
+EXECUTE IMMEDIATE sql_string USING 40 as second, 'name7' as first;
+EXECUTE IMMEDIATE sql_string USING b as second, 'name7' as first;
+EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first or id = :second' USING 40 as second, 'name7' as first;
+EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first or id = :second' USING 'name7' as first, b as second;
+-- named parameter used multiple times
+EXECUTE IMMEDIATE 'SELECT tbl_view.*, :first as p FROM tbl_view WHERE name = :first' USING 'name7' as first;
+
+-- test named command and setup for next test
+EXECUTE IMMEDIATE 'SET VAR sql_string = ?' USING 'SELECT id from tbl_view where name = :first';
+SELECT sql_string;
+
+-- test into
+DECLARE res_id INT;
+EXECUTE IMMEDIATE sql_string INTO res_id USING 'name7' as first;
+SELECT res_id;
+EXECUTE IMMEDIATE sql_string INTO res_id USING a as first;
+SELECT res_id;
+
+-- test into without using
+SET VAR sql_string = 'SELECT * from tbl_view where name = :first or id = :second';
+EXECUTE IMMEDIATE 'SELECT 42' INTO res_id;
+SELECT res_id;
+
+-- multiple INTOs
+EXECUTE IMMEDIATE 'SELECT id, name FROM tbl_view WHERE id = ?' INTO b, a USING 10;
+SELECT b, a;
+
+-- use AS for using positional params
+EXECUTE IMMEDIATE 'SELECT * FROM tbl_view where id = ? AND name = ?' USING b as first, a;
+
+-- empty query
+EXECUTE IMMEDIATE 'SELECT 42 WHERE 2 = 1' INTO res_id;
+SELECT res_id;
+
+-- implicit casting
+EXECUTE IMMEDIATE 'SELECT \'1707\'' INTO res_id;
+SELECT res_id;
+
+-- test errors
+-- string to int error
+EXECUTE IMMEDIATE 'SELECT \'invalid_cast_error_expected\'' INTO res_id;
+
+-- require query when using INTO
+EXECUTE IMMEDIATE 'INSERT INTO x VALUES (?)' INTO res_id USING 1;
+
+-- use column in using - should fail as we expect variable here
+EXECUTE IMMEDIATE 'SELECT * FROM tbl_view WHERE ? = id' USING id;
+
+-- either positional or named parameters must be used
+EXECUTE IMMEDIATE 'SELECT * FROM tbl_view where ? = id and :first = name' USING 1, 'name2' as first;
+
+-- all parameters must be named
+EXECUTE IMMEDIATE 'SELECT * FROM tbl_view where :first = name' USING 1, 'name2' as first;
+
+-- internal syntax error
+EXECUTE IMMEDIATE 'SELCT Fa';
+
+-- internal syntax error - test that both parseQuery and parsePlan fail
+EXECUTE IMMEDIATE 'SELCT Fa' INTO res_id;
+
+-- Parameter passed must be STRING
+EXECUTE IMMEDIATE b;
+
+-- test expressions should fail with parser error
+SET VAR sql_string = 'SELECT * from tbl_view where name = :first or id = :second';
+SET VAR a = 'na';
+
+-- expressions not supported - feature not supported
+EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first' USING CONCAT(a , "me1") as first;
+EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first' USING (SELECT 42) as first, 'name2' as second;
+
+-- INTO variables not matching scalar types
+EXECUTE IMMEDIATE 'SELECT id, name FROM tbl_view WHERE id = ?' INTO a, b USING 10;
+
+-- INTO does not support braces - parser error
+EXECUTE IMMEDIATE 'SELECT id, name FROM tbl_view WHERE id = ?' INTO (a, b) USING 10;
+
+-- Error too many rows
+EXECUTE IMMEDIATE 'SELECT id FROM tbl_view' INTO res_id;
+
+-- Error mismatch cardinality
+EXECUTE IMMEDIATE 'SELECT id, data.f1 FROM tbl_view' INTO res_id;
+EXECUTE IMMEDIATE 'SELECT id FROM tbl_view' INTO res_id, b;
+
+-- duplicate aliases
+EXECUTE IMMEDIATE 'SELECT id FROM tbl_view WHERE id = :first' USING 10 as first, 20 as first;
+
+-- no alias
+DECLARE p = 10;
+EXECUTE IMMEDIATE 'SELECT id FROM tbl_view WHERE id = :p' USING p;
+
+-- mixing literals and named parameters
+EXECUTE IMMEDIATE 'SELECT id FROM tbl_view WHERE id = :p' USING p, 'p';
+
+-- duplicate into entry
+EXECUTE IMMEDIATE 'SELECT id, data.f1 FROM tbl_view WHERE id = 10' INTO res_id, res_id;
+
+-- nested execute immediate
+EXECUTE IMMEDIATE 'EXECUTE IMMEDIATE \'SELECT id FROM tbl_view WHERE id = ? USING 10\'';
+
+-- sqlString is null
+SET VAR sql_string = null;
+EXECUTE IMMEDIATE sql_string;
+
+DROP TABLE x;
\ No newline at end of file
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/explain-aqe.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/explain-aqe.sql
new file mode 100644
index 000000000000..7aef901da4fb
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/explain-aqe.sql
@@ -0,0 +1,4 @@
+--IMPORT explain.sql
+
+--SET spark.sql.adaptive.enabled=true
+--SET spark.sql.maxMetadataStringLength = 500
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/explain-cbo.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/explain-cbo.sql
new file mode 100644
index 000000000000..eeb2180f7a54
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/explain-cbo.sql
@@ -0,0 +1,27 @@
+--SET spark.sql.cbo.enabled=true
+--SET spark.sql.maxMetadataStringLength = 500
+
+CREATE TABLE explain_temp1(a INT, b INT) USING PARQUET;
+CREATE TABLE explain_temp2(c INT, d INT) USING PARQUET;
+
+ANALYZE TABLE explain_temp1 COMPUTE STATISTICS FOR ALL COLUMNS;
+ANALYZE TABLE explain_temp2 COMPUTE STATISTICS FOR ALL COLUMNS;
+
+EXPLAIN COST WITH max_store_sales AS
+(
+ SELECT max(csales) tpcds_cmax
+ FROM (
+ SELECT sum(b) csales
+ FROM explain_temp1 WHERE a < 100
+ ) x
+),
+best_ss_customer AS
+(
+ SELECT c
+ FROM explain_temp2
+ WHERE d > (SELECT * FROM max_store_sales)
+)
+SELECT c FROM best_ss_customer;
+
+DROP TABLE explain_temp1;
+DROP TABLE explain_temp2;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/explain.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/explain.sql
new file mode 100644
index 000000000000..698ca009b4ff
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/explain.sql
@@ -0,0 +1,136 @@
+--SET spark.sql.codegen.wholeStage = true
+--SET spark.sql.adaptive.enabled = false
+--SET spark.sql.maxMetadataStringLength = 500
+
+-- Test tables
+CREATE table explain_temp1 (key int, val int) USING PARQUET;
+CREATE table explain_temp2 (key int, val int) USING PARQUET;
+CREATE table explain_temp3 (key int, val int) USING PARQUET;
+CREATE table explain_temp4 (key int, val string) USING PARQUET;
+CREATE table explain_temp5 (key int) USING PARQUET PARTITIONED BY(val string);
+
+SET spark.sql.codegen.wholeStage = true;
+
+-- distinct func
+EXPLAIN EXTENDED
+ SELECT sum(distinct val)
+ FROM explain_temp1;
+
+-- single table
+EXPLAIN FORMATTED
+ SELECT key, max(val)
+ FROM explain_temp1
+ WHERE key > 0
+ GROUP BY key
+ ORDER BY key;
+
+EXPLAIN FORMATTED
+ SELECT key, max(val)
+ FROM explain_temp1
+ WHERE key > 0
+ GROUP BY key
+ HAVING max(val) > 0;
+
+-- simple union
+EXPLAIN FORMATTED
+ SELECT key, val FROM explain_temp1 WHERE key > 0
+ UNION
+ SELECT key, val FROM explain_temp1 WHERE key > 1;
+
+-- Join
+EXPLAIN FORMATTED
+ SELECT *
+ FROM explain_temp1 a,
+ explain_temp2 b
+ WHERE a.key = b.key;
+
+EXPLAIN FORMATTED
+ SELECT *
+ FROM explain_temp1 a
+ LEFT OUTER JOIN explain_temp2 b
+ ON a.key = b.key;
+
+-- Subqueries nested.
+EXPLAIN FORMATTED
+ SELECT *
+ FROM explain_temp1
+ WHERE key = (SELECT max(key)
+ FROM explain_temp2
+ WHERE key = (SELECT max(key)
+ FROM explain_temp3
+ WHERE val > 0)
+ AND val = 2)
+ AND val > 3;
+
+EXPLAIN FORMATTED
+ SELECT *
+ FROM explain_temp1
+ WHERE key = (SELECT max(key)
+ FROM explain_temp2
+ WHERE val > 0)
+ OR
+ key = (SELECT avg(key)
+ FROM explain_temp3
+ WHERE val > 0);
+
+-- Reuse subquery
+EXPLAIN FORMATTED
+ SELECT (SELECT Avg(key) FROM explain_temp1) + (SELECT Avg(key) FROM explain_temp1)
+ FROM explain_temp1;
+
+-- CTE + ReuseExchange
+EXPLAIN FORMATTED
+ WITH cte1 AS (
+ SELECT *
+ FROM explain_temp1
+ WHERE key > 10
+ )
+ SELECT * FROM cte1 a, cte1 b WHERE a.key = b.key;
+
+EXPLAIN FORMATTED
+ WITH cte1 AS (
+ SELECT key, max(val)
+ FROM explain_temp1
+ WHERE key > 10
+ GROUP BY key
+ )
+ SELECT * FROM cte1 a, cte1 b WHERE a.key = b.key;
+
+-- A spark plan which has innerChildren other than subquery
+EXPLAIN FORMATTED
+ CREATE VIEW explain_view AS
+ SELECT key, val FROM explain_temp1;
+
+-- HashAggregate
+EXPLAIN FORMATTED
+ SELECT
+ COUNT(val) + SUM(key) as TOTAL,
+ COUNT(key) FILTER (WHERE val > 1)
+ FROM explain_temp1;
+
+-- ObjectHashAggregate
+EXPLAIN FORMATTED
+ SELECT key, sort_array(collect_set(val))[0]
+ FROM explain_temp4
+ GROUP BY key;
+
+-- SortAggregate
+EXPLAIN FORMATTED
+ SELECT key, MIN(val)
+ FROM explain_temp4
+ GROUP BY key;
+
+-- V1 Write
+EXPLAIN EXTENDED INSERT INTO TABLE explain_temp5 SELECT * FROM explain_temp4;
+
+-- cleanup
+DROP TABLE explain_temp1;
+DROP TABLE explain_temp2;
+DROP TABLE explain_temp3;
+DROP TABLE explain_temp4;
+DROP TABLE explain_temp5;
+
+-- SPARK-35479: Format PartitionFilters IN strings in scan nodes
+CREATE table t(v array) USING PARQUET;
+EXPLAIN SELECT * FROM t WHERE v IN (array('a'), null);
+DROP TABLE t;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/extract.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/extract.sql
new file mode 100644
index 000000000000..5fd3362c6964
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/extract.sql
@@ -0,0 +1,167 @@
+CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c, to_timestamp_ntz('2011-05-06 07:08:09.1234567') as ntz, interval 10 year 20 month as i, interval 30 day 40 hour 50 minute 6.7890 second as j;
+
+select extract(year from c), extract(year from ntz), extract(year from i) from t;
+select extract(y from c), extract(y from ntz), extract(y from i) from t;
+select extract(years from c), extract(years from ntz), extract(years from i) from t;
+select extract(yr from c), extract(yr from ntz), extract(yr from i) from t;
+select extract(yrs from c), extract(yrs from ntz), extract(yrs from i) from t;
+
+select extract(yearofweek from c), extract(yearofweek from ntz) from t;
+
+select extract(quarter from c), extract(quarter from ntz) from t;
+select extract(qtr from c), extract(qtr from ntz) from t;
+
+select extract(month from c), extract(month from ntz), extract(month from i) from t;
+select extract(mon from c), extract(mon from ntz), extract(mon from i) from t;
+select extract(mons from c), extract(mons from ntz), extract(mons from i) from t;
+select extract(months from c), extract(months from ntz), extract(months from i) from t;
+
+select extract(week from c), extract(week from ntz) from t;
+select extract(w from c), extract(w from ntz) from t;
+select extract(weeks from c), extract(weeks from ntz) from t;
+
+select extract(day from c), extract(day from ntz), extract(day from j) from t;
+select extract(d from c), extract(d from ntz), extract(d from j) from t;
+select extract(days from c), extract(days from ntz), extract(days from j) from t;
+
+select extract(dayofweek from c), extract(dayofweek from ntz) from t;
+select extract(dow from c), extract(dow from ntz) from t;
+
+select extract(dayofweek_iso from c), extract(dayofweek_iso from ntz) from t;
+select extract(dow_iso from c), extract(dow_iso from ntz) from t;
+
+select extract(doy from c), extract(doy from ntz) from t;
+
+select extract(hour from c), extract(hour from ntz), extract(hour from j) from t;
+select extract(h from c), extract(h from ntz), extract(h from j) from t;
+select extract(hours from c), extract(hours from ntz), extract(hours from j) from t;
+select extract(hr from c), extract(hr from ntz), extract(hr from j) from t;
+select extract(hrs from c), extract(hrs from ntz), extract(hrs from j) from t;
+
+select extract(minute from c), extract(minute from ntz), extract(minute from j) from t;
+select extract(m from c), extract(m from ntz), extract(m from j) from t;
+select extract(min from c), extract(min from ntz), extract(min from j) from t;
+select extract(mins from c), extract(mins from ntz), extract(mins from j) from t;
+select extract(minutes from c), extract(minutes from ntz), extract(minutes from j) from t;
+
+select extract(second from c), extract(second from ntz), extract(second from j) from t;
+select extract(s from c), extract(s from ntz), extract(s from j) from t;
+select extract(sec from c), extract(sec from ntz), extract(sec from j) from t;
+select extract(seconds from c), extract(seconds from ntz), extract(seconds from j) from t;
+select extract(secs from c), extract(secs from ntz), extract(secs from j) from t;
+
+select extract(not_supported from c) from t;
+select extract(not_supported from i) from t;
+select extract(not_supported from j) from t;
+
+select date_part('year', c), date_part('year', ntz), date_part('year', i) from t;
+select date_part('y', c), date_part('y', ntz), date_part('y', i) from t;
+select date_part('years', c), date_part('years', ntz), date_part('years', i) from t;
+select date_part('yr', c), date_part('yr', ntz), date_part('yr', i) from t;
+select date_part('yrs', c), date_part('yrs', ntz), date_part('yrs', i) from t;
+
+select date_part('yearofweek', c), date_part('yearofweek', ntz) from t;
+
+select date_part('quarter', c), date_part('quarter', ntz) from t;
+select date_part('qtr', c), date_part('qtr', ntz) from t;
+
+select date_part('month', c), date_part('month', ntz), date_part('month', i) from t;
+select date_part('mon', c), date_part('mon', ntz), date_part('mon', i) from t;
+select date_part('mons', c), date_part('mons', ntz), date_part('mons', i) from t;
+select date_part('months', c), date_part('months', ntz), date_part('months', i) from t;
+
+select date_part('week', c), date_part('week', ntz) from t;
+select date_part('w', c), date_part('w', ntz) from t;
+select date_part('weeks', c), date_part('weeks', ntz) from t;
+
+select date_part('day', c), date_part('day', ntz), date_part('day', j) from t;
+select date_part('d', c), date_part('d', ntz), date_part('d', j) from t;
+select date_part('days', c), date_part('days', ntz), date_part('days', j) from t;
+
+select date_part('dayofweek', c), date_part('dayofweek', ntz) from t;
+select date_part('dow', c), date_part('dow', ntz) from t;
+
+select date_part('dayofweek_iso', c), date_part('dayofweek_iso', ntz) from t;
+select date_part('dow_iso', c), date_part('dow_iso', ntz) from t;
+
+select date_part('doy', c), date_part('doy', ntz) from t;
+
+select date_part('hour', c), date_part('hour', ntz), date_part('hour', j) from t;
+select date_part('h', c), date_part('h', ntz), date_part('h', j) from t;
+select date_part('hours', c), date_part('hours', ntz), date_part('hours', j) from t;
+select date_part('hr', c), date_part('hr', ntz), date_part('hr', j) from t;
+select date_part('hrs', c), date_part('hrs', ntz), date_part('hrs', j) from t;
+
+select date_part('minute', c), date_part('minute', ntz), date_part('minute', j) from t;
+select date_part('m', c), date_part('m', ntz), date_part('m', j) from t;
+select date_part('min', c), date_part('min', ntz), date_part('min', j) from t;
+select date_part('mins', c), date_part('mins', ntz), date_part('mins', j) from t;
+select date_part('minutes', c), date_part('minutes', ntz), date_part('minutes', j) from t;
+
+select date_part('second', c), date_part('second', ntz), date_part('second', j) from t;
+select date_part('s', c), date_part('s', ntz), date_part('s', j) from t;
+select date_part('sec', c), date_part('sec', ntz), date_part('sec', j) from t;
+select date_part('seconds', c), date_part('seconds', ntz), date_part('seconds', j) from t;
+select date_part('secs', c), date_part('secs', ntz), date_part('secs', j) from t;
+
+select date_part('not_supported', c) from t;
+select date_part(c, c) from t;
+select date_part(null, c) from t;
+
+select date_part(i, i) from t;
+select date_part(null, i) from t;
+
+-- In SPARK-31476, we've supported extract('field', source), too
+select extract('year', c) from t;
+select extract('quarter', c) from t;
+select extract('month', c) from t;
+select extract('week', c) from t;
+select extract('day', c) from t;
+select extract('days', c) from t;
+select extract('dayofweek', c) from t;
+select extract('dow', c) from t;
+select extract('doy', c) from t;
+select extract('hour', c) from t;
+select extract('minute', c) from t;
+select extract('second', c) from t;
+
+select c - j from t;
+select day(c - j) from t;
+select extract(day from c - j) from t;
+select extract(month from to_timestamp(c) - i) from t;
+select extract(second from to_timestamp(c) - j) from t;
+
+-- extract fields from year-month/day-time intervals
+select extract(YEAR from interval '2-1' YEAR TO MONTH);
+select date_part('YEAR', interval '2-1' YEAR TO MONTH);
+select extract(YEAR from -interval '2-1' YEAR TO MONTH);
+select extract(MONTH from interval '2-1' YEAR TO MONTH);
+select date_part('MONTH', interval '2-1' YEAR TO MONTH);
+select extract(MONTH from -interval '2-1' YEAR TO MONTH);
+select date_part(NULL, interval '2-1' YEAR TO MONTH);
+
+-- invalid
+select extract(DAY from interval '2-1' YEAR TO MONTH);
+select date_part('DAY', interval '2-1' YEAR TO MONTH);
+select date_part('not_supported', interval '2-1' YEAR TO MONTH);
+
+select extract(DAY from interval '123 12:34:56.789123123' DAY TO SECOND);
+select date_part('DAY', interval '123 12:34:56.789123123' DAY TO SECOND);
+select extract(DAY from -interval '123 12:34:56.789123123' DAY TO SECOND);
+select extract(HOUR from interval '123 12:34:56.789123123' DAY TO SECOND);
+select date_part('HOUR', interval '123 12:34:56.789123123' DAY TO SECOND);
+select extract(HOUR from -interval '123 12:34:56.789123123' DAY TO SECOND);
+select extract(MINUTE from interval '123 12:34:56.789123123' DAY TO SECOND);
+select date_part('MINUTE', interval '123 12:34:56.789123123' DAY TO SECOND);
+select extract(MINUTE from -interval '123 12:34:56.789123123' DAY TO SECOND);
+select extract(SECOND from interval '123 12:34:56.789123123' DAY TO SECOND);
+select date_part('SECOND', interval '123 12:34:56.789123123' DAY TO SECOND);
+select extract(SECOND from -interval '123 12:34:56.789123123' DAY TO SECOND);
+select date_part(NULL, interval '123 12:34:56.789123123' DAY TO SECOND);
+
+select extract(MONTH from interval '123 12:34:56.789123123' DAY TO SECOND);
+select date_part('not_supported', interval '123 12:34:56.789123123' DAY TO SECOND);
+
+-- alias for date_part
+select datepart('year', c), datepart('year', ntz), datepart('year', i) from t;
+select datepart('DAY', interval '123 12:34:56.789123123' DAY TO SECOND);
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-analytics.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-analytics.sql
new file mode 100644
index 000000000000..d6381e59e0d8
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-analytics.sql
@@ -0,0 +1,93 @@
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)
+AS testData(a, b);
+
+-- CUBE on overlapping columns
+SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH CUBE;
+
+SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE;
+
+-- ROLLUP on overlapping columns
+SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP;
+
+SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP;
+
+CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
+("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
+AS courseSales(course, year, earnings);
+
+-- ROLLUP
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year;
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year)) ORDER BY course, year;
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year;
+
+-- CUBE
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year;
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year)) ORDER BY course, year;
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year;
+
+-- GROUPING SETS
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year);
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year, ());
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course);
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year);
+
+-- Partial ROLLUP/CUBE/GROUPING SETS
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year;
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year;
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year;
+
+-- GROUPING SETS with aggregate functions containing groupBy columns
+SELECT course, SUM(earnings) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum;
+SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum;
+
+-- GROUPING/GROUPING_ID
+SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
+GROUP BY CUBE(course, year);
+SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year;
+SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year;
+SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year;
+
+-- GROUPING/GROUPING_ID in having clause
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
+HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year;
+SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0;
+SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0;
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0;
+
+-- GROUPING/GROUPING_ID in orderBy clause
+SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, year;
+SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, year;
+SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course);
+SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course);
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year;
+
+-- Aliases in SELECT could be used in ROLLUP/CUBE/GROUPING SETS
+SELECT a + b AS k1, b AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2);
+SELECT a + b AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b);
+SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k);
+
+-- GROUP BY use mixed Separate columns and CUBE/ROLLUP/Gr
+SELECT a, b, count(1) FROM testData GROUP BY a, b, CUBE(a, b);
+SELECT a, b, count(1) FROM testData GROUP BY a, b, ROLLUP(a, b);
+SELECT a, b, count(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b);
+SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b);
+SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ());
+SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ());
+SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ());
+
+-- Support nested CUBE/ROLLUP/GROUPING SETS in GROUPING SETS
+SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b));
+SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()));
+
+SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), GROUPING SETS(ROLLUP(a, b)));
+SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b, a, b), (a, b, a), (a, b));
+SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b, a, b), (a, b, a), (a, b)));
+
+SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b), CUBE(a, b));
+SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()), GROUPING SETS((a, b), (a), (b), ()));
+SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), (), (a, b), (a), (b), ());
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-alias.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-alias.sql
new file mode 100644
index 000000000000..75afc82f998d
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-alias.sql
@@ -0,0 +1,58 @@
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
+AS testData(a, b);
+
+-- GROUP BY alias should work with case insensitive names
+SELECT a from testData GROUP BY A;
+
+-- Aliases in SELECT could be used in GROUP BY
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1;
+SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k;
+SELECT a as alias FROM testData GROUP BY ALIAS;
+
+-- GROUP BY literal
+SELECT a AS k FROM testData GROUP BY 'k';
+SELECT 1 AS k FROM testData GROUP BY 'k';
+
+-- GROUP BY alias with the function name
+SELECT concat_ws(' ', a, b) FROM testData GROUP BY `concat_ws( , a, b)`;
+
+-- GROUP BY column with name same as an alias used in the project list
+SELECT 1 AS a FROM testData GROUP BY a;
+SELECT 1 AS a FROM testData GROUP BY `a`;
+
+-- GROUP BY implicit alias
+SELECT 1 GROUP BY `1`;
+
+-- GROUP BY alias with the subquery name
+SELECT (SELECT a FROM testData) + (SELECT b FROM testData) group by `(scalarsubquery() + scalarsubquery())`;
+
+-- GROUP BY with expression subqueries
+SELECT a, count(*) FROM testData GROUP BY (SELECT b FROM testData);
+SELECT a, count(*) FROM testData GROUP BY a, (SELECT b FROM testData);
+SELECT a, count(*) FROM testData GROUP BY a, (SELECT b FROM testData LIMIT 1);
+SELECT a, count(*) FROM testData GROUP BY a, b IN (SELECT a FROM testData);
+SELECT a, count(*) FROM testData GROUP BY a, a IN (SELECT b FROM testData);
+SELECT a, count(*) FROM testData GROUP BY a, EXISTS(SELECT b FROM testData);
+
+-- GROUP BY alias with invalid col in SELECT list
+SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k;
+
+-- Aggregate functions cannot be used in GROUP BY
+SELECT COUNT(b) AS k FROM testData GROUP BY k;
+
+-- Ordinal is replaced correctly when grouping by alias of a literal
+SELECT MAX(col1), 3 as abc FROM VALUES(1),(2),(3),(4) GROUP BY col1 % abc;
+
+-- turn off group by aliases
+set spark.sql.groupByAliases=false;
+
+-- Check analysis exceptions
+SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
+SELECT 1 GROUP BY `1`;
+SELECT 1 AS col FROM testData GROUP BY `col`;
+
+-- GROUP BY attribute takes precedence over alias
+SELECT 1 AS a FROM testData GROUP BY `a`;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-all-duckdb.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-all-duckdb.sql
new file mode 100644
index 000000000000..ac6d425a956f
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-all-duckdb.sql
@@ -0,0 +1,29 @@
+-- group by all
+-- additional test cases from DuckDB, given to us by Mosha
+
+create temporary view integers as select * from values
+ (0, 1),
+ (0, 2),
+ (1, 3),
+ (1, NULL)
+ as integers(g, i);
+
+
+SELECT g, SUM(i) FROM integers GROUP BY ALL ORDER BY 1;
+
+SELECT g, SUM(i), COUNT(*), COUNT(i), SUM(g) FROM integers GROUP BY ALL ORDER BY 1;
+
+SELECT i%2, SUM(i), SUM(g) FROM integers GROUP BY ALL ORDER BY 1;
+
+SELECT (g+i)%2, SUM(i), SUM(g) FROM integers GROUP BY ALL ORDER BY 1;
+
+SELECT (g+i)%2 + SUM(i), SUM(i), SUM(g) FROM integers GROUP BY ALL ORDER BY 1;
+
+SELECT g, i, g%2, SUM(i), SUM(g) FROM integers GROUP BY ALL ORDER BY 1, 2, 3, 4;
+
+SELECT c0 FROM (SELECT 1 c0) t0 GROUP BY ALL HAVING c0>0;
+
+SELECT c0 FROM (SELECT 1 c0, 1 c1 UNION ALL SELECT 1, 2) t0 GROUP BY ALL ORDER BY c0;
+
+SELECT c0 FROM (SELECT 1 c0, 1 c1 UNION ALL SELECT 1, 2) t0 GROUP BY ALL HAVING c1>0 ORDER BY c0;
+
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-all-mosha.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-all-mosha.sql
new file mode 100644
index 000000000000..451f745a97ee
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-all-mosha.sql
@@ -0,0 +1,32 @@
+-- group by all
+-- additional group by star test cases from Mosha
+create temporary view stuff as select * from values
+ (42, 9.75, 'hello world', '1970-08-07', '13.37', array(1,20,300)),
+ (1337, 1.2345, 'oh no', '2000-01-01', '42.0', array(4000,50000,600000)),
+ (42, 13.37, 'test', '1970-08-07', '1234567890', array(7000000,80000000,900000000))
+ as stuff(i, f, s, t, d, a);
+
+SELECT 100 * SUM(i) + SUM(f) / COUNT(s) AS f1, i AS f2 FROM stuff GROUP BY ALL ORDER BY f2;
+
+SELECT i + 1 AS i1, COUNT(i - 2) ci, f / i AS fi, SUM(i + f) sif FROM stuff GROUP BY ALL ORDER BY 1, 3;
+
+SELECT i AS i, COUNT(i) ci, f AS f, SUM(i + f) sif FROM stuff GROUP BY ALL ORDER BY 1, i, 2, ci, 3, f, 4, sif;
+
+SELECT i + 1, f / i, substring(s, 2, 3), extract(year from t), octet_length(d), size(a) FROM stuff
+GROUP BY ALL ORDER BY 1, 3, 4, 5, 6, 2;
+
+-- unlike Mosha, I'm failing this case because IMO it is too implicit to automatically group by i.
+SELECT i + SUM(f) FROM stuff GROUP BY ALL;
+
+SELECT s AS s, COUNT(*) c FROM stuff GROUP BY ALL HAVING SUM(f) > 0 ORDER BY s;
+
+SELECT SUM(i) si FROM stuff GROUP BY ALL HAVING si > 2;
+
+SELECT SUM(i) si FROM stuff GROUP BY ALL HAVING si < 2;
+
+-- negative test, i shouldn't propagate through the aggregate so the having should fail
+SELECT SUM(i) si FROM stuff GROUP BY ALL HAVING i > 2;
+
+-- negative test, i shouldn't propagate through the aggregate so the order by should fail
+SELECT SUM(i) si FROM stuff GROUP BY ALL ORDER BY i DESC;
+
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-all.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-all.sql
new file mode 100644
index 000000000000..4400c0b57866
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-all.sql
@@ -0,0 +1,85 @@
+-- group by all
+-- see https://www.linkedin.com/posts/mosha_duckdb-firebolt-snowflake-activity-7009615821006131200-VQ0o
+
+create temporary view data as select * from values
+ ("USA", "San Francisco", "Reynold", 1, 11.0),
+ ("USA", "San Francisco", "Matei", 2, 12.0),
+ ("USA", "Berkeley", "Xiao", 3, 13.0),
+ ("China", "Hangzhou", "Wenchen", 4, 14.0),
+ ("China", "Shanghai", "Shanghaiese", 5, 15.0),
+ ("Korea", "Seoul", "Hyukjin", 6, 16.0),
+ ("UK", "London", "Sean", 7, 17.0)
+ as data(country, city, name, id, power);
+
+-- basic
+select country, count(*) from data group by ALL;
+
+-- different case
+select country, count(*) from data group by aLl;
+
+-- a column named "all" would still work
+select all, city, count(*) from (select country as all, city, id from data) group by all, city;
+
+-- a column named "all" should take precedence over the normal group by all expansion
+-- if all refers to the column, then the following should return 3 rows.
+-- if all refers to the global aggregate, then 1 row.
+SELECT count(1) FROM VALUES(1), (2), (3) AS T(all) GROUP BY all;
+
+-- two grouping columns and two aggregates
+select country, city, count(*), sum(power) from data group by all;
+
+-- different ordering
+select count(*), country, city, sum(power) from data group by all;
+
+-- alias in grouping column
+select country as con, count(*) from data group by all;
+
+
+-- alias in aggregate column
+select country, count(*) as cnt from data group by all;
+
+-- scalar expression in grouping column
+select upper(country), count(*) as powerup from data group by all;
+
+-- scalar expression in aggregate column
+select country, sum(power) + 10 as powerup from data group by all;
+
+-- group by all without aggregate, which should just become a distinct
+select country, city from data group by all;
+
+-- make sure aliases are propagated through correctly
+select con, powerup from
+ (select country as con, sum(power) + 10 as powerup from data group by all);
+
+-- having
+select country, count(id) as cnt from data group by all having cnt > 1;
+
+-- no grouping column
+select count(id) from data group by all;
+
+-- a more complex no grouping column case
+select count(id + power / 2) * 3 from data group by all;
+
+-- no grouping column on an empty relation
+-- this should still return one row because we rewrite this to a global aggregate, as opposed to
+-- returning zero row (grouping by a constant).
+select count(*) from (select * from data where country = "DNS") group by all;
+
+-- complex cases that we choose not to infer; fail with a useful error message
+select id + count(*) from data group by all;
+
+-- an even more complex case that we choose not to infer; fail with a useful error message
+select (id + id) / 2 + count(*) * 2 from data group by all;
+
+-- uncorrelated subquery should work
+select country, (select count(*) from data) as cnt, count(id) as cnt_id from data group by all;
+
+-- correlated subquery should also work
+select country, (select count(*) from data d1 where d1.country = d2.country), count(id) from data d2 group by all;
+
+-- correlated subquery together with aggregate function doesn't work.
+-- make sure we report the right error UNRESOLVED_ALL_IN_GROUP_BY, rather than some random subquery error.
+select (select count(*) from data d1 where d1.country = d2.country) + count(id) from data d2 group by all;
+
+-- SELECT list contains unresolved column, should not report UNRESOLVED_ALL_IN_GROUP_BY
+select non_exist from data group by all;
\ No newline at end of file
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-filter.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-filter.sql
new file mode 100644
index 000000000000..6d8d0790ec8e
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-filter.sql
@@ -0,0 +1,179 @@
+-- Test filter clause for aggregate expression with codegen on and off.
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN
+
+--CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true
+--CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false
+
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
+AS testData(a, b);
+
+CREATE OR REPLACE TEMPORARY VIEW EMP AS SELECT * FROM VALUES
+ (100, "emp 1", date "2005-01-01", 100.00D, 10),
+ (100, "emp 1", date "2005-01-01", 100.00D, 10),
+ (200, "emp 2", date "2003-01-01", 200.00D, 10),
+ (300, "emp 3", date "2002-01-01", 300.00D, 20),
+ (400, "emp 4", date "2005-01-01", 400.00D, 30),
+ (500, "emp 5", date "2001-01-01", 400.00D, NULL),
+ (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100),
+ (700, "emp 7", date "2010-01-01", 400.00D, 100),
+ (800, "emp 8", date "2016-01-01", 150.00D, 70)
+AS EMP(id, emp_name, hiredate, salary, dept_id);
+
+CREATE OR REPLACE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES
+ (10, "dept 1", "CA"),
+ (20, "dept 2", "NY"),
+ (30, "dept 3", "TX"),
+ (40, "dept 4 - unassigned", "OR"),
+ (50, "dept 5 - unassigned", "NJ"),
+ (70, "dept 7", "FL")
+AS DEPT(dept_id, dept_name, state);
+
+CREATE OR REPLACE TEMPORARY VIEW FilterExpressionTestData AS SELECT * FROM VALUES
+ (1, 2, "asd"),
+ (3, 4, "fgh"),
+ (5, 6, "jkl")
+AS FilterExpressionTestData(num1, num2, str);
+
+-- Aggregate with filter and empty GroupBy expressions.
+SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData;
+SELECT COUNT(a) FILTER (WHERE a = 1), COUNT(b) FILTER (WHERE a > 1) FROM testData;
+SELECT COUNT(id) FILTER (WHERE hiredate = date "2001-01-01") FROM emp;
+SELECT COUNT(id) FILTER (WHERE hiredate = to_date('2001-01-01 00:00:00')) FROM emp;
+SELECT COUNT(id) FILTER (WHERE hiredate = to_timestamp("2001-01-01 00:00:00")) FROM emp;
+SELECT COUNT(id) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd") = "2001-01-01") FROM emp;
+SELECT COUNT(DISTINCT id) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd HH:mm:ss") = "2001-01-01 00:00:00") FROM emp;
+SELECT COUNT(DISTINCT id), COUNT(DISTINCT id) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd HH:mm:ss") = "2001-01-01 00:00:00") FROM emp;
+SELECT COUNT(DISTINCT id) FILTER (WHERE hiredate = to_timestamp("2001-01-01 00:00:00")), COUNT(DISTINCT id) FILTER (WHERE hiredate = to_date('2001-01-01 00:00:00')) FROM emp;
+SELECT SUM(salary), COUNT(DISTINCT id), COUNT(DISTINCT id) FILTER (WHERE hiredate = date "2001-01-01") FROM emp;
+SELECT COUNT(DISTINCT 1) FILTER (WHERE a = 1) FROM testData;
+SELECT COUNT(DISTINCT id) FILTER (WHERE true) FROM emp;
+SELECT COUNT(DISTINCT id) FILTER (WHERE false) FROM emp;
+SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id = 40) FROM emp;
+SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id = 40) FROM emp;
+SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id > 0) FROM emp;
+SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id > 0) FROM emp;
+SELECT COUNT(DISTINCT id), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id = 40) FROM emp;
+SELECT COUNT(DISTINCT id), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id = 40) FROM emp;
+SELECT COUNT(DISTINCT id), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id > 0) FROM emp;
+SELECT COUNT(DISTINCT id), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id > 0) FROM emp;
+
+-- Aggregate with filter and non-empty GroupBy expressions.
+SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData GROUP BY a;
+SELECT a, COUNT(b) FILTER (WHERE a != 2) FROM testData GROUP BY b;
+SELECT COUNT(a) FILTER (WHERE a >= 0), COUNT(b) FILTER (WHERE a >= 3) FROM testData GROUP BY a;
+SELECT dept_id, SUM(salary) FILTER (WHERE hiredate > date "2003-01-01") FROM emp GROUP BY dept_id;
+SELECT dept_id, SUM(salary) FILTER (WHERE hiredate > to_date("2003-01-01")) FROM emp GROUP BY dept_id;
+SELECT dept_id, SUM(salary) FILTER (WHERE hiredate > to_timestamp("2003-01-01 00:00:00")) FROM emp GROUP BY dept_id;
+SELECT dept_id, SUM(salary) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd") > "2003-01-01") FROM emp GROUP BY dept_id;
+SELECT dept_id, SUM(DISTINCT salary) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd HH:mm:ss") > "2001-01-01 00:00:00") FROM emp GROUP BY dept_id;
+SELECT dept_id, SUM(DISTINCT salary), SUM(DISTINCT salary) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd HH:mm:ss") > "2001-01-01 00:00:00") FROM emp GROUP BY dept_id;
+SELECT dept_id, SUM(DISTINCT salary) FILTER (WHERE hiredate > date "2001-01-01"), SUM(DISTINCT salary) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd HH:mm:ss") > "2001-01-01 00:00:00") FROM emp GROUP BY dept_id;
+SELECT dept_id, COUNT(id), SUM(DISTINCT salary), SUM(DISTINCT salary) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd") > "2001-01-01") FROM emp GROUP BY dept_id;
+SELECT b, COUNT(DISTINCT 1) FILTER (WHERE a = 1) FROM testData GROUP BY b;
+
+-- Aggregate with filter and grouped by literals.
+SELECT 'foo', COUNT(a) FILTER (WHERE b <= 2) FROM testData GROUP BY 1;
+SELECT 'foo', SUM(salary) FILTER (WHERE hiredate >= date "2003-01-01") FROM emp GROUP BY 1;
+SELECT 'foo', SUM(salary) FILTER (WHERE hiredate >= to_date("2003-01-01")) FROM emp GROUP BY 1;
+SELECT 'foo', SUM(salary) FILTER (WHERE hiredate >= to_timestamp("2003-01-01")) FROM emp GROUP BY 1;
+
+-- Aggregate with filter, more than one aggregate function goes with distinct.
+select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary), sum(salary) filter (where id > 200) from emp group by dept_id;
+select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary), sum(salary) filter (where id + dept_id > 500) from emp group by dept_id;
+select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary) filter (where salary < 400.00D), sum(salary) filter (where id > 200) from emp group by dept_id;
+select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary) filter (where salary < 400.00D), sum(salary) filter (where id + dept_id > 500) from emp group by dept_id;
+select dept_id, count(distinct emp_name) filter (where id > 200), sum(salary) from emp group by dept_id;
+select dept_id, count(distinct emp_name) filter (where id + dept_id > 500), sum(salary) from emp group by dept_id;
+select dept_id, count(distinct emp_name), count(distinct emp_name) filter (where id > 200), sum(salary) from emp group by dept_id;
+select dept_id, count(distinct emp_name), count(distinct emp_name) filter (where id + dept_id > 500), sum(salary) from emp group by dept_id;
+select dept_id, count(distinct emp_name), count(distinct emp_name) filter (where id > 200), sum(salary), sum(salary) filter (where id > 200) from emp group by dept_id;
+select dept_id, count(distinct emp_name), count(distinct emp_name) filter (where id + dept_id > 500), sum(salary), sum(salary) filter (where id > 200) from emp group by dept_id;
+select dept_id, count(distinct emp_name) filter (where id > 200), count(distinct hiredate), sum(salary) from emp group by dept_id;
+select dept_id, count(distinct emp_name) filter (where id > 200), count(distinct hiredate) filter (where hiredate > date "2003-01-01"), sum(salary) from emp group by dept_id;
+select dept_id, count(distinct emp_name) filter (where id > 200), count(distinct hiredate) filter (where hiredate > date "2003-01-01"), sum(salary) filter (where salary < 400.00D) from emp group by dept_id;
+select dept_id, count(distinct emp_name) filter (where id > 200), count(distinct hiredate) filter (where hiredate > date "2003-01-01"), sum(salary) filter (where salary < 400.00D), sum(salary) filter (where id > 200) from emp group by dept_id;
+select dept_id, count(distinct emp_name) filter (where id > 200), count(distinct emp_name), sum(salary) from emp group by dept_id;
+select dept_id, count(distinct emp_name) filter (where id > 200), count(distinct emp_name) filter (where hiredate > date "2003-01-01"), sum(salary) from emp group by dept_id;
+select dept_id, sum(distinct (id + dept_id)) filter (where id > 200), count(distinct hiredate), sum(salary) from emp group by dept_id;
+select dept_id, sum(distinct (id + dept_id)) filter (where id > 200), count(distinct hiredate) filter (where hiredate > date "2003-01-01"), sum(salary) from emp group by dept_id;
+select dept_id, avg(distinct (id + dept_id)) filter (where id > 200), count(distinct hiredate) filter (where hiredate > date "2003-01-01"), sum(salary) filter (where salary < 400.00D) from emp group by dept_id;
+select dept_id, count(distinct emp_name, hiredate) filter (where id > 200), sum(salary) from emp group by dept_id;
+select dept_id, count(distinct emp_name, hiredate) filter (where id > 0), sum(salary) from emp group by dept_id;
+select dept_id, count(distinct 1), count(distinct 1) filter (where id > 200), sum(salary) from emp group by dept_id;
+
+-- Aggregate with filter and grouped by literals (hash aggregate), here the input table is filtered using WHERE.
+SELECT 'foo', APPROX_COUNT_DISTINCT(a) FILTER (WHERE b >= 0) FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate with filter and grouped by literals (sort aggregate), here the input table is filtered using WHERE.
+SELECT 'foo', MAX(STRUCT(a)) FILTER (WHERE b >= 1) FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate with filter and complex GroupBy expressions.
+SELECT a + b, COUNT(b) FILTER (WHERE b >= 2) FROM testData GROUP BY a + b;
+SELECT a + 2, COUNT(b) FILTER (WHERE b IN (1, 2)) FROM testData GROUP BY a + 1;
+SELECT a + 1 + 1, COUNT(b) FILTER (WHERE b > 0) FROM testData GROUP BY a + 1;
+
+-- Aggregate with filter, foldable input and multiple distinct groups.
+SELECT COUNT(DISTINCT b) FILTER (WHERE b > 0), COUNT(DISTINCT b, c) FILTER (WHERE b > 0 AND c > 2)
+FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a;
+
+-- Check analysis exceptions
+SELECT a AS k, COUNT(b) FILTER (WHERE b > 0) FROM testData GROUP BY k;
+
+-- Aggregate with filter contains exists subquery
+SELECT emp.dept_id,
+ avg(salary),
+ avg(salary) FILTER (WHERE id > (SELECT 200))
+FROM emp
+GROUP BY dept_id;
+
+SELECT emp.dept_id,
+ avg(salary),
+ avg(salary) FILTER (WHERE emp.dept_id = (SELECT dept_id FROM dept LIMIT 1))
+FROM emp
+GROUP BY dept_id;
+
+-- [SPARK-30220] Support Filter expression uses IN/EXISTS predicate sub-queries
+SELECT emp.dept_id,
+ avg(salary),
+ avg(salary) FILTER (WHERE EXISTS (SELECT state
+ FROM dept
+ WHERE dept.dept_id = emp.dept_id))
+FROM emp
+GROUP BY dept_id;
+
+SELECT emp.dept_id,
+ Sum(salary),
+ Sum(salary) FILTER (WHERE NOT EXISTS (SELECT state
+ FROM dept
+ WHERE dept.dept_id = emp.dept_id))
+FROM emp
+GROUP BY dept_id;
+
+SELECT emp.dept_id,
+ avg(salary),
+ avg(salary) FILTER (WHERE emp.dept_id IN (SELECT DISTINCT dept_id
+ FROM dept))
+FROM emp
+GROUP BY dept_id;
+SELECT emp.dept_id,
+ Sum(salary),
+ Sum(salary) FILTER (WHERE emp.dept_id NOT IN (SELECT DISTINCT dept_id
+ FROM dept))
+FROM emp
+GROUP BY dept_id;
+
+-- Aggregate with filter is subquery
+SELECT t1.b FROM (SELECT COUNT(b) FILTER (WHERE a >= 2) AS b FROM testData) t1;
+
+-- SPARK-47256: Wrong use of FILTER expression in aggregate functions
+SELECT count(num1) FILTER (WHERE rand(int(num2)) > 1) FROM FilterExpressionTestData;
+
+SELECT sum(num1) FILTER (WHERE str) FROM FilterExpressionTestData;
+
+SELECT sum(num1) FILTER (WHERE max(num2) > 1) FROM FilterExpressionTestData;
+
+SELECT sum(num1) FILTER (WHERE nth_value(num2, 2) OVER(ORDER BY num2) > 1) FROM FilterExpressionTestData;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-ordinal.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-ordinal.sql
new file mode 100644
index 000000000000..b773396c050d
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by-ordinal.sql
@@ -0,0 +1,96 @@
+-- group by ordinal positions
+
+create temporary view data as select * from values
+ (1, 1),
+ (1, 2),
+ (2, 1),
+ (2, 2),
+ (3, 1),
+ (3, 2)
+ as data(a, b);
+
+-- basic case
+select a, sum(b) from data group by 1;
+
+-- constant case
+select 1, 2, sum(b) from data group by 1, 2;
+
+-- duplicate group by column
+select a, 1, sum(b) from data group by a, 1;
+select a, 1, sum(b) from data group by 1, 2;
+
+-- group by a non-aggregate expression's ordinal
+select a, b + 2, count(2) from data group by a, 2;
+
+-- with alias
+select a as aa, b + 2 as bb, count(2) from data group by 1, 2;
+
+-- foldable non-literal: this should be the same as no grouping.
+select sum(b) from data group by 1 + 0;
+
+-- negative cases: ordinal out of range
+select a, b from data group by -1;
+select a, b from data group by 0;
+select a, b from data group by 3;
+
+-- negative case: position is an aggregate expression
+select a, b, sum(b) from data group by 3;
+select a, b, sum(b) + 2 from data group by 3;
+
+-- negative case: nondeterministic expression
+select a, rand(0), sum(b)
+from
+(select /*+ REPARTITION(1) */ a, b from data) group by a, 2;
+
+-- negative case: star
+select * from data group by a, b, 1;
+
+-- group by ordinal followed by order by
+select a, count(a) from (select 1 as a) tmp group by 1 order by 1;
+
+-- group by ordinal followed by having
+select count(a), a from (select 1 as a) tmp group by 2 having a > 0;
+
+-- mixed cases: group-by ordinals and aliases
+select a, a AS k, count(b) from data group by k, 1;
+
+-- can use ordinal in CUBE
+select a, b, count(1) from data group by cube(1, 2);
+
+-- mixed cases: can use ordinal in CUBE
+select a, b, count(1) from data group by cube(1, b);
+
+-- can use ordinal with cube
+select a, b, count(1) from data group by 1, 2 with cube;
+
+-- can use ordinal in ROLLUP
+select a, b, count(1) from data group by rollup(1, 2);
+
+-- mixed cases: can use ordinal in ROLLUP
+select a, b, count(1) from data group by rollup(1, b);
+
+-- can use ordinal with rollup
+select a, b, count(1) from data group by 1, 2 with rollup;
+
+-- can use ordinal in GROUPING SETS
+select a, b, count(1) from data group by grouping sets((1), (2), (1, 2));
+
+-- mixed cases: can use ordinal in GROUPING SETS
+select a, b, count(1) from data group by grouping sets((1), (b), (a, 2));
+
+select a, b, count(1) from data group by a, 2 grouping sets((1), (b), (a, 2));
+
+-- range error
+select a, b, count(1) from data group by a, -1;
+
+select a, b, count(1) from data group by a, 3;
+
+select a, b, count(1) from data group by cube(-1, 2);
+
+select a, b, count(1) from data group by cube(1, 3);
+
+-- turn off group by ordinal
+set spark.sql.groupByOrdinal=false;
+
+-- can now group by negative literal
+select sum(b) from data group by -1;
diff --git a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by.sql b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by.sql
new file mode 100644
index 000000000000..c1b5e88c43f2
--- /dev/null
+++ b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/group-by.sql
@@ -0,0 +1,329 @@
+-- Test aggregate operator with codegen on and off.
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN
+
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
+AS testData(a, b);
+
+-- Aggregate with empty GroupBy expressions.
+SELECT a, COUNT(b) FROM testData;
+SELECT COUNT(a), COUNT(b) FROM testData;
+
+-- Aggregate with non-empty GroupBy expressions.
+SELECT a, COUNT(b) FROM testData GROUP BY a;
+SELECT a, COUNT(b) FROM testData GROUP BY b;
+SELECT COUNT(a), COUNT(b) FROM testData GROUP BY a;
+
+-- Aggregate grouped by literals.
+SELECT 'foo', COUNT(a) FROM testData GROUP BY 1;
+
+-- Aggregate grouped by literals (whole stage code generation).
+SELECT 'foo' FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate grouped by literals (hash aggregate).
+SELECT 'foo', APPROX_COUNT_DISTINCT(a) FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate grouped by literals (sort aggregate).
+SELECT 'foo', MAX(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate with complex GroupBy expressions.
+SELECT a + b, COUNT(b) FROM testData GROUP BY a + b;
+SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1;
+SELECT a + 1 + 1, COUNT(b) FROM testData GROUP BY a + 1;
+
+-- struct() in group by
+SELECT count(1) FROM testData GROUP BY struct(a + 0.1 AS aa);
+
+-- Aggregate with nulls.
+SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a)
+FROM testData;
+
+-- Aggregate with foldable input and multiple distinct groups.
+SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a;
+
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
+(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v);
+SELECT k AS a, COUNT(v) FROM testDataHasSameNameWithAlias GROUP BY a;
+
+-- turn off group by aliases
+set spark.sql.groupByAliases=false;
+
+-- Aggregate with empty input and non-empty GroupBy expressions.
+SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a;
+
+-- Aggregate with empty input and empty GroupBy expressions.
+SELECT COUNT(1) FROM testData WHERE false;
+SELECT 1 FROM (SELECT COUNT(1) FROM testData WHERE false) t;
+
+-- Aggregate with empty GroupBy expressions and filter on top
+SELECT 1 from (
+ SELECT 1 AS z,
+ MIN(a.x)
+ FROM (select 1 as x) a
+ WHERE false
+) b
+where b.z != b.z;
+
+-- SPARK-24369 multiple distinct aggregations having the same argument set
+SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*)
+ FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y);
+
+-- SPARK-25708 HAVING without GROUP BY means global aggregate
+SELECT 1 FROM range(10) HAVING true;
+
+SELECT 1 FROM range(10) HAVING MAX(id) > 0;
+
+SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true;
+
+SELECT 1 FROM range(10) HAVING true;
+
+SELECT 1 FROM range(10) HAVING MAX(id) > 0;
+
+SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false;
+
+-- Test data
+CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
+ (1, true), (1, false),
+ (2, true),
+ (3, false), (3, null),
+ (4, null), (4, null),
+ (5, null), (5, true), (5, false) AS test_agg(k, v);
+
+-- empty table
+SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0;
+
+-- all null values
+SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4;
+
+-- aggregates are null Filtering
+SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5;
+
+-- group by
+SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k;
+
+-- having
+SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false;
+SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) IS NULL;
+
+-- basic subquery path to make sure rewrite happens in both parent and child plans.
+SELECT k,
+ Every(v) AS every
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Any(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY k;
+
+-- basic subquery path to make sure rewrite happens in both parent and child plans.
+SELECT k,
+ Every(v) AS every
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Every(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY k;
+
+-- input type checking Int
+SELECT every(1);
+
+-- input type checking Short
+SELECT some(1S);
+
+-- input type checking Long
+SELECT any(1L);
+
+-- input type checking String
+SELECT every("true");
+
+-- input type checking Decimal
+SELECT bool_and(1.0);
+
+-- input type checking double
+SELECT bool_or(1.0D);
+
+-- every/some/any aggregates/bool_and/bool_or are supported as windows expression.
+SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
+
+-- Having referencing aggregate expressions is ok.
+SELECT count(*) FROM test_agg HAVING count(*) > 1L;
+SELECT k, max(v) FROM test_agg GROUP BY k HAVING max(v) = true;
+
+-- Aggregate expressions can be referenced through an alias
+SELECT * FROM (SELECT COUNT(*) AS cnt FROM test_agg) WHERE cnt > 1L;
+
+-- Error when aggregate expressions are in where clause directly
+SELECT count(*) FROM test_agg WHERE count(*) > 1L;
+SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L;
+SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1;
+
+-- Aggregate with multiple distinct decimal columns
+SELECT AVG(DISTINCT decimal_col), SUM(DISTINCT decimal_col) FROM VALUES (CAST(1 AS DECIMAL(9, 0))) t(decimal_col);
+
+-- SPARK-34581: Don't optimize out grouping expressions from aggregate expressions without aggregate function
+SELECT not(a IS NULL), count(*) AS c
+FROM testData
+GROUP BY a IS NULL;
+
+SELECT if(not(a IS NULL), rand(0), 1), count(*) AS c
+FROM testData
+GROUP BY a IS NULL;
+
+
+-- Histogram aggregates with different numeric input types
+SELECT
+ histogram_numeric(col, 2) as histogram_2,
+ histogram_numeric(col, 3) as histogram_3,
+ histogram_numeric(col, 5) as histogram_5,
+ histogram_numeric(col, 10) as histogram_10
+FROM VALUES
+ (1), (2), (3), (4), (5), (6), (7), (8), (9), (10),
+ (11), (12), (13), (14), (15), (16), (17), (18), (19), (20),
+ (21), (22), (23), (24), (25), (26), (27), (28), (29), (30),
+ (31), (32), (33), (34), (35), (3), (37), (38), (39), (40),
+ (41), (42), (43), (44), (45), (46), (47), (48), (49), (50) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (1), (2), (3) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (1L), (2L), (3L) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (1F), (2F), (3F) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (1D), (2D), (3D) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (1S), (2S), (3S) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS BYTE)), (CAST(2 AS BYTE)), (CAST(3 AS BYTE)) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS TINYINT)), (CAST(2 AS TINYINT)), (CAST(3 AS TINYINT)) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS SMALLINT)), (CAST(2 AS SMALLINT)), (CAST(3 AS SMALLINT)) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS BIGINT)), (CAST(2 AS BIGINT)), (CAST(3 AS BIGINT)) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES
+ (CAST(1 AS DECIMAL(4, 2))), (CAST(2 AS DECIMAL(4, 2))), (CAST(3 AS DECIMAL(4, 2))) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'),
+ (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '100-00' YEAR TO MONTH),
+ (INTERVAL '110-00' YEAR TO MONTH), (INTERVAL '120-00' YEAR TO MONTH) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '12 20:4:0' DAY TO SECOND),
+ (INTERVAL '12 21:4:0' DAY TO SECOND), (INTERVAL '12 22:4:0' DAY TO SECOND) AS tab(col);
+SELECT histogram_numeric(col, 3)
+FROM VALUES (NULL), (NULL), (NULL) AS tab(col);
+SELECT histogram_numeric(col, 3)
+FROM VALUES (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE)) AS tab(col);
+SELECT histogram_numeric(col, 3)
+FROM VALUES (CAST(NULL AS INT)), (CAST(NULL AS INT)), (CAST(NULL AS INT)) AS tab(col);
+
+-- SPARK-27974: Support ANSI Aggregate Function: array_agg
+SELECT
+ collect_list(col),
+ array_agg(col)
+FROM VALUES
+ (1), (2), (1) AS tab(col);
+SELECT
+ a,
+ collect_list(b),
+ array_agg(b)
+FROM VALUES
+ (1,4),(2,3),(1,4),(2,4) AS v(a,b)
+GROUP BY a;
+
+-- SPARK-44846: PushFoldableIntoBranches in complex grouping expressions cause bindReference error
+SELECT c * 2 AS d
+FROM (
+ SELECT if(b > 1, 1, b) AS c
+ FROM (
+ SELECT if(a < 0, 0, a) AS b
+ FROM VALUES (-1), (1), (2) AS t1(a)
+ ) t2
+ GROUP BY b
+ ) t3
+GROUP BY c;
+
+-- SPARK-45599: Check that "weird" doubles group and sort as desired.
+SELECT col1, count(*) AS cnt
+FROM VALUES
+ (0.0),
+ (-0.0),
+ (double('NaN')),
+ (double('NaN')),
+ (double('Infinity')),
+ (double('Infinity')),
+ (-double('Infinity')),
+ (-double('Infinity'))
+GROUP BY col1
+ORDER BY col1
+;
+
+-- SC-170296: Verify that group by works when MapType is inside complex type for column type
+-- ARRAY