diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index 11c8204d2c93c..821aa2615ee2d 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -1390,6 +1390,12 @@
     ],
     "sqlState" : "42805"
   },
+  "GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE" : {
+    "message" : [
+      "The expression <sqlExpr> cannot be used as a grouping expression because its data type <dataType> is not an orderable data type."
+    ],
+    "sqlState" : "42822"
+  },
   "HLL_INVALID_INPUT_SKETCH_BUFFER" : {
     "message" : [
       "Invalid call to <function>; only valid HLL sketch buffers are supported as inputs (such as those produced by the `hll_sketch_agg` function)."
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index 85b9e85ac4201..e9c17bf4f93b2 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -846,6 +846,12 @@ GROUP BY `<index>` refers to an expression `<aggExpr>` that contains an aggregat
 
 GROUP BY position `<index>` is not in select list (valid range is [1, `<size>`]).
 
+### GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE
+
+[SQLSTATE: 42822](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
+
+The expression `<sqlExpr>` cannot be used as a grouping expression because its data type `<dataType>` is not an orderable data type.
+
 ### HLL_INVALID_INPUT_SKETCH_BUFFER
 
 [SQLSTATE: 22546](sql-error-conditions-sqlstates.html#class-22-data-exception)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala
index 9319b104024a0..385e0f00695a3 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala
@@ -16,7 +16,7 @@
  */
 package org.apache.spark.sql.catalyst.expressions
 
-import org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, NullType, StructType, UserDefinedType}
+import org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, NullType, StructType, UserDefinedType, VariantType}
 
 object OrderUtils {
   /**
@@ -24,6 +24,7 @@ object OrderUtils {
    */
   def isOrderable(dataType: DataType): Boolean = dataType match {
     case NullType => true
+    case VariantType => false
     case dt: AtomicType => true
     case struct: StructType => struct.fields.forall(f => isOrderable(f.dataType))
     case array: ArrayType => isOrderable(array.elementType)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala
index eaf10973e71de..258bc0ed8fe73 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.plans.logical.Aggregate
 import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, CharVarcharUtils}
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase, QueryExecutionErrors}
-import org.apache.spark.sql.types.{DataType, MapType, StringType, StructType}
+import org.apache.spark.sql.types.{DataType, MapType, StringType, StructType, VariantType}
 import org.apache.spark.unsafe.types.UTF8String
 
 object ExprUtils extends QueryErrorsBase {
@@ -193,6 +193,15 @@ object ExprUtils extends QueryErrorsBase {
         messageParameters = Map("sqlExpr" -> expr.sql))
     }
 
+    // Check if the data type of expr is orderable.
+    if (expr.dataType.existsRecursively(_.isInstanceOf[VariantType])) {
+      expr.failAnalysis(
+        errorClass = "GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE",
+        messageParameters = Map(
+          "sqlExpr" -> toSQLExpr(expr),
+          "dataType" -> toSQLType(expr.dataType)))
+    }
+
     if (!expr.deterministic) {
       // This is just a sanity check, our analysis rule PullOutNondeterministic should
       // already pull out those nondeterministic expressions and evaluate them in
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala
index f95111f9f91be..d1ac259becccd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala
@@ -251,4 +251,33 @@ class VariantSuite extends QueryTest with SharedSparkSession {
       }
     }
   }
+
+  test("group/order/join variant are disabled") {
+    var ex = intercept[AnalysisException] {
+      spark.sql("select parse_json('') group by 1")
+    }
+    assert(ex.getErrorClass == "GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE")
+
+    ex = intercept[AnalysisException] {
+      spark.sql("select parse_json('') order by 1")
+    }
+    assert(ex.getErrorClass == "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE")
+
+    ex = intercept[AnalysisException] {
+      spark.sql("select parse_json('') sort by 1")
+    }
+    assert(ex.getErrorClass == "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE")
+
+    ex = intercept[AnalysisException] {
+      spark.sql("with t as (select 1 as a, parse_json('') as v) " +
+        "select rank() over (partition by a order by v) from t")
+    }
+    assert(ex.getErrorClass == "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE")
+
+    ex = intercept[AnalysisException] {
+      spark.sql("with t as (select parse_json('') as v) " +
+        "select t1.v from t as t1 join t as t2 on t1.v = t2.v")
+    }
+    assert(ex.getErrorClass == "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE")
+  }
 }
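
For reference, a minimal sketch (not part of the patch) of the behavior the new ExprUtils check is expected to enforce once applied. It assumes a ScalaTest suite backed by SharedSparkSession, as in VariantSuite above, so that `spark` and `intercept` are in scope; the `parse_json('1')` and `named_struct` inputs are illustrative and not taken from the patch. Because the check walks the data type with `existsRecursively`, a grouping key whose type merely contains a variant (for example a struct field) should be rejected as well, while ORDER BY keeps reporting the existing DATATYPE_MISMATCH.INVALID_ORDERING_TYPE error.

    import org.apache.spark.sql.AnalysisException

    // Grouping directly by a variant expression: rejected at analysis time
    // by the new check in ExprUtils.
    val e1 = intercept[AnalysisException] {
      spark.sql("select parse_json('1') group by 1")
    }
    assert(e1.getErrorClass == "GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE")

    // Grouping by a struct that nests a variant: also rejected, since the
    // check inspects the type recursively rather than only the top level.
    val e2 = intercept[AnalysisException] {
      spark.sql("select named_struct('v', parse_json('1')) group by 1")
    }
    assert(e2.getErrorClass == "GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE")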