diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index f450dd80a8b13..56cd224dd8c53 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -2545,7 +2545,7 @@ case class Encode(value: Expression, charset: Expression) @ExpressionDescription( usage = """ _FUNC_(str[, fmt]) - Converts the input `str` to a binary value based on the supplied `fmt`. - `fmt` can be a case-insensitive string literal of "hex", "utf-8", "base2", or "base64". + `fmt` can be a case-insensitive string literal of "hex", "utf-8", or "base64". By default, the binary format for conversion is "hex" if `fmt` is omitted. The function returns NULL if at least one of the input parameters is NULL. """, @@ -2562,7 +2562,7 @@ case class ToBinary(expr: Expression, format: Option[Expression], child: Express def this(expr: Expression, format: Expression) = this(expr, Option(format), format match { - case lit if lit.foldable => + case lit if (lit.foldable && Seq(StringType, NullType).contains(lit.dataType)) => val value = lit.eval() if (value == null) Literal(null, BinaryType) else { @@ -2570,7 +2570,6 @@ case class ToBinary(expr: Expression, format: Option[Expression], child: Express case "hex" => Unhex(expr) case "utf-8" => Encode(expr, Literal("UTF-8")) case "base64" => UnBase64(expr) - case "base2" => Cast(expr, BinaryType) case _ => lit } } @@ -2589,10 +2588,11 @@ case class ToBinary(expr: Expression, format: Option[Expression], child: Express override def checkInputDataTypes(): TypeCheckResult = { def checkFormat(lit: Expression) = { - if (lit.foldable) { + if (lit.foldable && Seq(StringType, NullType).contains(lit.dataType)) { val value = lit.eval() - value == null || Seq("hex", "utf-8", "base64", "base2").contains( - value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT)) + value == null || + Seq("hex", "utf-8", "base64").contains( + value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT)) } else false } @@ -2601,7 +2601,7 @@ case class ToBinary(expr: Expression, format: Option[Expression], child: Express } else { TypeCheckResult.TypeCheckFailure( s"Unsupported encoding format: $format. The format has to be " + - s"a case-insensitive string literal of 'hex', 'utf-8', 'base2', or 'base64'") + s"a case-insensitive string literal of 'hex', 'utf-8', or 'base64'") } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 9571f3eb6c2bb..94eb96f6249a0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -140,7 +140,6 @@ select to_number('00,454.8-', '00,000.9-'); select to_binary('abc'); select to_binary('abc', 'utf-8'); select to_binary('abc', 'base64'); -select to_binary('abc', 'base2'); select to_binary('abc', 'hex'); select to_binary('abc', concat('utf', '-8')); select to_binary('abc', concat('base', '64')); @@ -150,4 +149,6 @@ select to_binary('abc', null); select to_binary(null, 'utf-8'); select to_binary(null, null); select to_binary(null, cast(null as string)); +select to_binary(null, cast(null as int)); select to_binary('abc', 'invalidFormat'); +select to_binary('abc', 1); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 86c90fc1fe34d..4c0aa8c948334 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 116 +-- Number of queries: 117 -- !query @@ -850,14 +850,6 @@ struct i� --- !query -select to_binary('abc', 'base2') --- !query schema -struct --- !query output -abc - - -- !query select to_binary('abc', 'hex') -- !query schema @@ -930,10 +922,28 @@ struct NULL +-- !query +select to_binary(null, cast(null as int)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'to_binary(NULL, CAST(NULL AS INT))' due to data type mismatch: Unsupported encoding format: Some(ansi_cast(null as int)). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 + + -- !query select to_binary('abc', 'invalidFormat') -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). The format has to be a case-insensitive string literal of 'hex', 'utf-8', 'base2', or 'base64'; line 1 pos 7 +cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 + + +-- !query +select to_binary('abc', 1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'to_binary('abc', 1)' due to data type mismatch: Unsupported encoding format: Some(1). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index f3852a9527b00..bb2974db2322b 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 116 +-- Number of queries: 117 -- !query @@ -846,14 +846,6 @@ struct i� --- !query -select to_binary('abc', 'base2') --- !query schema -struct --- !query output -abc - - -- !query select to_binary('abc', 'hex') -- !query schema @@ -926,10 +918,28 @@ struct NULL +-- !query +select to_binary(null, cast(null as int)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'to_binary(NULL, CAST(NULL AS INT))' due to data type mismatch: Unsupported encoding format: Some(cast(null as int)). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 + + -- !query select to_binary('abc', 'invalidFormat') -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). The format has to be a case-insensitive string literal of 'hex', 'utf-8', 'base2', or 'base64'; line 1 pos 7 +cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 + + +-- !query +select to_binary('abc', 1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'to_binary('abc', 1)' due to data type mismatch: Unsupported encoding format: Some(1). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7