apache · dilipbiswal · Aug 1, 2019 · Aug 1, 2019 · Aug 2, 2019 · Aug 2, 2019
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import java.math.{BigDecimal => JavaBigDecimal}
 import java.time.ZoneId
+import java.util.Locale
 import java.util.concurrent.TimeUnit._
 
 import org.apache.spark.SparkException
@@ -192,6 +193,18 @@ object Cast {
   }
 
   def resolvableNullability(from: Boolean, to: Boolean): Boolean = !from || to
+
+  def processFloatingPointSpecialLiterals(v: String, isFloat: Boolean): Any = {
+    v.trim.toLowerCase(Locale.ROOT) match {
+      case "inf" | "+inf" | "infinity" | "+infinity" =>
+        if (isFloat) Float.PositiveInfinity else Double.PositiveInfinity
+      case "-inf" | "-infinity" =>
+        if (isFloat) Float.NegativeInfinity else Double.NegativeInfinity
+      case "nan" =>
+        if (isFloat) Float.NaN else Double.NaN
+      case _ => null
+    }
+  }
 }
 
 /**
@@ -562,8 +575,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
   // DoubleConverter
   private[this] def castToDouble(from: DataType): Any => Any = from match {
     case StringType =>
-      buildCast[UTF8String](_, s => try s.toString.toDouble catch {
-        case _: NumberFormatException => null
+      buildCast[UTF8String](_, s => {
+        val doubleStr = s.toString
+        try doubleStr.toDouble catch {
+          case _: NumberFormatException =>
+            Cast.processFloatingPointSpecialLiterals(doubleStr, false)
+        }
       })
     case BooleanType =>
       buildCast[Boolean](_, b => if (b) 1d else 0d)
@@ -578,8 +595,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
   // FloatConverter
   private[this] def castToFloat(from: DataType): Any => Any = from match {
     case StringType =>
-      buildCast[UTF8String](_, s => try s.toString.toFloat catch {
-        case _: NumberFormatException => null
+      buildCast[UTF8String](_, s => {
+        val floatStr = s.toString
+        try floatStr.toFloat catch {
+          case _: NumberFormatException =>
+            Cast.processFloatingPointSpecialLiterals(floatStr, true)
+        }
       })
     case BooleanType =>
       buildCast[Boolean](_, b => if (b) 1f else 0f)
@@ -717,9 +738,9 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
     case ByteType => castToByteCode(from, ctx)
     case ShortType => castToShortCode(from, ctx)
     case IntegerType => castToIntCode(from, ctx)
-    case FloatType => castToFloatCode(from)
+    case FloatType => castToFloatCode(from, ctx)
     case LongType => castToLongCode(from, ctx)
-    case DoubleType => castToDoubleCode(from)
+    case DoubleType => castToDoubleCode(from, ctx)
 
     case array: ArrayType =>
       castArrayCode(from.asInstanceOf[ArrayType].elementType, array.elementType, ctx)
@@ -1259,48 +1280,66 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
       (c, evPrim, evNull) => code"$evPrim = (long) $c;"
   }
 
-  private[this] def castToFloatCode(from: DataType): CastFunction = from match {
-    case StringType =>
-      (c, evPrim, evNull) =>
-        code"""
+  private[this] def castToFloatCode(from: DataType, ctx: CodegenContext): CastFunction = {
+    from match {
+      case StringType =>
+        val floatStr = ctx.freshVariable("floatStr", StringType)
+        (c, evPrim, evNull) =>
+          code"""
+          final String $floatStr = $c.toString();
           try {
-            $evPrim = Float.valueOf($c.toString());
+            $evPrim = Float.valueOf($floatStr);
           } catch (java.lang.NumberFormatException e) {
-            $evNull = true;
+            Float f = (Float) Cast.processFloatingPointSpecialLiterals($floatStr, true);
+            if (f == null) {
+              $evNull = true;
+            } else {
+              $evPrim = f.floatValue();
+            }
           }
         """
-    case BooleanType =>
-      (c, evPrim, evNull) => code"$evPrim = $c ? 1.0f : 0.0f;"
-    case DateType =>
-      (c, evPrim, evNull) => code"$evNull = true;"
-    case TimestampType =>
-      (c, evPrim, evNull) => code"$evPrim = (float) (${timestampToDoubleCode(c)});"
-    case DecimalType() =>
-      (c, evPrim, evNull) => code"$evPrim = $c.toFloat();"
-    case x: NumericType =>
-      (c, evPrim, evNull) => code"$evPrim = (float) $c;"
+      case BooleanType =>
+        (c, evPrim, evNull) => code"$evPrim = $c ? 1.0f : 0.0f;"
+      case DateType =>
+        (c, evPrim, evNull) => code"$evNull = true;"
+      case TimestampType =>
+        (c, evPrim, evNull) => code"$evPrim = (float) (${timestampToDoubleCode(c)});"
+      case DecimalType() =>
+        (c, evPrim, evNull) => code"$evPrim = $c.toFloat();"
+      case x: NumericType =>
+        (c, evPrim, evNull) => code"$evPrim = (float) $c;"
+    }
   }
 
-  private[this] def castToDoubleCode(from: DataType): CastFunction = from match {
-    case StringType =>
-      (c, evPrim, evNull) =>
-        code"""
+  private[this] def castToDoubleCode(from: DataType, ctx: CodegenContext): CastFunction = {
+    from match {
+      case StringType =>
+        val doubleStr = ctx.freshVariable("doubleStr", StringType)
+        (c, evPrim, evNull) =>
+          code"""
+          final String $doubleStr = $c.toString();
           try {
-            $evPrim = Double.valueOf($c.toString());
+            $evPrim = Double.valueOf($doubleStr);
           } catch (java.lang.NumberFormatException e) {
-            $evNull = true;
+            Double d = (Double) Cast.processFloatingPointSpecialLiterals($doubleStr, false);
+            if (d == null) {
+              $evNull = true;
+            } else {
+              $evPrim = d.doubleValue();
+            }
           }
         """
-    case BooleanType =>
-      (c, evPrim, evNull) => code"$evPrim = $c ? 1.0d : 0.0d;"
-    case DateType =>
-      (c, evPrim, evNull) => code"$evNull = true;"
-    case TimestampType =>
-      (c, evPrim, evNull) => code"$evPrim = ${timestampToDoubleCode(c)};"
-    case DecimalType() =>
-      (c, evPrim, evNull) => code"$evPrim = $c.toDouble();"
-    case x: NumericType =>
-      (c, evPrim, evNull) => code"$evPrim = (double) $c;"
+      case BooleanType =>
+        (c, evPrim, evNull) => code"$evPrim = $c ? 1.0d : 0.0d;"
+      case DateType =>
+        (c, evPrim, evNull) => code"$evNull = true;"
+      case TimestampType =>
+        (c, evPrim, evNull) => code"$evPrim = ${timestampToDoubleCode(c)};"
+      case DecimalType() =>
+        (c, evPrim, evNull) => code"$evPrim = $c.toDouble();"
+      case x: NumericType =>
+        (c, evPrim, evNull) => code"$evPrim = (double) $c;"
+    }
   }
 
   private[this] def castArrayCode(

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -1045,4 +1045,34 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
         Cast(Literal(134.12), DecimalType(3, 2)), "cannot be represented")
     }
   }
+
+  test("Process Infinity, -Infinity, NaN in case insensitive manner") {
+    checkEvaluation(cast("inf", FloatType), Float.PositiveInfinity)
+    checkEvaluation(cast("+inf", FloatType), Float.PositiveInfinity)
+    checkEvaluation(cast("infinity", FloatType), Float.PositiveInfinity)
+    checkEvaluation(cast("+infiNity", FloatType), Float.PositiveInfinity)
+    checkEvaluation(cast(" infinity ", FloatType), Float.PositiveInfinity)
+    checkEvaluation(cast("-infinity", FloatType), Float.NegativeInfinity)
+    checkEvaluation(cast("-infiniTy", FloatType), Float.NegativeInfinity)
+    checkEvaluation(cast("  -infinity  ", FloatType), Float.NegativeInfinity)
+    checkEvaluation(cast("  -inf  ", FloatType), Float.NegativeInfinity)
+
+    checkEvaluation(cast("inf", DoubleType), Double.PositiveInfinity)
+    checkEvaluation(cast("+inf", DoubleType), Double.PositiveInfinity)
+    checkEvaluation(cast("infinity", DoubleType), Double.PositiveInfinity)
+    checkEvaluation(cast("+infiNity", DoubleType), Double.PositiveInfinity)
+    checkEvaluation(cast(" infinity ", DoubleType), Double.PositiveInfinity)
+    checkEvaluation(cast("-infinity", DoubleType), Double.NegativeInfinity)
+    checkEvaluation(cast("-infiniTy", DoubleType), Double.NegativeInfinity)
+    checkEvaluation(cast("  -infinity  ", DoubleType), Double.NegativeInfinity)
+    checkEvaluation(cast("-inf", DoubleType), Double.NegativeInfinity)
+
+    checkEvaluation(cast("nan", FloatType), Float.NaN)
+    checkEvaluation(cast("nAn", FloatType), Float.NaN)
+    checkEvaluation(cast(" nan ", FloatType), Float.NaN)
+
+    checkEvaluation(cast("nan", DoubleType), Double.NaN)
+    checkEvaluation(cast("nAn", DoubleType), Double.NaN)
+    checkEvaluation(cast(" nan ", DoubleType), Double.NaN)
+  }
 }
diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part1.sql
@@ -59,16 +59,14 @@ select avg(CAST(null AS DOUBLE)) from range(1,4);
 select sum(CAST('NaN' AS DOUBLE)) from range(1,4);
 select avg(CAST('NaN' AS DOUBLE)) from range(1,4);
 
--- [SPARK-27768] verify correct results for infinite inputs
 SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
-FROM (VALUES (CAST('1' AS DOUBLE)), (CAST('Infinity' AS DOUBLE))) v(x);
+FROM (VALUES (CAST('1' AS DOUBLE)), (CAST('infinity' AS DOUBLE))) v(x);
 SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
-FROM (VALUES ('Infinity'), ('1')) v(x);
+FROM (VALUES ('infinity'), ('1')) v(x);
 SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
-FROM (VALUES ('Infinity'), ('Infinity')) v(x);
+FROM (VALUES ('infinity'), ('infinity')) v(x);
 SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
-FROM (VALUES ('-Infinity'), ('Infinity')) v(x);
-
+FROM (VALUES ('-infinity'), ('infinity')) v(x);
 
 -- test accuracy with a large input offset
 SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))

diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part1.sql.out
@@ -236,7 +236,7 @@ NaN
 
 -- !query 29
 SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
-FROM (VALUES (CAST('1' AS DOUBLE)), (CAST('Infinity' AS DOUBLE))) v(x)
+FROM (VALUES (CAST('1' AS DOUBLE)), (CAST('infinity' AS DOUBLE))) v(x)
 -- !query 29 schema
 struct<avg(CAST(x AS DOUBLE)):double,var_pop(CAST(x AS DOUBLE)):double>
 -- !query 29 output
@@ -245,7 +245,7 @@ Infinity	NaN
 
 -- !query 30
 SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
-FROM (VALUES ('Infinity'), ('1')) v(x)
+FROM (VALUES ('infinity'), ('1')) v(x)
 -- !query 30 schema
 struct<avg(CAST(x AS DOUBLE)):double,var_pop(CAST(x AS DOUBLE)):double>
 -- !query 30 output
@@ -254,7 +254,7 @@ Infinity	NaN
 
 -- !query 31
 SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
-FROM (VALUES ('Infinity'), ('Infinity')) v(x)
+FROM (VALUES ('infinity'), ('infinity')) v(x)
 -- !query 31 schema
 struct<avg(CAST(x AS DOUBLE)):double,var_pop(CAST(x AS DOUBLE)):double>
 -- !query 31 output
@@ -263,7 +263,7 @@ Infinity	NaN
 
 -- !query 32
 SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
-FROM (VALUES ('-Infinity'), ('Infinity')) v(x)
+FROM (VALUES ('-infinity'), ('infinity')) v(x)
 -- !query 32 schema
 struct<avg(CAST(x AS DOUBLE)):double,var_pop(CAST(x AS DOUBLE)):double>
 -- !query 32 output

diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/float4.sql.out
@@ -63,31 +63,31 @@ SELECT float('nan')
 -- !query 7 schema
 struct<CAST(nan AS FLOAT):float>
 -- !query 7 output
-NULL
+NaN
 
 
 -- !query 8
 SELECT float('   NAN  ')
 -- !query 8 schema
 struct<CAST(   NAN   AS FLOAT):float>
 -- !query 8 output
-NULL
+NaN
 
 
 -- !query 9
 SELECT float('infinity')
 -- !query 9 schema
 struct<CAST(infinity AS FLOAT):float>
 -- !query 9 output
-NULL
+Infinity
 
 
 -- !query 10
 SELECT float('          -INFINiTY   ')
 -- !query 10 schema
 struct<CAST(          -INFINiTY    AS FLOAT):float>
 -- !query 10 output
-NULL
+-Infinity
 
 
 -- !query 11
@@ -135,7 +135,7 @@ SELECT float('nan') / float('nan')
 -- !query 16 schema
 struct<(CAST(CAST(nan AS FLOAT) AS DOUBLE) / CAST(CAST(nan AS FLOAT) AS DOUBLE)):double>
 -- !query 16 output
-NULL
+NaN
 
 
 -- !query 17

diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/float8.sql.out
@@ -95,31 +95,31 @@ SELECT double('nan')
 -- !query 11 schema
 struct<CAST(nan AS DOUBLE):double>
 -- !query 11 output
-NULL
+NaN
 
 
 -- !query 12
 SELECT double('   NAN  ')
 -- !query 12 schema
 struct<CAST(   NAN   AS DOUBLE):double>
 -- !query 12 output
-NULL
+NaN
 
 
 -- !query 13
 SELECT double('infinity')
 -- !query 13 schema
 struct<CAST(infinity AS DOUBLE):double>
 -- !query 13 output
-NULL
+Infinity
 
 
 -- !query 14
 SELECT double('          -INFINiTY   ')
 -- !query 14 schema
 struct<CAST(          -INFINiTY    AS DOUBLE):double>
 -- !query 14 output
-NULL
+-Infinity
 
 
 -- !query 15