From a186dfd39e47fa7d1f4e6ced88190ecb4450eaf3 Mon Sep 17 00:00:00 2001
From: Aman Omer <amanomer1996@gmail.com>
Date: Mon, 9 Dec 2019 17:08:17 +0530
Subject: [PATCH 1/6] Initial commit

---
 .../spark/sql/catalyst/expressions/collectionOperations.scala   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index d5d42510842e..ff2121da21d0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -1081,7 +1081,7 @@ case class ArrayContains(left: Expression, right: Expression)
     (left.dataType, right.dataType) match {
       case (_, NullType) => Seq.empty
       case (ArrayType(e1, hasNull), e2) =>
-        TypeCoercion.findTightestCommonType(e1, e2) match {
+        TypeCoercion.findWiderTypeForTwo(e1, e2) match {
           case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
           case _ => Seq.empty
         }

From 71b7ad35f33d92d8d73161198260b5d3e36848e0 Mon Sep 17 00:00:00 2001
From: Aman Omer <amanomer1996@gmail.com>
Date: Mon, 9 Dec 2019 18:35:29 +0530
Subject: [PATCH 2/6] Test case

---
 .../spark/sql/DataFrameFunctionsSuite.scala   | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index a346377cd1bc..22ae235c5b0f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -850,19 +850,19 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
     val errorMsg1 =
       s"""
          |Input to function array_contains should have been array followed by a
-         |value with same element type, but it's [array<int>, decimal(29,29)].
+         |value with same element type, but it's [array<int>, decimal(38,29)].
        """.stripMargin.replace("\n", " ").trim()
     assert(e1.message.contains(errorMsg1))
 
-    val e2 = intercept[AnalysisException] {
-      OneRowRelation().selectExpr("array_contains(array(1), 'foo')")
-    }
-    val errorMsg2 =
-      s"""
-         |Input to function array_contains should have been array followed by a
-         |value with same element type, but it's [array<int>, string].
-       """.stripMargin.replace("\n", " ").trim()
-    assert(e2.message.contains(errorMsg2))
+    checkAnswer(
+      OneRowRelation().selectExpr("array_contains(array(1), 'foo')"),
+      Seq(Row(false))
+    )
+
+    checkAnswer(
+      sql("select array_contains(array(1.0, 2.02), 1.0)"),
+      Seq(Row(true))
+    )
   }
 
   test("arrays_overlap function") {

From d2ce3aedd3431fc6b56f711f5d88decb60fcbe35 Mon Sep 17 00:00:00 2001
From: Aman Omer <amanomer1996@gmail.com>
Date: Mon, 9 Dec 2019 20:13:01 +0530
Subject: [PATCH 3/6] Updated migration guide

---
 docs/sql-migration-guide.md                   | 46 +++++++++++++++++++
 .../spark/sql/DataFrameFunctionsSuite.scala   | 12 ++++-
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 9bcd36ce4127..736a7fdebf3c 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -256,6 +256,52 @@ license: |
 
   - Since Spark 3.0, the unary arithmetic operator plus(`+`) only accepts string, numeric and interval type values as inputs. Besides, `+` with a integral string representation will be coerced to double value, e.g. `+'1'` results `1.0`. In Spark version 2.4 and earlier, this operator is ignored. There is no type checking for it, thus, all type values with a `+` prefix are valid, e.g. `+ array(1, 2)` is valid and results `[1, 2]`. Besides, there is no type coercion for it at all, e.g. in Spark 2.4, the result of `+'1'` is string `1`.
 
+  - Since Spark 3.0, the parameter(first or second) to array_contains function is implicitly promoted to the wider type parameter.
+    <table class="table">
+        <tr>
+            <th>
+                <b>Query</b>
+            </th>
+            <th>
+                <b>Spark 2.4</b>
+            </th>
+            <th>
+                <b>Spark 3.0</b>
+            </th>
+            <th>
+                <b>Remarks</b>
+            </th>
+        </tr>
+        <tr>
+            <td>
+                <code>select array_contains(array(1.10), 1.1);</code>
+            </td>
+            <td>
+                <code>AnalysisException</code> is thrown.
+            </td>
+            <td>
+                <code>True</code>
+            </td>
+            <td>
+                In spark 2.4, left parameter is of <code>array(decimal(3,2))</code> where as right parameter is of <code>decimal(2,1)</code>
+            </td>
+        </tr>
+        <tr>
+            <td>
+                <code>select array_contains(array(1.1), 1.10);</code>
+            </td>
+            <td>
+                <code>AnalysisException</code> is thrown.
+            </td>
+            <td>
+                <code>True</code>
+            </td>
+            <td>
+                In spark 2.4, left parameter is of <code>array(decimal(2,1))</code> where as right parameter is of <code>decimal(3,2)</code>
+            </td>
+        </tr>
+    </table>
+    
 ## Upgrading from Spark SQL 2.4 to 2.4.1
 
   - The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 22ae235c5b0f..39876adb0596 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -860,9 +860,19 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
     )
 
     checkAnswer(
-      sql("select array_contains(array(1.0, 2.02), 1.0)"),
+      sql("select array_contains(array(1.10), 1.1)"),
       Seq(Row(true))
     )
+
+    checkAnswer(
+      sql("SELECT array_contains(array(1.1), 1.10)"),
+      Seq(Row(true))
+    )
+
+    checkAnswer(
+      sql("SELECT array_contains(array(1.11), 1.1)"),
+      Seq(Row(false))
+    )
   }
 
   test("arrays_overlap function") {

From 74123680aa53a7f8d734998490276b539094a6a4 Mon Sep 17 00:00:00 2001
From: Aman Omer <amanomer1996@gmail.com>
Date: Thu, 12 Dec 2019 15:36:56 +0530
Subject: [PATCH 4/6] findWiderTypeForTwo ->
 findWiderTypeWithoutStringPromotionForTwo

---
 .../spark/sql/catalyst/analysis/TypeCoercion.scala  |  2 +-
 .../catalyst/expressions/collectionOperations.scala |  2 +-
 .../apache/spark/sql/DataFrameFunctionsSuite.scala  | 13 +++++++++----
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index e76193fd9422..f94e733ad813 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -243,7 +243,7 @@ object TypeCoercion {
    * string. If the wider decimal type exceeds system limitation, this rule will truncate
    * the decimal type before return it.
    */
-  private[analysis] def findWiderTypeWithoutStringPromotionForTwo(
+  private[catalyst] def findWiderTypeWithoutStringPromotionForTwo(
       t1: DataType,
       t2: DataType): Option[DataType] = {
     findTightestCommonType(t1, t2)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index ff2121da21d0..6ed68e47ce7a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -1081,7 +1081,7 @@ case class ArrayContains(left: Expression, right: Expression)
     (left.dataType, right.dataType) match {
       case (_, NullType) => Seq.empty
       case (ArrayType(e1, hasNull), e2) =>
-        TypeCoercion.findWiderTypeForTwo(e1, e2) match {
+        TypeCoercion.findWiderTypeWithoutStringPromotionForTwo(e1, e2) match {
           case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
           case _ => Seq.empty
         }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 39876adb0596..6b04a4ea3578 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -854,10 +854,15 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
        """.stripMargin.replace("\n", " ").trim()
     assert(e1.message.contains(errorMsg1))
 
-    checkAnswer(
-      OneRowRelation().selectExpr("array_contains(array(1), 'foo')"),
-      Seq(Row(false))
-    )
+    val e2 = intercept[AnalysisException] {
+      OneRowRelation().selectExpr("array_contains(array(1), 'foo')")
+    }
+    val errorMsg2 =
+      s"""
+         |Input to function array_contains should have been array followed by a
+         |value with same element type, but it's [array<int>, string].
+       """.stripMargin.replace("\n", " ").trim()
+    assert(e2.message.contains(errorMsg2))
 
     checkAnswer(
       sql("select array_contains(array(1.10), 1.1)"),

From 6c3545de883ad27dab9814b1576c8495cd476048 Mon Sep 17 00:00:00 2001
From: Aman Omer <amanomer1996@gmail.com>
Date: Thu, 12 Dec 2019 19:38:36 +0530
Subject: [PATCH 5/6] revert migration guide changes

---
 docs/sql-migration-guide.md | 46 -------------------------------------
 1 file changed, 46 deletions(-)

diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 736a7fdebf3c..9bcd36ce4127 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -256,52 +256,6 @@ license: |
 
   - Since Spark 3.0, the unary arithmetic operator plus(`+`) only accepts string, numeric and interval type values as inputs. Besides, `+` with a integral string representation will be coerced to double value, e.g. `+'1'` results `1.0`. In Spark version 2.4 and earlier, this operator is ignored. There is no type checking for it, thus, all type values with a `+` prefix are valid, e.g. `+ array(1, 2)` is valid and results `[1, 2]`. Besides, there is no type coercion for it at all, e.g. in Spark 2.4, the result of `+'1'` is string `1`.
 
-  - Since Spark 3.0, the parameter(first or second) to array_contains function is implicitly promoted to the wider type parameter.
-    <table class="table">
-        <tr>
-            <th>
-                <b>Query</b>
-            </th>
-            <th>
-                <b>Spark 2.4</b>
-            </th>
-            <th>
-                <b>Spark 3.0</b>
-            </th>
-            <th>
-                <b>Remarks</b>
-            </th>
-        </tr>
-        <tr>
-            <td>
-                <code>select array_contains(array(1.10), 1.1);</code>
-            </td>
-            <td>
-                <code>AnalysisException</code> is thrown.
-            </td>
-            <td>
-                <code>True</code>
-            </td>
-            <td>
-                In spark 2.4, left parameter is of <code>array(decimal(3,2))</code> where as right parameter is of <code>decimal(2,1)</code>
-            </td>
-        </tr>
-        <tr>
-            <td>
-                <code>select array_contains(array(1.1), 1.10);</code>
-            </td>
-            <td>
-                <code>AnalysisException</code> is thrown.
-            </td>
-            <td>
-                <code>True</code>
-            </td>
-            <td>
-                In spark 2.4, left parameter is of <code>array(decimal(2,1))</code> where as right parameter is of <code>decimal(3,2)</code>
-            </td>
-        </tr>
-    </table>
-    
 ## Upgrading from Spark SQL 2.4 to 2.4.1
 
   - The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was

From 7b12d6cf78d2228d36192e78cca5a12648c1db80 Mon Sep 17 00:00:00 2001
From: Aman Omer <amanomer1996@gmail.com>
Date: Fri, 13 Dec 2019 18:02:49 +0530
Subject: [PATCH 6/6] Split UT

---
 .../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala    | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 6b04a4ea3578..584768eff700 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -863,7 +863,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
          |value with same element type, but it's [array<int>, string].
        """.stripMargin.replace("\n", " ").trim()
     assert(e2.message.contains(errorMsg2))
+  }
 
+  test("SPARK-29600: ArrayContains function may return incorrect result for DecimalType") {
     checkAnswer(
       sql("select array_contains(array(1.10), 1.1)"),
       Seq(Row(true))

+ Query +	+ Spark 2.4 +	+ Spark 3.0 +	+ Remarks +
+ `select array_contains(array(1.10), 1.1);` +	+ `AnalysisException` is thrown. +	+ `True` +	+ In spark 2.4, left parameter is of `array(decimal(3,2))` where as right parameter is of `decimal(2,1)` +
+ `select array_contains(array(1.1), 1.10);` +	+ `AnalysisException` is thrown. +	+ `True` +	+ In spark 2.4, left parameter is of `array(decimal(2,1))` where as right parameter is of `decimal(3,2)` +
- Query -	- Spark 2.4 -	- Spark 3.0 -	- Remarks -
- `select array_contains(array(1.10), 1.1);` -	- `AnalysisException` is thrown. -	- `True` -	- In spark 2.4, left parameter is of `array(decimal(3,2))` where as right parameter is of `decimal(2,1)` -
- `select array_contains(array(1.1), 1.10);` -	- `AnalysisException` is thrown. -	- `True` -	- In spark 2.4, left parameter is of `array(decimal(2,1))` where as right parameter is of `decimal(3,2)` -