From 5f0ebddc36bb4f63aca162d3f0c23d56860a55b6 Mon Sep 17 00:00:00 2001
From: hyukjinkwon <gurwls223@gmail.com>
Date: Sat, 25 Mar 2017 16:15:59 +0900
Subject: [PATCH 1/4] Match Scala/Python/R changes

---
 R/pkg/R/functions.R             |  6 +++---
 python/pyspark/sql/functions.py | 11 +++++++----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 2cff3ac08c3a..449476dec533 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -2632,8 +2632,8 @@ setMethod("date_sub", signature(y = "Column", x = "numeric"),
 
 #' format_number
 #'
-#' Formats numeric column y to a format like '#,###,###.##', rounded to x decimal places,
-#' and returns the result as a string column.
+#' Formats numeric column y to a format like '#,###,###.##', rounded to x decimal places
+#' with HALF_EVEN round mode, and returns the result as a string column.
 #'
 #' If x is 0, the result has no decimal point or fractional part.
 #' If x < 0, the result will be null.
@@ -3548,7 +3548,7 @@ setMethod("row_number",
 
 #' array_contains
 #'
-#' Returns true if the array contain the value.
+#' Returns null if the array is null, true if the array contains the value, and false otherwise.
 #'
 #' @param x A Column
 #' @param value A value to be checked if contained in the column
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index f9121e60f35b..1697d145e9b1 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1327,8 +1327,8 @@ def encode(col, charset):
 @since(1.5)
 def format_number(col, d):
     """
-    Formats the number X to a format like '#,--#,--#.--', rounded to d decimal places,
-    and returns the result as a string.
+    Formats the number X to a format like '#,###,###.##', rounded to d decimal places
+    with HALF_EVEN round mode, and returns the result as a string.
 
     :param col: the column name of the numeric value to be formatted
     :param d: the N decimal places
@@ -1675,8 +1675,8 @@ def array(*cols):
 @since(1.5)
 def array_contains(col, value):
     """
-    Collection function: returns True if the array contains the given value. The collection
-    elements and value must be of the same type.
+    Collection function: returns null if the array is null, true if the array contains the
+    given value, and false otherwise.
 
     :param col: name of column containing array
     :param value: value to check for in array
@@ -1684,6 +1684,9 @@ def array_contains(col, value):
     >>> df = spark.createDataFrame([(["a", "b", "c"],), ([],)], ['data'])
     >>> df.select(array_contains(df.data, "a")).collect()
     [Row(array_contains(data, a)=True), Row(array_contains(data, a)=False)]
+    >>> df = spark.createDataFrame([(["1", "2", "3"],), ([],)], ['data'])
+    >>> df.select(array_contains(df.data, 1)).collect()
+    [Row(array_contains(data, 1)=True), Row(array_contains(data, 1)=False)]
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.array_contains(_to_java_column(col), value))

From 33f132808562aaa78446c05f1fb3462603346935 Mon Sep 17 00:00:00 2001
From: hyukjinkwon <gurwls223@gmail.com>
Date: Mon, 27 Mar 2017 08:12:28 +0900
Subject: [PATCH 2/4] Remove potentially confusing doctest in array_contains in
 Python

---
 python/pyspark/sql/functions.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 1697d145e9b1..843ae3816f06 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1684,9 +1684,6 @@ def array_contains(col, value):
     >>> df = spark.createDataFrame([(["a", "b", "c"],), ([],)], ['data'])
     >>> df.select(array_contains(df.data, "a")).collect()
     [Row(array_contains(data, a)=True), Row(array_contains(data, a)=False)]
-    >>> df = spark.createDataFrame([(["1", "2", "3"],), ([],)], ['data'])
-    >>> df.select(array_contains(df.data, 1)).collect()
-    [Row(array_contains(data, 1)=True), Row(array_contains(data, 1)=False)]
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.array_contains(_to_java_column(col), value))

From d21a9bf760d5e03d844b26c44132e3c5f9953af6 Mon Sep 17 00:00:00 2001
From: hyukjinkwon <gurwls223@gmail.com>
Date: Mon, 27 Mar 2017 08:26:59 +0900
Subject: [PATCH 3/4] Add the test in python/pyspark/sql/tests.py instead

---
 python/pyspark/sql/tests.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index b93b7ed19210..265238041c5d 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1129,6 +1129,14 @@ def test_rand_functions(self):
         rndn2 = df.select('key', functions.randn(0)).collect()
         self.assertEqual(sorted(rndn1), sorted(rndn2))
 
+    def test_array_contains_function(self):
+        from pyspark.sql.functions import array_contains
+
+        df = self.spark.createDataFrame([(["1", "2", "3"],), ([],)], ['data'])
+        b = df.select(array_contains(df.data, 1).alias('bool')).collect()
+        # The value argument can be implicitly castable to the element's type of the array.
+        self.assertEqual([Row(bool=True), Row(bool=False)], b)
+
     def test_between_function(self):
         df = self.sc.parallelize([
             Row(a=1, b=2, c=3),

From d05aba5b70ad22fd0e5661168dc6deceff51a13e Mon Sep 17 00:00:00 2001
From: hyukjinkwon <gurwls223@gmail.com>
Date: Mon, 27 Mar 2017 08:28:50 +0900
Subject: [PATCH 4/4] Rename variables

---
 python/pyspark/sql/tests.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 265238041c5d..db41b4edb6dd 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1133,9 +1133,9 @@ def test_array_contains_function(self):
         from pyspark.sql.functions import array_contains
 
         df = self.spark.createDataFrame([(["1", "2", "3"],), ([],)], ['data'])
-        b = df.select(array_contains(df.data, 1).alias('bool')).collect()
+        actual = df.select(array_contains(df.data, 1).alias('b')).collect()
         # The value argument can be implicitly castable to the element's type of the array.
-        self.assertEqual([Row(bool=True), Row(bool=False)], b)
+        self.assertEqual([Row(b=True), Row(b=False)], actual)
 
     def test_between_function(self):
         df = self.sc.parallelize([