From 13e0245f11f0199807f3c5342cdd6d6aa1e1a92a Mon Sep 17 00:00:00 2001
From: Wojtek Szymanski
Date: Sun, 5 Mar 2017 15:52:52 +0100
Subject: [PATCH 1/2] Bucketizer.handleInvalid docs improved

---
 .../org/apache/spark/ml/feature/Bucketizer.scala   | 13 ++++++++-----
 python/pyspark/ml/feature.py                       |  4 +++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index d1f3b2af1e48..db662308279d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -78,16 +78,19 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String
   def setOutputCol(value: String): this.type = set(outputCol, value)
 
   /**
-   * Param for how to handle invalid entries. Options are 'skip' (filter out rows with
-   * invalid values), 'error' (throw an error), or 'keep' (keep invalid values in a special
-   * additional bucket).
+   * Param for how to handle invalid entries containing either NaN or null values.
+   * Values outside the splits will always be treated as errors.
+   * Options are 'skip' (filter out rows with invalid values), 'error' (throw an error),
+   * or 'keep' (keep invalid values in a special additional bucket).
    * Default: "error"
    * @group param
    */
   // TODO: SPARK-18619 Make Bucketizer inherit from HasHandleInvalid.
   @Since("2.1.0")
-  val handleInvalid: Param[String] = new Param[String](this, "handleInvalid", "how to handle " +
-    "invalid entries. Options are skip (filter out rows with invalid values), " +
+  val handleInvalid: Param[String] = new Param[String](this, "handleInvalid",
+    "how to handle invalid entries containing either NaN or null values. " +
+    "Values outside the splits will always be treated as errors. " +
+    "Options are skip (filter out rows with invalid values), " +
     "error (throw an error), or keep (keep invalid values in a special additional bucket).",
     ParamValidators.inArray(Bucketizer.supportedHandleInvalids))
 
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 92f8549e9cb9..af8c3daa4d24 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -356,7 +356,9 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
               "splits specified will be treated as errors.",
               typeConverter=TypeConverters.toListFloat)
 
-    handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid entries. " +
+    handleInvalid = Param(Params._dummy(), "handleInvalid",
+                          "how to handle invalid entries containing either NaN or null values. " +
+                          "Values outside the splits will always be treated as errors. " +
                           "Options are 'skip' (filter out rows with invalid values), " +
                           "'error' (throw an error), or 'keep' (keep invalid values in a special " +
                           "additional bucket).",

From ca6e9577f16e453abd25e8011db0146394822ef3 Mon Sep 17 00:00:00 2001
From: Wojtek Szymanski
Date: Mon, 6 Mar 2017 22:58:03 +0100
Subject: [PATCH 2/2] Bucketizer docs update - nulls not supported in handleInvalid

---
 .../main/scala/org/apache/spark/ml/feature/Bucketizer.scala | 4 ++--
 python/pyspark/ml/feature.py                                | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index db662308279d..07d44a9dca0c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -78,7 +78,7 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String
   def setOutputCol(value: String): this.type = set(outputCol, value)
 
   /**
-   * Param for how to handle invalid entries containing either NaN or null values.
+   * Param for how to handle invalid entries containing NaN values.
    * Values outside the splits will always be treated as errors.
    * Options are 'skip' (filter out rows with invalid values), 'error' (throw an error),
    * or 'keep' (keep invalid values in a special additional bucket).
@@ -88,7 +88,7 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String
   // TODO: SPARK-18619 Make Bucketizer inherit from HasHandleInvalid.
   @Since("2.1.0")
   val handleInvalid: Param[String] = new Param[String](this, "handleInvalid",
-    "how to handle invalid entries containing either NaN or null values. " +
+    "how to handle invalid entries containing NaN values. " +
     "Values outside the splits will always be treated as errors. " +
     "Options are skip (filter out rows with invalid values), " +
     "error (throw an error), or keep (keep invalid values in a special additional bucket).",
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index af8c3daa4d24..5aaeb7c4decf 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -357,7 +357,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
               typeConverter=TypeConverters.toListFloat)
 
     handleInvalid = Param(Params._dummy(), "handleInvalid",
-                          "how to handle invalid entries containing either NaN or null values. " +
+                          "how to handle invalid entries containing NaN values. " +
                           "Values outside the splits will always be treated as errors. " +
                           "Options are 'skip' (filter out rows with invalid values), " +
                           "'error' (throw an error), or 'keep' (keep invalid values in a special " +