From 83892808fde633525c4366b368033ba80c81eaa8 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Thu, 5 May 2016 13:24:14 -0700
Subject: [PATCH 01/23] Mark a number of algorithms and models experimental
 that are marked that way in scala and update the docs for logistic
 regression threshold

---
 python/pyspark/ml/classification.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index f03296333446..645ebdf722f4 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -96,7 +96,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
     threshold = Param(Params._dummy(), "threshold",
                       "Threshold in binary classification prediction, in range [0, 1]." +
-                      " If threshold and thresholds are both set, they must match.",
+                      " If threshold and thresholds are both set, they must match." +
+                      "e.g. threshold must be equal to [1-p, p].",
                       typeConverter=TypeConverters.toFloat)
 
     @keyword_only
@@ -154,7 +155,9 @@ def setThreshold(self, value):
     @since("1.4.0")
     def getThreshold(self):
         """
-        Gets the value of threshold or its default value.
+        Gets the value of threshold or attempt to convert thresholds to threshold if set, or default
+        value if neither are set.
+        This conversion is equivalent to: {{{1 / (1 + thresholds(0) / thresholds(1))}}}.
         """
         self._checkThresholdConsistency()
         if self.isSet(self.thresholds):
@@ -616,6 +619,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
                              RandomForestParams, TreeClassifierParams, HasCheckpointInterval,
                              JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     `http://en.wikipedia.org/wiki/Random_forest Random Forest`
     learning algorithm for classification.
     It supports both binary and multiclass labels, as well as both continuous and categorical
@@ -708,6 +713,7 @@ def _create_model(self, java_model):
 class RandomForestClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
     Model fitted by RandomForestClassifier.
 
     .. versionadded:: 1.4.0
@@ -862,6 +868,8 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
                  HasProbabilityCol, HasRawPredictionCol, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Naive Bayes Classifiers.
     It supports both Multinomial and Bernoulli NB. Multinomial NB
     (`http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html`)
@@ -980,6 +988,8 @@ def getModelType(self):
 class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Model fitted by NaiveBayes.
 
     .. versionadded:: 1.5.0
@@ -1006,6 +1016,8 @@ def theta(self):
 class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
                                      HasMaxIter, HasTol, HasSeed, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Classifier trainer based on the Multilayer Perceptron.
     Each layer has sigmoid activation function, output layer has softmax.
     Number of inputs has to be equal to the size of feature vectors.
@@ -1120,6 +1132,8 @@ def getBlockSize(self):
 class MultilayerPerceptronClassificationModel(JavaModel, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Model fitted by MultilayerPerceptronClassifier.
 
     .. versionadded:: 1.6.0
@@ -1169,6 +1183,8 @@ def getClassifier(self):
 @inherit_doc
 class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable):
     """
+    .. note:: Experimental
+
     Reduction of Multiclass Classification to Binary Classification.
     Performs reduction using one against all strategy.
     For a multiclass classification with k classes, train k models (one per class).
@@ -1323,6 +1339,8 @@ class OneVsRestModel(Model, OneVsRestParams, MLReadable, MLWritable):
     """
+    .. note:: Experimental
+
     Model fitted by OneVsRest.
     This stores the models resulting from training k binary classifiers: one for each class.
     Each example is scored against all k models, and the model with the highest score

From 1fa57e5ded4c8e47ac87cbc783184ca71f9ab699 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Thu, 5 May 2016 13:30:20 -0700
Subject: [PATCH 02/23] Add the rest

---
 python/pyspark/ml/classification.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 645ebdf722f4..f9d5009be59c 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -49,6 +49,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
                          HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds,
                          HasWeightCol, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Logistic regression.
     Currently, this class only supports binary classification.
 
@@ -211,6 +213,8 @@ def _checkThresholdConsistency(self):
 class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Model fitted by LogisticRegression.
 
     .. versionadded:: 1.3.0
@@ -492,6 +496,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
                              TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable,
                              JavaMLReadable):
     """
+    .. note:: Experimental
+
     `http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree`
     learning algorithm for classification.
     It supports both binary and multiclass labels, as well as both continuous and categorical
@@ -586,6 +592,8 @@ def _create_model(self, java_model):
 @inherit_doc
 class DecisionTreeClassificationModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Model fitted by DecisionTreeClassifier.
 
     .. versionadded:: 1.4.0
@@ -714,6 +722,7 @@ def _create_model(self, java_model):
 class RandomForestClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
     """
     .. note:: Experimental
+
     Model fitted by RandomForestClassifier.
 
     .. versionadded:: 1.4.0
@@ -740,6 +749,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
                     GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable,
                     JavaMLReadable):
     """
+    .. note:: Experimental
+
     `http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)`
     learning algorithm for classification.
     It supports binary labels, as well as both continuous and categorical features.
@@ -843,6 +854,8 @@ def getLossType(self):
 class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
     """
+    .. note:: Experimental
+
     Model fitted by GBTClassifier.
 
     .. versionadded:: 1.4.0

From b1ce81779ef93e5a83ab68a30f1ae5a91e0f38ec Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Thu, 5 May 2016 14:51:53 -0700
Subject: [PATCH 03/23] Use mathjax for formula in PyDoc

---
 python/docs/conf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/docs/conf.py b/python/docs/conf.py
index d35bf73c3051..50fb3175a7dc 100644
--- a/python/docs/conf.py
+++ b/python/docs/conf.py
@@ -32,6 +32,7 @@
     'sphinx.ext.autodoc',
     'sphinx.ext.viewcode',
     'epytext',
+    'sphinx.ext.mathjax',
 ]
 
 # Add any paths that contain templates here, relative to this directory.

From 8125c8c6a79cf55a74894a7d2e4efb68a331fcfe Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Thu, 5 May 2016 14:52:18 -0700
Subject: [PATCH 04/23] Switch to math highlighting and update logistic
 regression get doc since it doesn't throw an error

---
 python/pyspark/ml/classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index f9d5009be59c..bbbcc6de7c18 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -159,7 +159,7 @@ def getThreshold(self):
         """
         Gets the value of threshold or attempt to convert thresholds to threshold if set, or default
         value if neither are set.
-        This conversion is equivalent to: {{{1 / (1 + thresholds(0) / thresholds(1))}}}.
+        This conversion is equivalent to: :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`.
         """
         self._checkThresholdConsistency()
         if self.isSet(self.thresholds):
@@ -188,7 +188,7 @@ def getThresholds(self):
         If :py:attr:`thresholds` is set, return its value.
         Otherwise, if :py:attr:`threshold` is set, return the equivalent thresholds for
         binary classification: (1-threshold, threshold).
-        If neither are set, throw an error.
+        If neither are set, return the default value.
         """
         self._checkThresholdConsistency()
         if not self.isSet(self.thresholds) and self.isSet(self.threshold):

From c72fa4679dd2fa56e8f590b1b93beaf8c939b523 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Thu, 5 May 2016 14:54:30 -0700
Subject: [PATCH 05/23] Long line fix

---
 python/pyspark/ml/classification.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index bbbcc6de7c18..14b1aa518f27 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -159,7 +159,8 @@ def getThreshold(self):
         """
         Gets the value of threshold or attempt to convert thresholds to threshold if set, or default
         value if neither are set.
-        This conversion is equivalent to: :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`.
+        This conversion is equivalent to:
+        :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`.
""" self._checkThresholdConsistency() if self.isSet(self.thresholds): From 3fd1dce92e123d89273490d7ad0e1d716efcb124 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 5 May 2016 16:40:17 -0700 Subject: [PATCH 06/23] Start adding the missing params to mutli-layer perceptron, also investigate how to handle the weights param being none --- .../MultilayerPerceptronClassifier.scala | 6 +- python/pyspark/ml/classification.py | 81 +++++++++++++++++-- python/pyspark/ml/wrapper.py | 5 +- 3 files changed, 81 insertions(+), 11 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index 72cf55f6bb99..3906aa542f5a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -75,8 +75,8 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams * @group expertParam */ final val solver: Param[String] = new Param[String](this, "solver", - " Allows setting the solver: minibatch gradient descent (gd) or l-bfgs. " + - " l-bfgs is the default one.", + "Allows setting the solver: minibatch gradient descent (gd) or l-bfgs. " + + "(Default l-bfgs)", ParamValidators.inArray[String](Array("gd", "l-bfgs"))) /** @group getParam */ @@ -88,7 +88,7 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams * @group expertParam */ final val weights: Param[Vector] = new Param[Vector](this, "weights", - " Sets the weights of the model ") + "Sets the weights of the model") /** @group getParam */ final def getWeights: Vector = $(weights) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 14b1aa518f27..f377e8b66429 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -1028,7 +1028,8 @@ def theta(self): @inherit_doc class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, - HasMaxIter, HasTol, HasSeed, JavaMLWritable, JavaMLReadable): + HasMaxIter, HasTol, HasSeed, HasStepSize, JavaMLWritable, + JavaMLReadable): """ .. note:: Experimental @@ -1065,6 +1066,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, >>> mlp2 = MultilayerPerceptronClassifier.load(mlp_path) >>> mlp2.getBlockSize() 1 + >>> mlp2.getStepSize() + 0.03 >>> model_path = temp_path + "/mlp_model" >>> model.save(model_path) >>> model2 = MultilayerPerceptronClassificationModel.load(model_path) @@ -1072,6 +1075,12 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, True >>> model.weights == model2.weights True + >>> mlp2.setWeights([ + ... 2, 5, 1, -7, -5, -10, 0, 0.6, -1, 2, -2, 1, 2, -7, -1, -2, 2, 1, -1, 9, -9, 3, -3, -3, + ... 3.0, 0, -1]) + >>> model3 = mlp2.fit(df) + >>> model3.weights[0] + 2 .. versionadded:: 1.6.0 """ @@ -1085,28 +1094,38 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, "remaining data in a partition then it is adjusted to the size of this " + "data. Recommended size is between 10 and 1000, default is 128.", typeConverter=TypeConverters.toInt) + solver = Param(Params._dummy(), "solver", "Allows setting the solver: minibatch gradient " + + "descent (gd) or l-bfgs. 
+                   "descent (gd) or l-bfgs. (Default l-bfgs)",
+                   typeConverter=TypeConverters.toString)
+    weights = Param(Params._dummy(), "weights", "Sets the weights of the model",
+                    typeConverter=TypeConverters.toVector)
 
     @keyword_only
     def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128):
+                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03,
+                 solver="l-bfgs", weights=None):
         """
         __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128)
+                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03,
+                 solver="l-bfgs", weights=None)
         """
         super(MultilayerPerceptronClassifier, self).__init__()
         self._java_obj = self._new_java_obj(
             "org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid)
-        self._setDefault(maxIter=100, tol=1E-4, blockSize=128)
+        self._setDefault(maxIter=100, tol=1E-4, blockSize=128, stepSize=0.03, solver="l-bfgs",
+                         weights=None)
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)
 
     @keyword_only
     @since("1.6.0")
     def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128):
+                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03,
+                  solver="l-bfgs", weights=None):
         """
         setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128)
+                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03,
+                  solver="l-bfgs", weights=None)
         Sets params for MultilayerPerceptronClassifier.
         """
         kwargs = self.setParams._input_kwargs
@@ -1143,6 +1162,56 @@ def getBlockSize(self):
         """
         return self.getOrDefault(self.blockSize)
 
+    @since("2.0.0")
+    def setStepSize(self, value):
+        """
+        Sets the value of :py:attr:`stepSize`.
+        """
+        return self._set(stepSize=value)
+
+    @since("2.0.0")
+    def getStepSize(self):
+        """
+        Gets the value of stepSize or its default value.
+        """
+        return self.getOrDefault(self.stepSize)
+
+    @since("2.0.0")
+    def setSolver(self, value):
+        """
+        Sets the value of :py:attr:`solver`.
+        """
+        return self._set(solver=value)
+
+    @since("2.0.0")
+    def getSolver(self):
+        """
+        Gets the value of solver or its default value.
+        """
+        return self.getOrDefault(self.solver)
+
+    @property
+    @since("2.0.0")
+    def getOptimizer(self):
+        """
+        Gets the optimizer used.
+        """
+        return self.getSolver()
+
+    @since("2.0.0")
+    def setWeights(self, value):
+        """
+        Sets the value of :py:attr:`weights`.
+        """
+        return self._set(weights=value)
+
+    @since("2.0.0")
+    def getWeights(self):
+        """
+        Gets the value of weights or its default value.
+ """ + return self.getOrDefault(self.weights) + class MultilayerPerceptronClassificationModel(JavaModel, JavaMLWritable, JavaMLReadable): """ diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index fef0040faf86..cc9a99bfb0e1 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -88,8 +88,9 @@ def _transfer_params_to_java(self): paramMap = self.extractParamMap() for param in self.params: if param in paramMap: - pair = self._make_java_param_pair(param, paramMap[param]) - self._java_obj.set(pair) + if paramMap[param] is not None: + pair = self._make_java_param_pair(param, paramMap[param]) + self._java_obj.set(pair) def _transfer_param_map_to_java(self, pyParamMap): """ From c7caa43b9da3655b5c28b5dd3b4e9e954f735945 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 5 May 2016 16:49:09 -0700 Subject: [PATCH 07/23] Or wait we just don't need to support None --- python/pyspark/ml/classification.py | 3 +-- python/pyspark/ml/wrapper.py | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index f377e8b66429..344e27949431 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -1112,8 +1112,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(MultilayerPerceptronClassifier, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid) - self._setDefault(maxIter=100, tol=1E-4, blockSize=128, stepSize=0.03, solver="l-bfgs", - weights=None) + self._setDefault(maxIter=100, tol=1E-4, blockSize=128, stepSize=0.03, solver="l-bfgs") kwargs = self.__init__._input_kwargs self.setParams(**kwargs) diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index cc9a99bfb0e1..fef0040faf86 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -88,9 +88,8 @@ def _transfer_params_to_java(self): paramMap = self.extractParamMap() for param in self.params: if param in paramMap: - if paramMap[param] is not None: - pair = self._make_java_param_pair(param, paramMap[param]) - self._java_obj.set(pair) + pair = self._make_java_param_pair(param, paramMap[param]) + self._java_obj.set(pair) def _transfer_param_map_to_java(self, pyParamMap): """ From 4776221984d36e7beee8b8c5da70c1a1b9010815 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 5 May 2016 17:14:36 -0700 Subject: [PATCH 08/23] Update the doc string for weights param and add doctest that verifys layers stay same but weights change --- .../MultilayerPerceptronClassifier.scala | 4 ++-- python/pyspark/ml/classification.py | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index 3906aa542f5a..af046a095d6a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -88,7 +88,7 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams * @group expertParam */ final val weights: Param[Vector] = new Param[Vector](this, "weights", - "Sets the weights of the model") + "Weights (either initial if before training or actual on model)") /** @group getParam */ final def 
   final def getWeights: Vector = $(weights)
@@ -181,7 +181,7 @@ class MultilayerPerceptronClassifier @Since("1.5.0") (
   def setSeed(value: Long): this.type = set(seed, value)
 
   /**
-   * Sets the model weights.
+   * Sets the initial weights used for the optimizer.
    *
    * @group expertParam
    */
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 344e27949431..938d5b236062 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -1075,12 +1075,14 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
     True
     >>> model.weights == model2.weights
     True
-    >>> mlp2.setWeights([
+    >>> mlp2 = mlp2.setWeights([
     ... 2, 5, 1, -7, -5, -10, 0, 0.6, -1, 2, -2, 1, 2, -7, -1, -2, 2, 1, -1, 9, -9, 3, -3, -3,
     ... 3.0, 0, -1])
     >>> model3 = mlp2.fit(df)
-    >>> model3.weights[0]
-    2
+    >>> model3.weights != model2.weights
+    True
+    >>> model3.layers == model.layers
+    True
 
     .. versionadded:: 1.6.0
     """
@@ -1097,8 +1099,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
     solver = Param(Params._dummy(), "solver", "Allows setting the solver: minibatch gradient " +
                    "descent (gd) or l-bfgs. (Default l-bfgs)",
                    typeConverter=TypeConverters.toString)
-    weights = Param(Params._dummy(), "weights", "Sets the weights of the model",
-                    typeConverter=TypeConverters.toVector)
+    weights = Param(Params._dummy(), "weights", "Weights (either initial if before training or " +
+                    "actual on model)", typeConverter=TypeConverters.toVector)

From 2397004c8bb6d9482422276188149b274ef5411a Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Tue, 10 May 2016 10:38:46 -0700
Subject: [PATCH 09/23] mini fix

---
 python/pyspark/ml/classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 30935cefa975..88afbc1c02ea 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -1120,7 +1120,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
                  solver="l-bfgs", weights=None):
         """
         __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03,
+                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, \
                  solver="l-bfgs", weights=None)
         """
         super(MultilayerPerceptronClassifier, self).__init__()

From a73913b3ea72b067ab9e7f19bdc5821145b003b4 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Tue, 10 May 2016 10:40:04 -0700
Subject: [PATCH 10/23] more pydoc fix

---
 python/pyspark/ml/classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 88afbc1c02ea..3cd8bb58cfd6 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -1137,7 +1137,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
                   solver="l-bfgs", weights=None):
         """
         setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03,
+                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, \
                   solver="l-bfgs", weights=None)
         Sets params for MultilayerPerceptronClassifier.
""" From 9e38ddf6088e0ce7342327b8f1ed83560c8b5a63 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 10 May 2016 11:45:22 -0700 Subject: [PATCH 11/23] Remove flaky doctet component --- python/pyspark/ml/classification.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 3cd8bb58cfd6..2d26b578a5f7 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -1078,8 +1078,6 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, >>> mlp2 = MultilayerPerceptronClassifier.load(mlp_path) >>> mlp2.getBlockSize() 1 - >>> mlp2.getStepSize() - 0.03 >>> model_path = temp_path + "/mlp_model" >>> model.save(model_path) >>> model2 = MultilayerPerceptronClassificationModel.load(model_path) From f4df8f087575b6a994cfa4cdaf3b6f0c7e612884 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 10 May 2016 12:42:12 -0700 Subject: [PATCH 12/23] Add a : as requested --- .../ml/classification/MultilayerPerceptronClassifier.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index af046a095d6a..39496e0586a3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -76,7 +76,7 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams */ final val solver: Param[String] = new Param[String](this, "solver", "Allows setting the solver: minibatch gradient descent (gd) or l-bfgs. " + - "(Default l-bfgs)", + "(Default: l-bfgs)", ParamValidators.inArray[String](Array("gd", "l-bfgs"))) /** @group getParam */ From 2eec9472d879d7136e5d5c0c2931e6b81f1e88a8 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 19 May 2016 13:42:46 -0700 Subject: [PATCH 13/23] Back out some unrelated changes that are in a seperate PR anyways --- python/pyspark/ml/classification.py | 80 +++-------------------------- 1 file changed, 6 insertions(+), 74 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 66940773b796..3d8261b5d8d5 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -1045,8 +1045,7 @@ def theta(self): @inherit_doc class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, - HasMaxIter, HasTol, HasSeed, HasStepSize, JavaMLWritable, - JavaMLReadable): + HasMaxIter, HasTol, HasSeed, JavaMLWritable, JavaMLReadable): """ .. note:: Experimental @@ -1090,14 +1089,6 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, True >>> model.weights == model2.weights True - >>> mlp2 = mlp2.setWeights([ - ... 2, 5, 1, -7, -5, -10, 0, 0.6, -1, 2, -2, 1, 2, -7, -1, -2, 2, 1, -1, 9, -9, 3, -3, -3, - ... 3.0, 0, -1]) - >>> model3 = mlp2.fit(df) - >>> model3.weights != model2.weights - True - >>> model3.layers == model.layers - True .. versionadded:: 1.6.0 """ @@ -1111,37 +1102,28 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, "remaining data in a partition then it is adjusted to the size of this " + "data. 
                       "data. Recommended size is between 10 and 1000, default is 128.",
                       typeConverter=TypeConverters.toInt)
-    solver = Param(Params._dummy(), "solver", "Allows setting the solver: minibatch gradient " +
-                   "descent (gd) or l-bfgs. (Default l-bfgs)",
-                   typeConverter=TypeConverters.toString)
-    weights = Param(Params._dummy(), "weights", "Weights (either initial if before training or " +
-                    "actual on model)", typeConverter=TypeConverters.toVector)
 
     @keyword_only
     def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03,
-                 solver="l-bfgs", weights=None):
+                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128):
         """
         __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, \
-                 solver="l-bfgs", weights=None)
+                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128)
         """
         super(MultilayerPerceptronClassifier, self).__init__()
         self._java_obj = self._new_java_obj(
             "org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid)
-        self._setDefault(maxIter=100, tol=1E-4, blockSize=128, stepSize=0.03, solver="l-bfgs")
+        self._setDefault(maxIter=100, tol=1E-4, blockSize=128)
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)
 
     @keyword_only
     @since("1.6.0")
     def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
-                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03,
-                  solver="l-bfgs", weights=None):
+                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128):
         """
         setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
-                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, \
-                  solver="l-bfgs", weights=None)
+                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128)
         Sets params for MultilayerPerceptronClassifier.
         """
         kwargs = self.setParams._input_kwargs
@@ -1178,56 +1160,6 @@ def getBlockSize(self):
         """
         return self.getOrDefault(self.blockSize)
 
-    @since("2.0.0")
-    def setStepSize(self, value):
-        """
-        Sets the value of :py:attr:`stepSize`.
-        """
-        return self._set(stepSize=value)
-
-    @since("2.0.0")
-    def getStepSize(self):
-        """
-        Gets the value of stepSize or its default value.
-        """
-        return self.getOrDefault(self.stepSize)
-
-    @since("2.0.0")
-    def setSolver(self, value):
-        """
-        Sets the value of :py:attr:`solver`.
-        """
-        return self._set(solver=value)
-
-    @since("2.0.0")
-    def getSolver(self):
-        """
-        Gets the value of solver or its default value.
-        """
-        return self.getOrDefault(self.solver)
-
-    @property
-    @since("2.0.0")
-    def getOptimizer(self):
-        """
-        Gets the optimizer used.
-        """
-        return self.getSolver()
-
-    @since("2.0.0")
-    def setWeights(self, value):
-        """
-        Sets the value of :py:attr:`weights`.
-        """
-        return self._set(weights=value)
-
-    @since("2.0.0")
-    def getWeights(self):
-        """
-        Gets the value of weights or its default value.
- """ - return self.getOrDefault(self.weights) - class MultilayerPerceptronClassificationModel(JavaModel, JavaMLWritable, JavaMLReadable): """ From 4111b2d01c33fac3c2537fe9430ce05063530a48 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 26 May 2016 12:29:46 -0700 Subject: [PATCH 14/23] Update scaladoc and PyDoc to both have the correct chain for getThreshold (thresholds -> threshold -> threshold default value) --- .../spark/ml/classification/LogisticRegression.scala | 5 ++--- python/pyspark/ml/classification.py | 11 ++++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 0ab4459bdb9d..69c57133689a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -72,10 +72,9 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas /** * Get threshold for binary classification. * - * If [[threshold]] is set, returns that value. - * Otherwise, if [[thresholds]] is set with length 2 (i.e., binary classification), + * If [[thresholds]] is set with length 2 (i.e., binary classification), * this returns the equivalent threshold: {{{1 / (1 + thresholds(0) / thresholds(1))}}}. - * Otherwise, returns [[threshold]] default value. + * Otherwise, returns [[threshold]] if set, or its default value if unset. * * @group getParam * @throws IllegalArgumentException if [[thresholds]] is set to an array of length other than 2. diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 3d8261b5d8d5..ec44f12d2c82 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -99,7 +99,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti threshold = Param(Params._dummy(), "threshold", "Threshold in binary classification prediction, in range [0, 1]." + " If threshold and thresholds are both set, they must match." + - "e.g. threshold must be equal to [1-p, p].", + "e.g. if threshold is p, then thresholds must be equal to [1-p, p].", typeConverter=TypeConverters.toFloat) @keyword_only @@ -157,10 +157,11 @@ def setThreshold(self, value): @since("1.4.0") def getThreshold(self): """ - Gets the value of threshold or attempt to convert thresholds to threshold if set, or default - value if neither are set. - This conversion is equivalent to: - :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`. + Get threshold for binary classification. + + If :py:attr:`thresholds is set with length 2 (i.e., binary classification), + this returns the equivalent threshold: :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`. + Otherwise, returns :py:attr:`threshold` if set or its default value. 
""" self._checkThresholdConsistency() if self.isSet(self.thresholds): From 53ab7906d57f3ce2c954a5b54ff1ba5e97ebd00a Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 26 May 2016 12:46:19 -0700 Subject: [PATCH 15/23] pep8 --- python/pyspark/ml/classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index ec44f12d2c82..3e0e03bd2aa5 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -160,7 +160,8 @@ def getThreshold(self): Get threshold for binary classification. If :py:attr:`thresholds is set with length 2 (i.e., binary classification), - this returns the equivalent threshold: :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`. + this returns the equivalent threshold: + :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`. Otherwise, returns :py:attr:`threshold` if set or its default value. """ self._checkThresholdConsistency() From a7aadec43efbb523545db8806b189872727b786f Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 6 Jun 2016 13:24:52 -0700 Subject: [PATCH 16/23] Revert doc change --- python/pyspark/ml/classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 2b6951f0cde0..47e1b5c9afa8 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -191,7 +191,7 @@ def getThresholds(self): If :py:attr:`thresholds` is set, return its value. Otherwise, if :py:attr:`threshold` is set, return the equivalent thresholds for binary classification: (1-threshold, threshold). - If neither are set, return the default value. + If neither are set, throw an error. """ self._checkThresholdConsistency() if not self.isSet(self.thresholds) and self.isSet(self.threshold): From e4061f4f4c8531c97f31989e1723e44c6170e673 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 6 Jun 2016 13:27:06 -0700 Subject: [PATCH 17/23] minor fix --- python/pyspark/ml/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 47e1b5c9afa8..ebc945368d61 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -159,10 +159,10 @@ def getThreshold(self): """ Get threshold for binary classification. - If :py:attr:`thresholds is set with length 2 (i.e., binary classification), + If :py:attr:`thresholds` is set with length 2 (i.e., binary classification), this returns the equivalent threshold: :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`. - Otherwise, returns :py:attr:`threshold` if set or its default value. + Otherwise, returns :py:attr:`threshold` if set or its default value if unset. """ self._checkThresholdConsistency() if self.isSet(self.thresholds): From 398161285321c976e850f18b486e512efbe0d24e Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 13 Jun 2016 19:21:48 -0700 Subject: [PATCH 18/23] oook lets try 86ing mathjax but... welll w/e --- python/docs/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/docs/conf.py b/python/docs/conf.py index 50fb3175a7dc..d35bf73c3051 100644 --- a/python/docs/conf.py +++ b/python/docs/conf.py @@ -32,7 +32,6 @@ 'sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'epytext', - 'sphinx.ext.mathjax', ] # Add any paths that contain templates here, relative to this directory. 
From 3d13c6c662cf18ed0dc390fb355b83c7989370f9 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Mon, 13 Jun 2016 21:27:48 -0700
Subject: [PATCH 19/23] reenable mathjax

---
 python/docs/conf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/docs/conf.py b/python/docs/conf.py
index d35bf73c3051..50fb3175a7dc 100644
--- a/python/docs/conf.py
+++ b/python/docs/conf.py
@@ -32,6 +32,7 @@
     'sphinx.ext.autodoc',
     'sphinx.ext.viewcode',
     'epytext',
+    'sphinx.ext.mathjax',
 ]
 
 # Add any paths that contain templates here, relative to this directory.

From 2be8cdf15d6431c8d2ef0d7a6c5d136e2b71410e Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Mon, 13 Jun 2016 21:27:56 -0700
Subject: [PATCH 20/23] Revert "[SPARK-15745][SQL] Use classloader's
 getResource() for reading resource files in HiveTests" as it was causing
 Jenkins failures.

This reverts commit f7288e166c696da15e790c28fc3ed78531fd362d.
---
 .../org/apache/spark/sql/hive/test/TestHive.scala | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 1d1d5e3f7bd6..81964db5477c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -179,8 +179,19 @@ private[hive] class TestHiveSparkSession(
   hiveFilesTemp.mkdir()
   ShutdownHookManager.registerShutdownDeleteDir(hiveFilesTemp)
 
+  val inRepoTests = if (System.getProperty("user.dir").endsWith("sql" + File.separator + "hive")) {
+    new File("src" + File.separator + "test" + File.separator + "resources" + File.separator)
+  } else {
+    new File("sql" + File.separator + "hive" + File.separator + "src" + File.separator + "test" +
+      File.separator + "resources")
+  }
+
   def getHiveFile(path: String): File = {
-    new File(Thread.currentThread().getContextClassLoader.getResource(path).getFile)
+    val stripped = path.replaceAll("""\.\.\/""", "").replace('/', File.separatorChar)
+    hiveDevHome
+      .map(new File(_, stripped))
+      .filter(_.exists)
+      .getOrElse(new File(inRepoTests, stripped))
   }
 
   val describedTable = "DESCRIBE (\\w+)".r

From 4431daa9552426f02a993778eb5a5f2c2d87b183 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Tue, 14 Jun 2016 10:59:27 -0700
Subject: [PATCH 21/23] Support both methods

---
 .../apache/spark/sql/hive/test/TestHive.scala | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 81964db5477c..30b0fb5b0cfc 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -179,19 +179,25 @@ private[hive] class TestHiveSparkSession(
   hiveFilesTemp.mkdir()
   ShutdownHookManager.registerShutdownDeleteDir(hiveFilesTemp)
 
-  val inRepoTests = if (System.getProperty("user.dir").endsWith("sql" + File.separator + "hive")) {
-    new File("src" + File.separator + "test" + File.separator + "resources" + File.separator)
-  } else {
-    new File("sql" + File.separator + "hive" + File.separator + "src" + File.separator + "test" +
-      File.separator + "resources")
+  lazy val inRepoTests = {
+    if (System.getProperty("user.dir").endsWith("sql" + File.separator + "hive")) {
+      new File("src" + File.separator + "test" + File.separator + "resources" + File.separator)
+    } else {
File("sql" + File.separator + "hive" + File.separator + "src" + File.separator + "test" + + File.separator + "resources") + } } def getHiveFile(path: String): File = { - val stripped = path.replaceAll("""\.\.\/""", "").replace('/', File.separatorChar) - hiveDevHome - .map(new File(_, stripped)) - .filter(_.exists) - .getOrElse(new File(inRepoTests, stripped)) + // Attempt to load from class loader, fall back to old system property based. + val resourcePath = Option(Thread.currentThread().getContextClassLoader.getResource(path)) + resourcePath.map(rp => new File(rp.getFile)).getOrElse{ + val stripped = path.replaceAll("""\.\.\/""", "").replace('/', File.separatorChar) + hiveDevHome + .map(new File(_, stripped)) + .filter(_.exists) + .getOrElse(new File(inRepoTests, stripped)) + } } val describedTable = "DESCRIBE (\\w+)".r From d842309c749b817bdacb6a57bccba74cc7c0fbf4 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 21 Jun 2016 10:32:57 -0700 Subject: [PATCH 22/23] Revert "Support both methods" This reverts commit 4431daa9552426f02a993778eb5a5f2c2d87b183. --- .../apache/spark/sql/hive/test/TestHive.scala | 26 +++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 30b0fb5b0cfc..81964db5477c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -179,25 +179,19 @@ private[hive] class TestHiveSparkSession( hiveFilesTemp.mkdir() ShutdownHookManager.registerShutdownDeleteDir(hiveFilesTemp) - lazy val inRepoTests = { - if (System.getProperty("user.dir").endsWith("sql" + File.separator + "hive")) { - new File("src" + File.separator + "test" + File.separator + "resources" + File.separator) - } else { - new File("sql" + File.separator + "hive" + File.separator + "src" + File.separator + "test" + - File.separator + "resources") - } + val inRepoTests = if (System.getProperty("user.dir").endsWith("sql" + File.separator + "hive")) { + new File("src" + File.separator + "test" + File.separator + "resources" + File.separator) + } else { + new File("sql" + File.separator + "hive" + File.separator + "src" + File.separator + "test" + + File.separator + "resources") } def getHiveFile(path: String): File = { - // Attempt to load from class loader, fall back to old system property based. - val resourcePath = Option(Thread.currentThread().getContextClassLoader.getResource(path)) - resourcePath.map(rp => new File(rp.getFile)).getOrElse{ - val stripped = path.replaceAll("""\.\.\/""", "").replace('/', File.separatorChar) - hiveDevHome - .map(new File(_, stripped)) - .filter(_.exists) - .getOrElse(new File(inRepoTests, stripped)) - } + val stripped = path.replaceAll("""\.\.\/""", "").replace('/', File.separatorChar) + hiveDevHome + .map(new File(_, stripped)) + .filter(_.exists) + .getOrElse(new File(inRepoTests, stripped)) } val describedTable = "DESCRIBE (\\w+)".r From de63f9f7451ae6e527fe383537a2afbdf273449a Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 21 Jun 2016 11:47:08 -0700 Subject: [PATCH 23/23] Revert "Revert "[SPARK-15745][SQL] Use classloader's getResource() for reading resource files in HiveTests" as it was causing Jenkins failures." This reverts commit 2be8cdf15d6431c8d2ef0d7a6c5d136e2b71410e. 
---
 .../org/apache/spark/sql/hive/test/TestHive.scala | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 81964db5477c..1d1d5e3f7bd6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -179,19 +179,8 @@ private[hive] class TestHiveSparkSession(
   hiveFilesTemp.mkdir()
   ShutdownHookManager.registerShutdownDeleteDir(hiveFilesTemp)
 
-  val inRepoTests = if (System.getProperty("user.dir").endsWith("sql" + File.separator + "hive")) {
-    new File("src" + File.separator + "test" + File.separator + "resources" + File.separator)
-  } else {
-    new File("sql" + File.separator + "hive" + File.separator + "src" + File.separator + "test" +
-      File.separator + "resources")
-  }
-
   def getHiveFile(path: String): File = {
-    val stripped = path.replaceAll("""\.\.\/""", "").replace('/', File.separatorChar)
-    hiveDevHome
-      .map(new File(_, stripped))
-      .filter(_.exists)
-      .getOrElse(new File(inRepoTests, stripped))
+    new File(Thread.currentThread().getContextClassLoader.getResource(path).getFile)
   }
 
   val describedTable = "DESCRIBE (\\w+)".r
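A closing note on the TestHive lookup that patches 20-23 toggle: the restored getHiveFile resolves test resources through the context classloader, and getResource returns null when the path is missing, so the one-liner above will throw a NullPointerException for an absent resource. A defensive variant (only a sketch under that assumption, not what the patch itself does) would wrap the lookup in an Option:

    import java.io.File

    // Null-safe sketch of the classloader-based lookup; the patched getHiveFile
    // assumes the resource exists and dereferences the returned URL directly.
    def getHiveFileSafe(path: String): Option[File] =
      Option(Thread.currentThread().getContextClassLoader.getResource(path))
        .map(url => new File(url.getFile))

Patch 21 experimented with roughly this shape, combining the classloader lookup with the old path-based fallback, before the series settled on the classloader-only version.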