apache · yanboliang · May 7, 2017 · May 9, 2017 · May 9, 2017 · May 9, 2017
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
@@ -246,18 +246,55 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
                    "be used in the model. Supported options: auto, binomial, multinomial",
                    typeConverter=TypeConverters.toString)
 
+    lowerBoundsOnCoefficients = Param(Params._dummy(), "lowerBoundsOnCoefficients",
+                                      "The lower bounds on coefficients if fitting under bound "
+                                      "constrained optimization. The bound matrix must be "
+                                      "compatible with the shape "
+                                      "(1, number of features) for binomial regression, or "
+                                      "(number of classes, number of features) "
+                                      "for multinomial regression.",
+                                      typeConverter=TypeConverters.toMatrix)
+
+    upperBoundsOnCoefficients = Param(Params._dummy(), "upperBoundsOnCoefficients",
+                                      "The upper bounds on coefficients if fitting under bound "
+                                      "constrained optimization. The bound matrix must be "
+                                      "compatible with the shape "
+                                      "(1, number of features) for binomial regression, or "
+                                      "(number of classes, number of features) "
+                                      "for multinomial regression.",
+                                      typeConverter=TypeConverters.toMatrix)
+
+    lowerBoundsOnIntercepts = Param(Params._dummy(), "lowerBoundsOnIntercepts",
+                                    "The lower bounds on intercepts if fitting under bound "
+                                    "constrained optimization. The bounds vector size must be "
+                                    "equal with 1 for binomial regression, or the number of "
+                                    "classes for multinomial regression.",
+                                    typeConverter=TypeConverters.toVector)
+
+    upperBoundsOnIntercepts = Param(Params._dummy(), "upperBoundsOnIntercepts",
+                                    "The upper bounds on intercepts if fitting under bound "
+                                    "constrained optimization. The bound vector size must be "
+                                    "equal with 1 for binomial regression, or the number of "
+                                    "classes for multinomial regression.",
+                                    typeConverter=TypeConverters.toVector)
+
     @keyword_only
     def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                  maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
                  threshold=0.5, thresholds=None, probabilityCol="probability",
                  rawPredictionCol="rawPrediction", standardization=True, weightCol=None,
-                 aggregationDepth=2, family="auto"):
+                 aggregationDepth=2, family="auto",
+                 lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None,
+                 lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None):
+
         """
         __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
                  maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
                  threshold=0.5, thresholds=None, probabilityCol="probability", \
                  rawPredictionCol="rawPrediction", standardization=True, weightCol=None, \
-                 aggregationDepth=2, family="auto")
+                 aggregationDepth=2, family="auto", \
+                 lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None, \
+                 lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None)
         If the threshold and thresholds Params are both set, they must be equivalent.
         """
         super(LogisticRegression, self).__init__()
@@ -274,13 +311,17 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
                   maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
                   threshold=0.5, thresholds=None, probabilityCol="probability",
                   rawPredictionCol="rawPrediction", standardization=True, weightCol=None,
-                  aggregationDepth=2, family="auto"):
+                  aggregationDepth=2, family="auto",
+                  lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None,
+                  lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None):
         """
         setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
                   maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
                   threshold=0.5, thresholds=None, probabilityCol="probability", \
                   rawPredictionCol="rawPrediction", standardization=True, weightCol=None, \
-                  aggregationDepth=2, family="auto")
+                  aggregationDepth=2, family="auto", \
+                  lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None, \
+                  lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None)
         Sets params for logistic regression.
         If the threshold and thresholds Params are both set, they must be equivalent.
         """
@@ -375,6 +416,62 @@ def getFamily(self):
         """
         return self.getOrDefault(self.family)
 
+    @since("2.3.0")
+    def setLowerBoundsOnCoefficients(self, value):
+        """
+        Sets the value of :py:attr:`lowerBoundsOnCoefficients`.
+        """
+        return self._set(lowerBoundsOnCoefficients=value)
+
+    @since("2.3.0")
+    def getLowerBoundsOnCoefficients(self):
+        """
+        Gets the value of :py:attr:`lowerBoundsOnCoefficients`.
+        """
+        return self.getOrDefault(self.lowerBoundsOnCoefficients)
+
+    @since("2.3.0")
+    def setUpperBoundsOnCoefficients(self, value):
+        """
+        Sets the value of :py:attr:`upperBoundsOnCoefficients`.
+        """
+        return self._set(upperBoundsOnCoefficients=value)
+
+    @since("2.3.0")
+    def getUpperBoundsOnCoefficients(self):
+        """
+        Gets the value of :py:attr:`upperBoundsOnCoefficients`.
+        """
+        return self.getOrDefault(self.upperBoundsOnCoefficients)
+
+    @since("2.3.0")
+    def setLowerBoundsOnIntercepts(self, value):
+        """
+        Sets the value of :py:attr:`lowerBoundsOnIntercepts`.
+        """
+        return self._set(lowerBoundsOnIntercepts=value)
+
+    @since("2.3.0")
+    def getLowerBoundsOnIntercepts(self):
+        """
+        Gets the value of :py:attr:`lowerBoundsOnIntercepts`.
+        """
+        return self.getOrDefault(self.lowerBoundsOnIntercepts)
+
+    @since("2.3.0")
+    def setUpperBoundsOnIntercepts(self, value):
+        """
+        Sets the value of :py:attr:`upperBoundsOnIntercepts`.
+        """
+        return self._set(upperBoundsOnIntercepts=value)
+
+    @since("2.3.0")
+    def getUpperBoundsOnIntercepts(self):
+        """
+        Gets the value of :py:attr:`upperBoundsOnIntercepts`.
+        """
+        return self.getOrDefault(self.upperBoundsOnIntercepts)
+
 
 class LogisticRegressionModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable):
     """

diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py
@@ -27,7 +27,7 @@
 
 from py4j.java_gateway import JavaObject
 
-from pyspark.ml.linalg import DenseVector, Vector
+from pyspark.ml.linalg import DenseVector, Vector, Matrix
 from pyspark.ml.util import Identifiable
 
 
@@ -169,6 +169,15 @@ def toVector(value):
                 return DenseVector(value)
         raise TypeError("Could not convert %s to vector" % value)
 
+    @staticmethod
+    def toMatrix(value):
+        """
+        Return ``value`` unchanged when it is a Matrix; otherwise raise TypeError.
+        """
+        if not isinstance(value, Matrix):
+            raise TypeError("Could not convert %s to matrix" % value)
+        return value
+
     @staticmethod
     def toFloat(value):
         """

diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
@@ -1271,6 +1271,43 @@ def test_tweedie_distribution(self):
         self.assertTrue(np.isclose(model2.intercept, 0.6667, atol=1E-4))
 
 
+class LogisticRegressionTest(SparkSessionTestCase):
+
+    def test_binomial_logistic_regression_with_bound(self):
+        """Binomial fit with lower-bounded coefficients and an upper-bounded intercept."""
+        df = self.spark.createDataFrame(
+            [(1.0, 1.0, Vectors.dense(0.0, 5.0)),
+             (0.0, 2.0, Vectors.dense(1.0, 2.0)),
+             (1.0, 3.0, Vectors.dense(2.0, 1.0)),
+             (0.0, 4.0, Vectors.dense(3.0, 3.0)), ], ["label", "weight", "features"])
+
+        lor = LogisticRegression(regParam=0.01, weightCol="weight",
+                                 lowerBoundsOnCoefficients=Matrices.dense(1, 2, [-1.0, -1.0]),
+                                 upperBoundsOnIntercepts=Vectors.dense(0.0))
+        model = lor.fit(df)
+        self.assertTrue(
+            np.allclose(model.coefficients.toArray(), [-0.2944, -0.0484], atol=1E-4))
+        self.assertTrue(np.isclose(model.intercept, 0.0, atol=1E-4))
+
+    def test_multinomial_logistic_regression_with_bound(self):
+        """Multinomial fit with lower-bounded coefficients and upper-bounded intercepts."""
+        data_path = "data/mllib/sample_multiclass_classification_data.txt"
+        df = self.spark.read.format("libsvm").load(data_path)
+
+        lor = LogisticRegression(regParam=0.01,
+                                 lowerBoundsOnCoefficients=Matrices.dense(3, 4, range(12)),
+                                 upperBoundsOnIntercepts=Vectors.dense(0.0, 0.0, 0.0))
+        model = lor.fit(df)
+        expected = [[4.593, 4.5516, 9.0099, 12.2904],
+                    [1.0, 8.1093, 7.0, 10.0],
+                    [3.041, 5.0, 8.0, 11.0]]
+        # Compare the whole matrix at once so an unexpected shape is not silently skipped.
+        self.assertTrue(
+            np.allclose(model.coefficientMatrix.toArray(), expected, atol=1E-4))
+        self.assertTrue(
+            np.allclose(model.interceptVector.toArray(), [-0.9057, -1.1392, -0.0033], atol=1E-4))
+
+
 class FPGrowthTests(SparkSessionTestCase):
     def setUp(self):
         super(FPGrowthTests, self).setUp()