Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ private[ml] trait FactorizationMachinesParams extends PredictorParams
"The solver algorithm for optimization. Supported options: " +
s"${supportedSolvers.mkString(", ")}. (Default adamW)",
ParamValidators.inArray[String](supportedSolvers))

setDefault(factorSize -> 8, fitIntercept -> true, fitLinear -> true, regParam -> 0.0,
miniBatchFraction -> 1.0, initStd -> 0.01, maxIter -> 100, stepSize -> 1.0, tol -> 1E-6,
solver -> AdamW)
}

private[ml] trait FactorizationMachines extends FactorizationMachinesParams {
Expand Down Expand Up @@ -308,7 +312,6 @@ class FMRegressor @Since("3.0.0") (
*/
@Since("3.0.0")
def setFactorSize(value: Int): this.type = set(factorSize, value)
setDefault(factorSize -> 8)

/**
* Set whether to fit intercept term.
Expand All @@ -318,7 +321,6 @@ class FMRegressor @Since("3.0.0") (
*/
@Since("3.0.0")
def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value)
setDefault(fitIntercept -> true)

/**
* Set whether to fit linear term.
Expand All @@ -328,7 +330,6 @@ class FMRegressor @Since("3.0.0") (
*/
@Since("3.0.0")
def setFitLinear(value: Boolean): this.type = set(fitLinear, value)
setDefault(fitLinear -> true)

/**
* Set the L2 regularization parameter.
Expand All @@ -338,7 +339,6 @@ class FMRegressor @Since("3.0.0") (
*/
@Since("3.0.0")
def setRegParam(value: Double): this.type = set(regParam, value)
setDefault(regParam -> 0.0)

/**
* Set the mini-batch fraction parameter.
Expand All @@ -348,7 +348,6 @@ class FMRegressor @Since("3.0.0") (
*/
@Since("3.0.0")
def setMiniBatchFraction(value: Double): this.type = set(miniBatchFraction, value)
setDefault(miniBatchFraction -> 1.0)

/**
* Set the standard deviation of initial coefficients.
Expand All @@ -358,7 +357,6 @@ class FMRegressor @Since("3.0.0") (
*/
@Since("3.0.0")
def setInitStd(value: Double): this.type = set(initStd, value)
setDefault(initStd -> 0.01)

/**
* Set the maximum number of iterations.
Expand All @@ -368,7 +366,6 @@ class FMRegressor @Since("3.0.0") (
*/
@Since("3.0.0")
def setMaxIter(value: Int): this.type = set(maxIter, value)
setDefault(maxIter -> 100)

/**
* Set the initial step size for the first step (like learning rate).
Expand All @@ -378,7 +375,6 @@ class FMRegressor @Since("3.0.0") (
*/
@Since("3.0.0")
def setStepSize(value: Double): this.type = set(stepSize, value)
setDefault(stepSize -> 1.0)

/**
* Set the convergence tolerance of iterations.
Expand All @@ -388,7 +384,6 @@ class FMRegressor @Since("3.0.0") (
*/
@Since("3.0.0")
def setTol(value: Double): this.type = set(tol, value)
setDefault(tol -> 1E-6)

/**
* Set the solver algorithm used for optimization.
Expand All @@ -399,7 +394,6 @@ class FMRegressor @Since("3.0.0") (
*/
@Since("3.0.0")
def setSolver(value: String): this.type = set(solver, value)
setDefault(solver -> AdamW)

/**
* Set the random seed for weight initialization.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam
s"${supportedSolvers.mkString(", ")}. (Default irls)",
ParamValidators.inArray[String](supportedSolvers))

setDefault(family -> Gaussian.name, variancePower -> 0.0, maxIter -> 25, tol -> 1E-6,
regParam -> 0.0, solver -> IRLS)

@Since("2.0.0")
override def validateAndTransformSchema(
schema: StructType,
Expand Down Expand Up @@ -257,7 +260,6 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val
*/
@Since("2.0.0")
def setFamily(value: String): this.type = set(family, value)
setDefault(family -> Gaussian.name)

/**
* Sets the value of param [[variancePower]].
Expand All @@ -268,7 +270,6 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val
*/
@Since("2.2.0")
def setVariancePower(value: Double): this.type = set(variancePower, value)
setDefault(variancePower -> 0.0)

/**
* Sets the value of param [[linkPower]].
Expand Down Expand Up @@ -305,7 +306,6 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val
*/
@Since("2.0.0")
def setMaxIter(value: Int): this.type = set(maxIter, value)
setDefault(maxIter -> 25)

/**
* Sets the convergence tolerance of iterations.
Expand All @@ -316,7 +316,6 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val
*/
@Since("2.0.0")
def setTol(value: Double): this.type = set(tol, value)
setDefault(tol -> 1E-6)

/**
* Sets the regularization parameter for L2 regularization.
Expand All @@ -332,7 +331,6 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val
*/
@Since("2.0.0")
def setRegParam(value: Double): this.type = set(regParam, value)
setDefault(regParam -> 0.0)

/**
* Sets the value of param [[weightCol]].
Expand Down Expand Up @@ -364,7 +362,6 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val
*/
@Since("2.0.0")
def setSolver(value: String): this.type = set(solver, value)
setDefault(solver -> IRLS)

/**
* Sets the link prediction (linear predictor) column name.
Expand Down
8 changes: 4 additions & 4 deletions python/pyspark/ml/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -2421,6 +2421,10 @@ class _MultilayerPerceptronParams(_ProbabilisticClassifierParams, HasSeed, HasMa
initialWeights = Param(Params._dummy(), "initialWeights", "The initial weights of the model.",
typeConverter=TypeConverters.toVector)

def __init__(self):
    """Run the parent initializer, then hand this class's default values to ``_setDefault``."""
    super(_MultilayerPerceptronParams, self).__init__()
    # Collected in one mapping so the defaults can be read at a glance.
    defaults = dict(maxIter=100, tol=1E-6, blockSize=128, stepSize=0.03, solver="l-bfgs")
    self._setDefault(**defaults)

@since("1.6.0")
def getLayers(self):
"""
Expand Down Expand Up @@ -2524,7 +2528,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
super(MultilayerPerceptronClassifier, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid)
self._setDefault(maxIter=100, tol=1E-6, blockSize=128, stepSize=0.03, solver="l-bfgs")
kwargs = self._input_kwargs
self.setParams(**kwargs)

Expand Down Expand Up @@ -3120,9 +3123,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
super(FMClassifier, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.FMClassifier", self.uid)
self._setDefault(factorSize=8, fitIntercept=True, fitLinear=True, regParam=0.0,
miniBatchFraction=1.0, initStd=0.01, maxIter=100, stepSize=1.0,
tol=1e-6, solver="adamW")
kwargs = self._input_kwargs
self.setParams(**kwargs)

Expand Down
16 changes: 11 additions & 5 deletions python/pyspark/ml/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -1891,6 +1891,11 @@ class _GeneralizedLinearRegressionParams(_PredictorParams, HasFitIntercept, HasM
"or empty, we treat all instance offsets as 0.0",
typeConverter=TypeConverters.toString)

def __init__(self):
    """Run the parent initializer, then hand this class's default values to ``_setDefault``."""
    super(_GeneralizedLinearRegressionParams, self).__init__()
    # Collected in one mapping so the defaults can be read at a glance.
    defaults = dict(
        family="gaussian",
        maxIter=25,
        tol=1e-6,
        regParam=0.0,
        solver="irls",
        variancePower=0.0,
        aggregationDepth=2,
    )
    self._setDefault(**defaults)

@since("2.0.0")
def getFamily(self):
"""
Expand Down Expand Up @@ -2023,8 +2028,6 @@ def __init__(self, labelCol="label", featuresCol="features", predictionCol="pred
super(GeneralizedLinearRegression, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.regression.GeneralizedLinearRegression", self.uid)
self._setDefault(family="gaussian", maxIter=25, tol=1e-6, regParam=0.0, solver="irls",
variancePower=0.0, aggregationDepth=2)
kwargs = self._input_kwargs

self.setParams(**kwargs)
Expand Down Expand Up @@ -2398,6 +2401,12 @@ class _FactorizationMachinesParams(_PredictorParams, HasMaxIter, HasStepSize, Ha
solver = Param(Params._dummy(), "solver", "The solver algorithm for optimization. Supported " +
"options: gd, adamW. (Default adamW)", typeConverter=TypeConverters.toString)

def __init__(self):
    """Run the parent initializer, then hand this class's default values to ``_setDefault``."""
    super(_FactorizationMachinesParams, self).__init__()
    # Collected in one mapping so the defaults can be read at a glance.
    defaults = dict(
        factorSize=8,
        fitIntercept=True,
        fitLinear=True,
        regParam=0.0,
        miniBatchFraction=1.0,
        initStd=0.01,
        maxIter=100,
        stepSize=1.0,
        tol=1e-6,
        solver="adamW",
    )
    self._setDefault(**defaults)

@since("3.0.0")
def getFactorSize(self):
"""
Expand Down Expand Up @@ -2489,9 +2498,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
super(FMRegressor, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.regression.FMRegressor", self.uid)
self._setDefault(factorSize=8, fitIntercept=True, fitLinear=True, regParam=0.0,
miniBatchFraction=1.0, initStd=0.01, maxIter=100, stepSize=1.0,
tol=1e-6, solver="adamW")
kwargs = self._input_kwargs
self.setParams(**kwargs)

Expand Down
65 changes: 62 additions & 3 deletions python/pyspark/ml/tests/test_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,78 @@
import unittest

from pyspark.ml import Transformer
from pyspark.ml.classification import DecisionTreeClassifier, LogisticRegression, OneVsRest, \
OneVsRestModel
from pyspark.ml.classification import DecisionTreeClassifier, FMClassifier, \
FMClassificationModel, LogisticRegression, MultilayerPerceptronClassifier, \
MultilayerPerceptronClassificationModel, OneVsRest, OneVsRestModel
from pyspark.ml.clustering import KMeans
from pyspark.ml.feature import Binarizer, HashingTF, PCA
from pyspark.ml.linalg import Vectors
from pyspark.ml.param import Params
from pyspark.ml.pipeline import Pipeline, PipelineModel
from pyspark.ml.regression import DecisionTreeRegressor, LinearRegression
from pyspark.ml.regression import DecisionTreeRegressor, GeneralizedLinearRegression, \
GeneralizedLinearRegressionModel, \
LinearRegression
from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWriter
from pyspark.ml.wrapper import JavaParams
from pyspark.testing.mlutils import MockUnaryTransformer, SparkSessionTestCase


class TestDefaultSolver(SparkSessionTestCase):
    """Check that a fitted model's ``solver`` value is unchanged after a save/load round trip."""

    def _check_solver_round_trip(self, model, model_cls, dir_name, solver, df, num_rows):
        """
        Save ``model``, reload it through ``model_cls.load`` and compare the two models.

        Asserts that both the fitted and the reloaded model return ``solver`` from
        ``getSolver()`` and that the first ``num_rows`` rows of their ``transform(df)``
        results are equal.

        :param model: fitted model to persist.
        :param model_cls: model class whose ``load`` is used to read the model back.
        :param dir_name: sub-directory name (under a fresh temp dir) to save into.
        :param solver: solver name both models are expected to report.
        :param df: DataFrame passed to ``transform`` on both models.
        :param num_rows: number of rows taken from each transformed DataFrame.
        """
        # Imported locally so this class does not depend on a file-level import.
        from shutil import rmtree

        self.assertEqual(model.getSolver(), solver)
        transformed1 = model.transform(df)
        path = tempfile.mkdtemp()
        try:
            model_path = path + "/" + dir_name
            model.save(model_path)
            model2 = model_cls.load(model_path)
            self.assertEqual(model2.getSolver(), solver)
            transformed2 = model2.transform(df)
            self.assertEqual(transformed1.take(num_rows), transformed2.take(num_rows))
        finally:
            # mkdtemp() never cleans up after itself; remove the directory even
            # when an assertion above fails so test runs do not leak temp dirs.
            rmtree(path, ignore_errors=True)

    def test_multilayer_load(self):
        df = self.spark.createDataFrame([(0.0, Vectors.dense([0.0, 0.0])),
                                         (1.0, Vectors.dense([0.0, 1.0])),
                                         (1.0, Vectors.dense([1.0, 0.0])),
                                         (0.0, Vectors.dense([1.0, 1.0]))],
                                        ["label", "features"])

        mlp = MultilayerPerceptronClassifier(layers=[2, 2, 2], seed=123)
        model = mlp.fit(df)
        self._check_solver_round_trip(
            model, MultilayerPerceptronClassificationModel, "mlp", "l-bfgs", df, 4)

    def test_fm_load(self):
        df = self.spark.createDataFrame([(1.0, Vectors.dense(1.0)),
                                         (0.0, Vectors.sparse(1, [], []))],
                                        ["label", "features"])
        fm = FMClassifier(factorSize=2, maxIter=50, stepSize=2.0)
        model = fm.fit(df)
        self._check_solver_round_trip(model, FMClassificationModel, "fm", "adamW", df, 2)

    def test_glr_load(self):
        df = self.spark.createDataFrame([(1.0, Vectors.dense(0.0, 0.0)),
                                         (1.0, Vectors.dense(1.0, 2.0)),
                                         (2.0, Vectors.dense(0.0, 0.0)),
                                         (2.0, Vectors.dense(1.0, 1.0))],
                                        ["label", "features"])
        glr = GeneralizedLinearRegression(family="gaussian", link="identity", linkPredictionCol="p")
        model = glr.fit(df)
        self._check_solver_round_trip(
            model, GeneralizedLinearRegressionModel, "glr", "irls", df, 4)


class PersistenceTest(SparkSessionTestCase):

def test_linear_regression(self):
Expand Down