diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala index 6ef42500f86f7..2f2b29908b0e1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala @@ -85,7 +85,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setFactorSize(value: Int): this.type = set(factorSize, value) - setDefault(factorSize -> 8) /** * Set whether to fit intercept term. @@ -95,7 +94,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) - setDefault(fitIntercept -> true) /** * Set whether to fit linear term. @@ -105,7 +103,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setFitLinear(value: Boolean): this.type = set(fitLinear, value) - setDefault(fitLinear -> true) /** * Set the L2 regularization parameter. @@ -115,7 +112,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setRegParam(value: Double): this.type = set(regParam, value) - setDefault(regParam -> 0.0) /** * Set the mini-batch fraction parameter. @@ -125,7 +121,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setMiniBatchFraction(value: Double): this.type = set(miniBatchFraction, value) - setDefault(miniBatchFraction -> 1.0) /** * Set the standard deviation of initial coefficients. @@ -135,7 +130,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setInitStd(value: Double): this.type = set(initStd, value) - setDefault(initStd -> 0.01) /** * Set the maximum number of iterations. @@ -145,7 +139,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setMaxIter(value: Int): this.type = set(maxIter, value) - setDefault(maxIter -> 100) /** * Set the initial step size for the first step (like learning rate). @@ -155,7 +148,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setStepSize(value: Double): this.type = set(stepSize, value) - setDefault(stepSize -> 1.0) /** * Set the convergence tolerance of iterations. @@ -165,7 +157,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setTol(value: Double): this.type = set(tol, value) - setDefault(tol -> 1E-6) /** * Set the solver algorithm used for optimization. @@ -176,7 +167,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setSolver(value: String): this.type = set(solver, value) - setDefault(solver -> AdamW) /** * Set the random seed for weight initialization. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 4adc527c89b36..77272c65eb231 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -55,6 +55,9 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR */ final override val threshold: DoubleParam = new DoubleParam(this, "threshold", "threshold in binary classification prediction applied to rawPrediction") + + setDefault(regParam -> 0.0, maxIter -> 100, fitIntercept -> true, tol -> 1E-6, + standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSize -> 1) } /** @@ -82,7 +85,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setRegParam(value: Double): this.type = set(regParam, value) - setDefault(regParam -> 0.0) /** * Set the maximum number of iterations. @@ -92,7 +94,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setMaxIter(value: Int): this.type = set(maxIter, value) - setDefault(maxIter -> 100) /** * Whether to fit an intercept term. @@ -102,7 +103,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) - setDefault(fitIntercept -> true) /** * Set the convergence tolerance of iterations. @@ -113,7 +113,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setTol(value: Double): this.type = set(tol, value) - setDefault(tol -> 1E-6) /** * Whether to standardize the training features before fitting the model. @@ -123,7 +122,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setStandardization(value: Boolean): this.type = set(standardization, value) - setDefault(standardization -> true) /** * Set the value of param [[weightCol]]. @@ -142,7 +140,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setThreshold(value: Double): this.type = set(threshold, value) - setDefault(threshold -> 0.0) /** * Suggested depth for treeAggregate (greater than or equal to 2). @@ -154,7 +151,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) - setDefault(aggregationDepth -> 2) /** * Set block size for stacking input data in matrices. 
@@ -173,7 +169,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("3.1.0") def setBlockSize(value: Int): this.type = set(blockSize, value) - setDefault(blockSize -> 1) @Since("2.2.0") override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra) @@ -381,7 +376,6 @@ class LinearSVCModel private[classification] ( @Since("2.2.0") def setThreshold(value: Double): this.type = set(threshold, value) - setDefault(threshold, 0.0) private val margin: Vector => Double = (features) => { BLAS.dot(features, coefficients) + intercept diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 47b3e2de7695c..0e3708bfe9a68 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -243,6 +243,10 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas @Since("2.2.0") def getUpperBoundsOnIntercepts: Vector = $(upperBoundsOnIntercepts) + setDefault(regParam -> 0.0, elasticNetParam -> 0.0, maxIter -> 100, tol -> 1E-6, + fitIntercept -> true, family -> "auto", standardization -> true, threshold -> 0.5, + aggregationDepth -> 2, blockSize -> 1) + protected def usingBoundConstrainedOptimization: Boolean = { isSet(lowerBoundsOnCoefficients) || isSet(upperBoundsOnCoefficients) || isSet(lowerBoundsOnIntercepts) || isSet(upperBoundsOnIntercepts) @@ -290,7 +294,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.2.0") def setRegParam(value: Double): this.type = set(regParam, value) - setDefault(regParam -> 0.0) /** * Set the ElasticNet mixing parameter. @@ -306,7 +309,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.4.0") def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value) - setDefault(elasticNetParam -> 0.0) /** * Set the maximum number of iterations. @@ -316,7 +318,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.2.0") def setMaxIter(value: Int): this.type = set(maxIter, value) - setDefault(maxIter -> 100) /** * Set the convergence tolerance of iterations. @@ -327,7 +328,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.4.0") def setTol(value: Double): this.type = set(tol, value) - setDefault(tol -> 1E-6) /** * Whether to fit an intercept term. @@ -337,7 +337,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.4.0") def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) - setDefault(fitIntercept -> true) /** * Sets the value of param [[family]]. @@ -347,7 +346,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("2.1.0") def setFamily(value: String): this.type = set(family, value) - setDefault(family -> "auto") /** * Whether to standardize the training features before fitting the model. 
@@ -361,11 +359,9 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.5.0") def setStandardization(value: Boolean): this.type = set(standardization, value) - setDefault(standardization -> true) @Since("1.5.0") override def setThreshold(value: Double): this.type = super.setThreshold(value) - setDefault(threshold -> 0.5) @Since("1.5.0") override def getThreshold: Double = super.getThreshold @@ -396,7 +392,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("2.1.0") def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) - setDefault(aggregationDepth -> 2) /** * Set the lower bounds on coefficients if fitting under bound constrained optimization. @@ -447,7 +442,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("3.1.0") def setBlockSize(value: Int): this.type = set(blockSize, value) - setDefault(blockSize -> 1) private def assertBoundConstrainedOptimizationParamsValid( numCoefficientSets: Int, diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala index e65295dbdaf55..6b1537bcc5069 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala @@ -64,6 +64,8 @@ private[classification] trait NaiveBayesParams extends PredictorParams with HasW /** @group getParam */ final def getModelType: String = $(modelType) + + setDefault(smoothing -> 1.0, modelType -> NaiveBayes.Multinomial) } // scalastyle:off line.size.limit @@ -107,7 +109,6 @@ class NaiveBayes @Since("1.5.0") ( */ @Since("1.5.0") def setSmoothing(value: Double): this.type = set(smoothing, value) - setDefault(smoothing -> 1.0) /** * Set the model type using a string (case-sensitive). @@ -117,7 +118,6 @@ class NaiveBayes @Since("1.5.0") ( */ @Since("1.5.0") def setModelType(value: String): this.type = set(modelType, value) - setDefault(modelType -> Multinomial) /** * Sets the value of param [[weightCol]]. diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index b09f11dcfe156..5a60bed2652f7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -72,6 +72,8 @@ private[clustering] trait BisectingKMeansParams extends Params with HasMaxIter @Since("2.0.0") def getMinDivisibleClusterSize: Double = $(minDivisibleClusterSize) + setDefault(k -> 4, maxIter -> 20, minDivisibleClusterSize -> 1.0) + /** * Validates and transforms the input schema. 
* @param schema input schema @@ -226,11 +228,6 @@ class BisectingKMeans @Since("2.0.0") ( @Since("2.0.0") override val uid: String) extends Estimator[BisectingKMeansModel] with BisectingKMeansParams with DefaultParamsWritable { - setDefault( - k -> 4, - maxIter -> 20, - minDivisibleClusterSize -> 1.0) - @Since("2.0.0") override def copy(extra: ParamMap): BisectingKMeans = defaultCopy(extra) diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index 18fd220b4ca9c..996036e2d6330 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -59,6 +59,8 @@ private[clustering] trait GaussianMixtureParams extends Params with HasMaxIter w @Since("2.0.0") def getK: Int = $(k) + setDefault(k -> 2, maxIter -> 100, tol -> 0.01, blockSize -> 1) + /** * Validates and transforms the input schema. * @@ -328,11 +330,6 @@ class GaussianMixture @Since("2.0.0") ( @Since("2.0.0") override val uid: String) extends Estimator[GaussianMixtureModel] with GaussianMixtureParams with DefaultParamsWritable { - setDefault( - k -> 2, - maxIter -> 100, - tol -> 0.01) - @Since("2.0.0") override def copy(extra: ParamMap): GaussianMixture = defaultCopy(extra) @@ -392,7 +389,6 @@ class GaussianMixture @Since("2.0.0") ( */ @Since("3.1.0") def setBlockSize(value: Int): this.type = set(blockSize, value) - setDefault(blockSize -> 1) /** * Number of samples per cluster to use when initializing Gaussians. diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index 806015b633c23..5c06973e618bd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -87,6 +87,9 @@ private[clustering] trait KMeansParams extends Params with HasMaxIter with HasFe @Since("1.5.0") def getInitSteps: Int = $(initSteps) + setDefault(k -> 2, maxIter -> 20, initMode -> MLlibKMeans.K_MEANS_PARALLEL, initSteps -> 2, + tol -> 1e-4, distanceMeasure -> DistanceMeasure.EUCLIDEAN) + /** * Validates and transforms the input schema. * @param schema input schema @@ -271,14 +274,6 @@ class KMeans @Since("1.5.0") ( @Since("1.5.0") override val uid: String) extends Estimator[KMeansModel] with KMeansParams with DefaultParamsWritable { - setDefault( - k -> 2, - maxIter -> 20, - initMode -> MLlibKMeans.K_MEANS_PARALLEL, - initSteps -> 2, - tol -> 1e-4, - distanceMeasure -> DistanceMeasure.EUCLIDEAN) - @Since("1.5.0") override def copy(extra: ParamMap): KMeans = defaultCopy(extra) diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala index 4e86b3b247ace..c1b76fb40b2f6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala @@ -199,8 +199,6 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM " with estimates of the topic mixture distribution for each document (often called \"theta\"" + " in the literature). 
Returns a vector of zeros for an empty document.") - setDefault(topicDistributionCol -> "topicDistribution") - /** @group getParam */ @Since("1.6.0") def getTopicDistributionCol: String = $(topicDistributionCol) @@ -315,6 +313,11 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM @Since("2.0.0") def getKeepLastCheckpoint: Boolean = $(keepLastCheckpoint) + setDefault(maxIter -> 20, k -> 10, optimizer -> "online", checkpointInterval -> 10, + learningOffset -> 1024, learningDecay -> 0.51, subsamplingRate -> 0.05, + optimizeDocConcentration -> true, keepLastCheckpoint -> true, + topicDistributionCol -> "topicDistribution") + /** * Validates and transforms the input schema. * @@ -863,10 +866,6 @@ class LDA @Since("1.6.0") ( @Since("1.6.0") def this() = this(Identifiable.randomUID("lda")) - setDefault(maxIter -> 20, k -> 10, optimizer -> "online", checkpointInterval -> 10, - learningOffset -> 1024, learningDecay -> 0.51, subsamplingRate -> 0.05, - optimizeDocConcentration -> true, keepLastCheckpoint -> true) - /** * The features for LDA should be a `Vector` representing the word counts in a document. * The vector should be of length vocabSize, with counts for each term (word). diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala index 812a426a062c1..1466b32bef530 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala @@ -91,7 +91,7 @@ private[clustering] trait PowerIterationClusteringParams extends Params with Has @Since("2.4.0") def getDstCol: String = $(dstCol) - setDefault(srcCol -> "src", dstCol -> "dst") + setDefault(srcCol -> "src", dstCol -> "dst", k -> 2, maxIter -> 20, initMode -> "random") } /** @@ -111,11 +111,6 @@ class PowerIterationClustering private[clustering] ( @Since("2.4.0") override val uid: String) extends PowerIterationClusteringParams with DefaultParamsWritable { - setDefault( - k -> 2, - maxIter -> 20, - initMode -> "random") - @Since("2.4.0") def this() = this(Identifiable.randomUID("PowerIterationClustering")) diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala index 52be22f714981..93b66f3ab7007 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala @@ -81,8 +81,6 @@ class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override va @Since("3.0.0") def setNumBins(value: Int): this.type = set(numBins, value) - setDefault(numBins -> 1000) - /** @group setParam */ @Since("1.5.0") def setRawPredictionCol(value: String): this.type = set(rawPredictionCol, value) @@ -95,7 +93,7 @@ class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override va @Since("3.0.0") def setWeightCol(value: String): this.type = set(weightCol, value) - setDefault(metricName -> "areaUnderROC") + setDefault(metricName -> "areaUnderROC", numBins -> 1000) @Since("2.0.0") override def evaluate(dataset: Dataset[_]): Double = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala 
b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala index 3d77792c4fc88..beeefde8c5fac 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala @@ -64,8 +64,6 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid @Since("1.5.0") def setMetricName(value: String): this.type = set(metricName, value) - setDefault(metricName -> "f1") - /** @group setParam */ @Since("1.5.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) @@ -105,8 +103,6 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid @Since("3.0.0") def setMetricLabel(value: Double): this.type = set(metricLabel, value) - setDefault(metricLabel -> 0.0) - /** * The beta value, which controls precision vs recall weighting, * used in `"weightedFMeasure"`, `"fMeasureByLabel"`. @@ -128,8 +124,6 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid @Since("3.0.0") def setBeta(value: Double): this.type = set(beta, value) - setDefault(beta -> 1.0) - /** * param for eps. log-loss is undefined for p=0 or p=1, so probabilities are clipped to * max(eps, min(1 - eps, p)). Must be in range (0, 0.5). The default value is 1e-15. @@ -150,7 +144,7 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid @Since("3.0.0") def setEps(value: Double): this.type = set(eps, value) - setDefault(eps -> 1e-15) + setDefault(metricName -> "f1", eps -> 1e-15, metricLabel -> 0.0, beta -> 1.0) @Since("2.0.0") override def evaluate(dataset: Dataset[_]): Double = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala index 1a82ac7a9472f..8ed26502407a8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala @@ -65,8 +65,6 @@ class MultilabelClassificationEvaluator @Since("3.0.0") (@Since("3.0.0") overrid @Since("3.0.0") def setMetricName(value: String): this.type = set(metricName, value) - setDefault(metricName -> "f1Measure") - /** * param for the class whose metric will be computed in `"precisionByLabel"`, `"recallByLabel"`, * `"f1MeasureByLabel"`. 
@@ -86,8 +84,6 @@ class MultilabelClassificationEvaluator @Since("3.0.0") (@Since("3.0.0") overrid /** @group setParam */ def setMetricLabel(value: Double): this.type = set(metricLabel, value) - setDefault(metricLabel -> 0.0) - /** @group setParam */ @Since("3.0.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) @@ -96,6 +92,8 @@ class MultilabelClassificationEvaluator @Since("3.0.0") (@Since("3.0.0") overrid @Since("3.0.0") def setLabelCol(value: String): this.type = set(labelCol, value) + setDefault(metricLabel -> 0.0, metricName -> "f1Measure") + @Since("3.0.0") override def evaluate(dataset: Dataset[_]): Double = { val metrics = getMetrics(dataset) diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala index 82dda4109771d..01fb0599160b0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala @@ -61,8 +61,6 @@ class RankingEvaluator @Since("3.0.0") (@Since("3.0.0") override val uid: String @Since("3.0.0") def setMetricName(value: String): this.type = set(metricName, value) - setDefault(metricName -> "meanAveragePrecision") - /** * param for ranking position value used in `"meanAveragePrecisionAtK"`, `"precisionAtK"`, * `"ndcgAtK"`, `"recallAtK"`. Must be > 0. The default value is 10. @@ -83,8 +81,6 @@ class RankingEvaluator @Since("3.0.0") (@Since("3.0.0") override val uid: String @Since("3.0.0") def setK(value: Int): this.type = set(k, value) - setDefault(k -> 10) - /** @group setParam */ @Since("3.0.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) @@ -93,6 +89,8 @@ class RankingEvaluator @Since("3.0.0") (@Since("3.0.0") override val uid: String @Since("3.0.0") def setLabelCol(value: String): this.type = set(labelCol, value) + setDefault(k -> 10, metricName -> "meanAveragePrecision") + @Since("3.0.0") override def evaluate(dataset: Dataset[_]): Double = { val metrics = getMetrics(dataset) diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala index f0b7c345c3285..902869cc681b8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala @@ -80,8 +80,6 @@ final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val ui @Since("3.0.0") def setThroughOrigin(value: Boolean): this.type = set(throughOrigin, value) - setDefault(throughOrigin -> false) - /** @group setParam */ @Since("1.4.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) @@ -94,7 +92,7 @@ final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val ui @Since("3.0.0") def setWeightCol(value: String): this.type = set(weightCol, value) - setDefault(metricName -> "rmse") + setDefault(metricName -> "rmse", throughOrigin -> false) @Since("2.0.0") override def evaluate(dataset: Dataset[_]): Double = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala index f1a68edaed950..7aab4ef62c4d9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala @@ -50,7 +50,6 @@ private[fpm] trait FPGrowthParams extends Params 
with HasPredictionCol { */ @Since("2.2.0") val itemsCol: Param[String] = new Param[String](this, "itemsCol", "items column name") - setDefault(itemsCol -> "items") /** @group getParam */ @Since("2.2.0") @@ -66,7 +65,6 @@ private[fpm] trait FPGrowthParams extends Params with HasPredictionCol { val minSupport: DoubleParam = new DoubleParam(this, "minSupport", "the minimal support level of a frequent pattern", ParamValidators.inRange(0.0, 1.0)) - setDefault(minSupport -> 0.3) /** @group getParam */ @Since("2.2.0") @@ -95,12 +93,13 @@ private[fpm] trait FPGrowthParams extends Params with HasPredictionCol { val minConfidence: DoubleParam = new DoubleParam(this, "minConfidence", "minimal confidence for generating Association Rule", ParamValidators.inRange(0.0, 1.0)) - setDefault(minConfidence -> 0.8) /** @group getParam */ @Since("2.2.0") def getMinConfidence: Double = $(minConfidence) + setDefault(minSupport -> 0.3, itemsCol -> "items", minConfidence -> 0.8) + /** * Validates and transforms the input schema. * @param schema input schema diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 2c30e44b93467..f301c349a2dc7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -62,7 +62,6 @@ private[regression] trait AFTSurvivalRegressionParams extends PredictorParams /** @group getParam */ @Since("1.6.0") def getCensorCol: String = $(censorCol) - setDefault(censorCol -> "censor") /** * Param for quantile probabilities array. @@ -78,7 +77,6 @@ private[regression] trait AFTSurvivalRegressionParams extends PredictorParams /** @group getParam */ @Since("1.6.0") def getQuantileProbabilities: Array[Double] = $(quantileProbabilities) - setDefault(quantileProbabilities -> Array(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99)) /** * Param for quantiles column name. @@ -92,6 +90,10 @@ private[regression] trait AFTSurvivalRegressionParams extends PredictorParams @Since("1.6.0") def getQuantilesCol: String = $(quantilesCol) + setDefault(censorCol -> "censor", + quantileProbabilities -> Array(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99), + fitIntercept -> true, maxIter -> 100, tol -> 1E-6, aggregationDepth -> 2, blockSize -> 1) + /** Checks whether the input has quantiles column name. */ private[regression] def hasQuantilesCol: Boolean = { isDefined(quantilesCol) && $(quantilesCol).nonEmpty @@ -153,7 +155,6 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S */ @Since("1.6.0") def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) - setDefault(fitIntercept -> true) /** * Set the maximum number of iterations. @@ -162,7 +163,6 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S */ @Since("1.6.0") def setMaxIter(value: Int): this.type = set(maxIter, value) - setDefault(maxIter -> 100) /** * Set the convergence tolerance of iterations. @@ -172,7 +172,6 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S */ @Since("1.6.0") def setTol(value: Double): this.type = set(tol, value) - setDefault(tol -> 1E-6) /** * Suggested depth for treeAggregate (greater than or equal to 2). 
@@ -183,7 +182,6 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S */ @Since("2.1.0") def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) - setDefault(aggregationDepth -> 2) /** * Set block size for stacking input data in matrices. @@ -202,7 +200,6 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S */ @Since("3.1.0") def setBlockSize(value: Int): this.type = set(blockSize, value) - setDefault(blockSize -> 1) /** * Extract [[featuresCol]], [[labelCol]] and [[censorCol]] from input dataset, diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index de559142a9261..235a7f9b6ebd5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -105,6 +105,10 @@ private[regression] trait LinearRegressionParams extends PredictorParams @Since("2.3.0") def getEpsilon: Double = $(epsilon) + setDefault(regParam -> 0.0, fitIntercept -> true, standardization -> true, + elasticNetParam -> 0.0, maxIter -> 100, tol -> 1E-6, solver -> Auto, + aggregationDepth -> 2, loss -> SquaredError, epsilon -> 1.35, blockSize -> 1) + override protected def validateAndTransformSchema( schema: StructType, fitting: Boolean, @@ -191,7 +195,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.3.0") def setRegParam(value: Double): this.type = set(regParam, value) - setDefault(regParam -> 0.0) /** * Set if we should fit the intercept. @@ -201,7 +204,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.5.0") def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) - setDefault(fitIntercept -> true) /** * Whether to standardize the training features before fitting the model. @@ -217,7 +219,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.5.0") def setStandardization(value: Boolean): this.type = set(standardization, value) - setDefault(standardization -> true) /** * Set the ElasticNet mixing parameter. @@ -233,7 +234,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.4.0") def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value) - setDefault(elasticNetParam -> 0.0) /** * Set the maximum number of iterations. @@ -243,7 +243,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.3.0") def setMaxIter(value: Int): this.type = set(maxIter, value) - setDefault(maxIter -> 100) /** * Set the convergence tolerance of iterations. @@ -254,7 +253,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.4.0") def setTol(value: Double): this.type = set(tol, value) - setDefault(tol -> 1E-6) /** * Whether to over-/under-sample training instances according to the given weights in weightCol. @@ -283,7 +281,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.6.0") def setSolver(value: String): this.type = set(solver, value) - setDefault(solver -> Auto) /** * Suggested depth for treeAggregate (greater than or equal to 2). 
@@ -295,7 +292,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("2.1.0") def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) - setDefault(aggregationDepth -> 2) /** * Sets the value of param [[loss]]. @@ -305,7 +301,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("2.3.0") def setLoss(value: String): this.type = set(loss, value) - setDefault(loss -> SquaredError) /** * Sets the value of param [[epsilon]]. @@ -315,7 +310,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("2.3.0") def setEpsilon(value: Double): this.type = set(epsilon, value) - setDefault(epsilon -> 1.35) /** * Set block size for stacking input data in matrices. @@ -334,7 +328,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("3.1.0") def setBlockSize(value: Int): this.type = set(blockSize, value) - setDefault(blockSize -> 1) override protected def train(dataset: Dataset[_]): LinearRegressionModel = instrumented { instr => instr.logPipelineStage(this) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index cc8ce0567bd7f..463dbee9a3b77 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -512,6 +512,12 @@ class _LinearSVCParams(_ClassifierParams, HasRegParam, HasMaxIter, HasFitInterce " all predictions 0.0 and -Inf will make all predictions 1.0.", typeConverter=TypeConverters.toFloat) + def __init__(self): + super(_LinearSVCParams, self).__init__() + self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True, + standardization=True, threshold=0.0, aggregationDepth=2, + blockSize=1) + @inherit_doc class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadable): @@ -598,9 +604,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(LinearSVC, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.LinearSVC", self.uid) - self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True, - standardization=True, threshold=0.0, aggregationDepth=2, - blockSize=1) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -816,6 +819,11 @@ class _LogisticRegressionParams(_ProbabilisticClassifierParams, HasRegParam, "classes for multinomial regression.", typeConverter=TypeConverters.toVector) + def __init__(self): + super(_LogisticRegressionParams, self).__init__() + self._setDefault(maxIter=100, regParam=0.0, tol=1E-6, threshold=0.5, family="auto", + blockSize=1) + @since("1.4.0") def setThreshold(self, value): """ @@ -1037,8 +1045,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(LogisticRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.LogisticRegression", self.uid) - self._setDefault(maxIter=100, regParam=0.0, tol=1E-6, threshold=0.5, family="auto", - blockSize=1) kwargs = self._input_kwargs self.setParams(**kwargs) self._checkThresholdConsistency() @@ -1305,7 +1311,12 @@ class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams): """ Params for :py:class:`DecisionTreeClassifier` and :py:class:`DecisionTreeClassificationModel`. 
""" - pass + + def __init__(self): + super(_DecisionTreeClassifierParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="gini", leafCol="", minWeightFractionPerNode=0.0) @inherit_doc @@ -1404,9 +1415,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(DecisionTreeClassifier, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.DecisionTreeClassifier", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - impurity="gini", leafCol="", minWeightFractionPerNode=0.0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1540,7 +1548,14 @@ class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams): """ Params for :py:class:`RandomForestClassifier` and :py:class:`RandomForestClassificationModel`. """ - pass + + def __init__(self): + super(_RandomForestClassifierParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="gini", numTrees=20, featureSubsetStrategy="auto", + subsamplingRate=1.0, leafCol="", minWeightFractionPerNode=0.0, + bootstrap=True) @inherit_doc @@ -1634,11 +1649,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(RandomForestClassifier, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.RandomForestClassifier", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - impurity="gini", numTrees=20, featureSubsetStrategy="auto", - subsamplingRate=1.0, leafCol="", minWeightFractionPerNode=0.0, - bootstrap=True) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1882,6 +1892,14 @@ class _GBTClassifierParams(_GBTParams, _HasVarianceImpurity): "Supported options: " + ", ".join(supportedLossTypes), typeConverter=TypeConverters.toString) + def __init__(self): + super(_GBTClassifierParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="logistic", maxIter=20, stepSize=0.1, subsamplingRate=1.0, + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, + leafCol="", minWeightFractionPerNode=0.0) + @since("1.4.0") def getLossType(self): """ @@ -2009,11 +2027,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(GBTClassifier, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.GBTClassifier", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - lossType="logistic", maxIter=20, stepSize=0.1, subsamplingRate=1.0, - impurity="variance", featureSubsetStrategy="all", validationTol=0.01, - leafCol="", minWeightFractionPerNode=0.0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -2211,6 +2224,10 @@ class _NaiveBayesParams(_PredictorParams, HasWeightCol): "and gaussian.", typeConverter=TypeConverters.toString) + def __init__(self): + super(_NaiveBayesParams, self).__init__() + self._setDefault(smoothing=1.0, modelType="multinomial") + 
@since("1.5.0") def getSmoothing(self): """ @@ -2329,7 +2346,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(NaiveBayes, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.NaiveBayes", self.uid) - self._setDefault(smoothing=1.0, modelType="multinomial") kwargs = self._input_kwargs self.setParams(**kwargs) diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 54a184bc081ee..6ca413d696368 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -109,6 +109,10 @@ class _GaussianMixtureParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionC k = Param(Params._dummy(), "k", "Number of independent Gaussians in the mixture model. " + "Must be > 1.", typeConverter=TypeConverters.toInt) + def __init__(self): + super(_GaussianMixtureParams, self).__init__() + self._setDefault(k=2, tol=0.01, maxIter=100, aggregationDepth=2, blockSize=1) + @since("2.0.0") def getK(self): """ @@ -339,7 +343,6 @@ def __init__(self, featuresCol="features", predictionCol="prediction", k=2, super(GaussianMixture, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.GaussianMixture", self.uid) - self._setDefault(k=2, tol=0.01, maxIter=100, aggregationDepth=2, blockSize=1) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -500,6 +503,11 @@ class _KMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionCol, HasTo initSteps = Param(Params._dummy(), "initSteps", "The number of steps for k-means|| " + "initialization mode. Must be > 0.", typeConverter=TypeConverters.toInt) + def __init__(self): + super(_KMeansParams, self).__init__() + self._setDefault(k=2, initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20, + distanceMeasure="euclidean") + @since("1.5.0") def getK(self): """ @@ -644,8 +652,6 @@ def __init__(self, featuresCol="features", predictionCol="prediction", k=2, """ super(KMeans, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.KMeans", self.uid) - self._setDefault(k=2, initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20, - distanceMeasure="euclidean") kwargs = self._input_kwargs self.setParams(**kwargs) @@ -754,6 +760,10 @@ class _BisectingKMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionC "proportion of points (if < 1.0) of a divisible cluster.", typeConverter=TypeConverters.toFloat) + def __init__(self): + super(_BisectingKMeansParams, self).__init__() + self._setDefault(maxIter=20, k=4, minDivisibleClusterSize=1.0) + @since("2.0.0") def getK(self): """ @@ -920,7 +930,6 @@ def __init__(self, featuresCol="features", predictionCol="prediction", maxIter=2 super(BisectingKMeans, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.BisectingKMeans", self.uid) - self._setDefault(maxIter=20, k=4, minDivisibleClusterSize=1.0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1063,6 +1072,13 @@ class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): " partition is lost, so set this bit with care.", TypeConverters.toBoolean) + def __init__(self): + super(_LDAParams, self).__init__() + self._setDefault(maxIter=20, checkpointInterval=10, + k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51, + subsamplingRate=0.05, optimizeDocConcentration=True, + topicDistributionCol="topicDistribution", keepLastCheckpoint=True) + @since("2.0.0") def getK(self): """ @@ -1392,10 +1408,6 @@ def __init__(self, 
featuresCol="features", maxIter=20, seed=None, checkpointInte """ super(LDA, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.LDA", self.uid) - self._setDefault(maxIter=20, checkpointInterval=10, - k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51, - subsamplingRate=0.05, optimizeDocConcentration=True, - topicDistributionCol="topicDistribution", keepLastCheckpoint=True) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1588,6 +1600,10 @@ class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol): "Name of the input column for destination vertex IDs.", typeConverter=TypeConverters.toString) + def __init__(self): + super(_PowerIterationClusteringParams, self).__init__() + self._setDefault(k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst") + @since("2.4.0") def getK(self): """ @@ -1675,7 +1691,6 @@ def __init__(self, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst super(PowerIterationClustering, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.clustering.PowerIterationClustering", self.uid) - self._setDefault(k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst") kwargs = self._input_kwargs self.setParams(**kwargs) diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py index 7a5591f3fbf76..7a564df356f1f 100644 --- a/python/pyspark/ml/fpm.py +++ b/python/pyspark/ml/fpm.py @@ -56,6 +56,11 @@ class _FPGrowthParams(HasPredictionCol): "but will affect the association rules generation.", typeConverter=TypeConverters.toFloat) + def __init__(self): + super(_FPGrowthParams, self).__init__() + self._setDefault(minSupport=0.3, minConfidence=0.8, + itemsCol="items", predictionCol="prediction") + def getItemsCol(self): """ Gets the value of itemsCol or its default value. @@ -206,8 +211,6 @@ def __init__(self, minSupport=0.3, minConfidence=0.8, itemsCol="items", """ super(FPGrowth, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.fpm.FPGrowth", self.uid) - self._setDefault(minSupport=0.3, minConfidence=0.8, - itemsCol="items", predictionCol="prediction") kwargs = self._input_kwargs self.setParams(**kwargs) diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index e82a35c8e78f1..6fe6486c5a04a 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -104,6 +104,11 @@ class _LinearRegressionParams(_PredictorParams, HasRegParam, HasElasticNetParam, "robustness. Must be > 1.0. 
Only valid when loss is huber", typeConverter=TypeConverters.toFloat) + def __init__(self): + super(_LinearRegressionParams, self).__init__() + self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, loss="squaredError", epsilon=1.35, + blockSize=1) + @since("2.3.0") def getEpsilon(self): """ @@ -206,8 +211,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(LinearRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.LinearRegression", self.uid) - self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, loss="squaredError", epsilon=1.35, - blockSize=1) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -619,6 +622,10 @@ class _IsotonicRegressionParams(HasFeaturesCol, HasLabelCol, HasPredictionCol, H "The index of the feature if featuresCol is a vector column, no effect otherwise.", typeConverter=TypeConverters.toInt) + def __init__(self): + super(_IsotonicRegressionParams, self).__init__() + self._setDefault(isotonic=True, featureIndex=0) + def getIsotonic(self): """ Gets the value of isotonic or its default value. @@ -681,7 +688,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(IsotonicRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.IsotonicRegression", self.uid) - self._setDefault(isotonic=True, featureIndex=0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -808,7 +814,11 @@ class _DecisionTreeRegressorParams(_DecisionTreeParams, _TreeRegressorParams, Ha .. versionadded:: 3.0.0 """ - pass + def __init__(self): + super(_DecisionTreeRegressorParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="variance", leafCol="", minWeightFractionPerNode=0.0) @inherit_doc @@ -895,9 +905,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(DecisionTreeRegressor, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.DecisionTreeRegressor", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - impurity="variance", leafCol="", minWeightFractionPerNode=0.0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1052,7 +1059,14 @@ class _RandomForestRegressorParams(_RandomForestParams, _TreeRegressorParams): .. 
versionadded:: 3.0.0 """ - pass + + def __init__(self): + super(_RandomForestRegressorParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="variance", subsamplingRate=1.0, numTrees=20, + featureSubsetStrategy="auto", leafCol="", minWeightFractionPerNode=0.0, + bootstrap=True) @inherit_doc @@ -1135,11 +1149,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(RandomForestRegressor, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.RandomForestRegressor", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - impurity="variance", subsamplingRate=1.0, numTrees=20, - featureSubsetStrategy="auto", leafCol="", minWeightFractionPerNode=0.0, - bootstrap=True) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1310,6 +1319,14 @@ class _GBTRegressorParams(_GBTParams, _TreeRegressorParams): "Supported options: " + ", ".join(supportedLossTypes), typeConverter=TypeConverters.toString) + def __init__(self): + super(_GBTRegressorParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, + checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, + leafCol="", minWeightFractionPerNode=0.0) + @since("1.4.0") def getLossType(self): """ @@ -1407,11 +1424,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred """ super(GBTRegressor, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.regression.GBTRegressor", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, - checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, - impurity="variance", featureSubsetStrategy="all", validationTol=0.01, - leafCol="", minWeightFractionPerNode=0.0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1630,6 +1642,12 @@ class _AFTSurvivalRegressionParams(_PredictorParams, HasMaxIter, HasTol, HasFitI "corresponding quantileProbabilities if it is set.", typeConverter=TypeConverters.toString) + def __init__(self): + super(_AFTSurvivalRegressionParams, self).__init__() + self._setDefault(censorCol="censor", + quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], + maxIter=100, tol=1E-6, blockSize=1) + @since("1.6.0") def getCensorCol(self): """ @@ -1722,9 +1740,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(AFTSurvivalRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.AFTSurvivalRegression", self.uid) - self._setDefault(censorCol="censor", - quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], - maxIter=100, tol=1E-6, blockSize=1) kwargs = self._input_kwargs self.setParams(**kwargs)
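Every hunk above applies the same refactoring: on the Scala side, the `setDefault` calls that previously sat next to individual setters (or inside estimator class bodies such as `KMeans` and `LDA`) are consolidated into a single `setDefault(...)` call in the shared `*Params` trait; on the Python side, the matching `_setDefault(...)` calls move from each estimator's `__init__` into the corresponding `_*Params` mixin's `__init__`. The sketch below is a minimal illustration of the resulting shape using Spark ML's `Params` API; `MyAlgoParams`, `MyAlgo`, and the `factorSize` wiring are illustrative stand-ins, not code from this patch.

```scala
import org.apache.spark.ml.param.{IntParam, ParamMap, Params}
import org.apache.spark.ml.util.Identifiable

// Shared params trait: the default is declared once here, so every class that
// mixes in the trait (estimator and model alike) resolves the same default
// via getOrDefault / explainParams, without repeating setDefault per setter.
trait MyAlgoParams extends Params {
  final val factorSize = new IntParam(this, "factorSize", "dimensionality of the factors")

  final def getFactorSize: Int = $(factorSize)

  setDefault(factorSize -> 8)
}

// The estimator-side setter no longer carries its own setDefault call.
class MyAlgo(override val uid: String) extends MyAlgoParams {
  def this() = this(Identifiable.randomUID("myAlgo"))

  def setFactorSize(value: Int): this.type = set(factorSize, value)

  override def copy(extra: ParamMap): MyAlgo = defaultCopy(extra)
}
```

This is also why, for example, the explicit `setDefault(threshold, 0.0)` in `LinearSVCModel` can be dropped in this patch: the model mixes in `LinearSVCParams`, which now carries `threshold -> 0.0` together with the other defaults, so estimator and model stay in sync without duplicating the values.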