diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala index 6ef42500f86f7..2f2b29908b0e1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala @@ -85,7 +85,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setFactorSize(value: Int): this.type = set(factorSize, value) - setDefault(factorSize -> 8) /** * Set whether to fit intercept term. @@ -95,7 +94,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) - setDefault(fitIntercept -> true) /** * Set whether to fit linear term. @@ -105,7 +103,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setFitLinear(value: Boolean): this.type = set(fitLinear, value) - setDefault(fitLinear -> true) /** * Set the L2 regularization parameter. @@ -115,7 +112,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setRegParam(value: Double): this.type = set(regParam, value) - setDefault(regParam -> 0.0) /** * Set the mini-batch fraction parameter. @@ -125,7 +121,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setMiniBatchFraction(value: Double): this.type = set(miniBatchFraction, value) - setDefault(miniBatchFraction -> 1.0) /** * Set the standard deviation of initial coefficients. @@ -135,7 +130,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setInitStd(value: Double): this.type = set(initStd, value) - setDefault(initStd -> 0.01) /** * Set the maximum number of iterations. @@ -145,7 +139,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setMaxIter(value: Int): this.type = set(maxIter, value) - setDefault(maxIter -> 100) /** * Set the initial step size for the first step (like learning rate). @@ -155,7 +148,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setStepSize(value: Double): this.type = set(stepSize, value) - setDefault(stepSize -> 1.0) /** * Set the convergence tolerance of iterations. @@ -165,7 +157,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setTol(value: Double): this.type = set(tol, value) - setDefault(tol -> 1E-6) /** * Set the solver algorithm used for optimization. @@ -176,7 +167,6 @@ class FMClassifier @Since("3.0.0") ( */ @Since("3.0.0") def setSolver(value: String): this.type = set(solver, value) - setDefault(solver -> AdamW) /** * Set the random seed for weight initialization. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 4adc527c89b36..77272c65eb231 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -55,6 +55,9 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR */ final override val threshold: DoubleParam = new DoubleParam(this, "threshold", "threshold in binary classification prediction applied to rawPrediction") + + setDefault(regParam -> 0.0, maxIter -> 100, fitIntercept -> true, tol -> 1E-6, + standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSize -> 1) } /** @@ -82,7 +85,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setRegParam(value: Double): this.type = set(regParam, value) - setDefault(regParam -> 0.0) /** * Set the maximum number of iterations. @@ -92,7 +94,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setMaxIter(value: Int): this.type = set(maxIter, value) - setDefault(maxIter -> 100) /** * Whether to fit an intercept term. @@ -102,7 +103,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) - setDefault(fitIntercept -> true) /** * Set the convergence tolerance of iterations. @@ -113,7 +113,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setTol(value: Double): this.type = set(tol, value) - setDefault(tol -> 1E-6) /** * Whether to standardize the training features before fitting the model. @@ -123,7 +122,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setStandardization(value: Boolean): this.type = set(standardization, value) - setDefault(standardization -> true) /** * Set the value of param [[weightCol]]. @@ -142,7 +140,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setThreshold(value: Double): this.type = set(threshold, value) - setDefault(threshold -> 0.0) /** * Suggested depth for treeAggregate (greater than or equal to 2). @@ -154,7 +151,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("2.2.0") def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) - setDefault(aggregationDepth -> 2) /** * Set block size for stacking input data in matrices. 
@@ -173,7 +169,6 @@ class LinearSVC @Since("2.2.0") ( */ @Since("3.1.0") def setBlockSize(value: Int): this.type = set(blockSize, value) - setDefault(blockSize -> 1) @Since("2.2.0") override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra) @@ -381,7 +376,6 @@ class LinearSVCModel private[classification] ( @Since("2.2.0") def setThreshold(value: Double): this.type = set(threshold, value) - setDefault(threshold, 0.0) private val margin: Vector => Double = (features) => { BLAS.dot(features, coefficients) + intercept diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 47b3e2de7695c..0e3708bfe9a68 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -243,6 +243,10 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas @Since("2.2.0") def getUpperBoundsOnIntercepts: Vector = $(upperBoundsOnIntercepts) + setDefault(regParam -> 0.0, elasticNetParam -> 0.0, maxIter -> 100, tol -> 1E-6, + fitIntercept -> true, family -> "auto", standardization -> true, threshold -> 0.5, + aggregationDepth -> 2, blockSize -> 1) + protected def usingBoundConstrainedOptimization: Boolean = { isSet(lowerBoundsOnCoefficients) || isSet(upperBoundsOnCoefficients) || isSet(lowerBoundsOnIntercepts) || isSet(upperBoundsOnIntercepts) @@ -290,7 +294,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.2.0") def setRegParam(value: Double): this.type = set(regParam, value) - setDefault(regParam -> 0.0) /** * Set the ElasticNet mixing parameter. @@ -306,7 +309,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.4.0") def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value) - setDefault(elasticNetParam -> 0.0) /** * Set the maximum number of iterations. @@ -316,7 +318,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.2.0") def setMaxIter(value: Int): this.type = set(maxIter, value) - setDefault(maxIter -> 100) /** * Set the convergence tolerance of iterations. @@ -327,7 +328,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.4.0") def setTol(value: Double): this.type = set(tol, value) - setDefault(tol -> 1E-6) /** * Whether to fit an intercept term. @@ -337,7 +337,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.4.0") def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) - setDefault(fitIntercept -> true) /** * Sets the value of param [[family]]. @@ -347,7 +346,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("2.1.0") def setFamily(value: String): this.type = set(family, value) - setDefault(family -> "auto") /** * Whether to standardize the training features before fitting the model. 
@@ -361,11 +359,9 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("1.5.0") def setStandardization(value: Boolean): this.type = set(standardization, value) - setDefault(standardization -> true) @Since("1.5.0") override def setThreshold(value: Double): this.type = super.setThreshold(value) - setDefault(threshold -> 0.5) @Since("1.5.0") override def getThreshold: Double = super.getThreshold @@ -396,7 +392,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("2.1.0") def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) - setDefault(aggregationDepth -> 2) /** * Set the lower bounds on coefficients if fitting under bound constrained optimization. @@ -447,7 +442,6 @@ class LogisticRegression @Since("1.2.0") ( */ @Since("3.1.0") def setBlockSize(value: Int): this.type = set(blockSize, value) - setDefault(blockSize -> 1) private def assertBoundConstrainedOptimizationParamsValid( numCoefficientSets: Int, diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala index e65295dbdaf55..6b1537bcc5069 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala @@ -64,6 +64,8 @@ private[classification] trait NaiveBayesParams extends PredictorParams with HasW /** @group getParam */ final def getModelType: String = $(modelType) + + setDefault(smoothing -> 1.0, modelType -> NaiveBayes.Multinomial) } // scalastyle:off line.size.limit @@ -107,7 +109,6 @@ class NaiveBayes @Since("1.5.0") ( */ @Since("1.5.0") def setSmoothing(value: Double): this.type = set(smoothing, value) - setDefault(smoothing -> 1.0) /** * Set the model type using a string (case-sensitive). @@ -117,7 +118,6 @@ class NaiveBayes @Since("1.5.0") ( */ @Since("1.5.0") def setModelType(value: String): this.type = set(modelType, value) - setDefault(modelType -> Multinomial) /** * Sets the value of param [[weightCol]]. diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index b09f11dcfe156..5a60bed2652f7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -72,6 +72,8 @@ private[clustering] trait BisectingKMeansParams extends Params with HasMaxIter @Since("2.0.0") def getMinDivisibleClusterSize: Double = $(minDivisibleClusterSize) + setDefault(k -> 4, maxIter -> 20, minDivisibleClusterSize -> 1.0) + /** * Validates and transforms the input schema. 
* @param schema input schema @@ -226,11 +228,6 @@ class BisectingKMeans @Since("2.0.0") ( @Since("2.0.0") override val uid: String) extends Estimator[BisectingKMeansModel] with BisectingKMeansParams with DefaultParamsWritable { - setDefault( - k -> 4, - maxIter -> 20, - minDivisibleClusterSize -> 1.0) - @Since("2.0.0") override def copy(extra: ParamMap): BisectingKMeans = defaultCopy(extra) diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index 18fd220b4ca9c..996036e2d6330 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -59,6 +59,8 @@ private[clustering] trait GaussianMixtureParams extends Params with HasMaxIter w @Since("2.0.0") def getK: Int = $(k) + setDefault(k -> 2, maxIter -> 100, tol -> 0.01, blockSize -> 1) + /** * Validates and transforms the input schema. * @@ -328,11 +330,6 @@ class GaussianMixture @Since("2.0.0") ( @Since("2.0.0") override val uid: String) extends Estimator[GaussianMixtureModel] with GaussianMixtureParams with DefaultParamsWritable { - setDefault( - k -> 2, - maxIter -> 100, - tol -> 0.01) - @Since("2.0.0") override def copy(extra: ParamMap): GaussianMixture = defaultCopy(extra) @@ -392,7 +389,6 @@ class GaussianMixture @Since("2.0.0") ( */ @Since("3.1.0") def setBlockSize(value: Int): this.type = set(blockSize, value) - setDefault(blockSize -> 1) /** * Number of samples per cluster to use when initializing Gaussians. diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index 806015b633c23..5c06973e618bd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -87,6 +87,9 @@ private[clustering] trait KMeansParams extends Params with HasMaxIter with HasFe @Since("1.5.0") def getInitSteps: Int = $(initSteps) + setDefault(k -> 2, maxIter -> 20, initMode -> MLlibKMeans.K_MEANS_PARALLEL, initSteps -> 2, + tol -> 1e-4, distanceMeasure -> DistanceMeasure.EUCLIDEAN) + /** * Validates and transforms the input schema. * @param schema input schema @@ -271,14 +274,6 @@ class KMeans @Since("1.5.0") ( @Since("1.5.0") override val uid: String) extends Estimator[KMeansModel] with KMeansParams with DefaultParamsWritable { - setDefault( - k -> 2, - maxIter -> 20, - initMode -> MLlibKMeans.K_MEANS_PARALLEL, - initSteps -> 2, - tol -> 1e-4, - distanceMeasure -> DistanceMeasure.EUCLIDEAN) - @Since("1.5.0") override def copy(extra: ParamMap): KMeans = defaultCopy(extra) diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala index 4e86b3b247ace..c1b76fb40b2f6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala @@ -199,8 +199,6 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM " with estimates of the topic mixture distribution for each document (often called \"theta\"" + " in the literature). 
Returns a vector of zeros for an empty document.") - setDefault(topicDistributionCol -> "topicDistribution") - /** @group getParam */ @Since("1.6.0") def getTopicDistributionCol: String = $(topicDistributionCol) @@ -315,6 +313,11 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM @Since("2.0.0") def getKeepLastCheckpoint: Boolean = $(keepLastCheckpoint) + setDefault(maxIter -> 20, k -> 10, optimizer -> "online", checkpointInterval -> 10, + learningOffset -> 1024, learningDecay -> 0.51, subsamplingRate -> 0.05, + optimizeDocConcentration -> true, keepLastCheckpoint -> true, + topicDistributionCol -> "topicDistribution") + /** * Validates and transforms the input schema. * @@ -863,10 +866,6 @@ class LDA @Since("1.6.0") ( @Since("1.6.0") def this() = this(Identifiable.randomUID("lda")) - setDefault(maxIter -> 20, k -> 10, optimizer -> "online", checkpointInterval -> 10, - learningOffset -> 1024, learningDecay -> 0.51, subsamplingRate -> 0.05, - optimizeDocConcentration -> true, keepLastCheckpoint -> true) - /** * The features for LDA should be a `Vector` representing the word counts in a document. * The vector should be of length vocabSize, with counts for each term (word). diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala index 812a426a062c1..1466b32bef530 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala @@ -91,7 +91,7 @@ private[clustering] trait PowerIterationClusteringParams extends Params with Has @Since("2.4.0") def getDstCol: String = $(dstCol) - setDefault(srcCol -> "src", dstCol -> "dst") + setDefault(srcCol -> "src", dstCol -> "dst", k -> 2, maxIter -> 20, initMode -> "random") } /** @@ -111,11 +111,6 @@ class PowerIterationClustering private[clustering] ( @Since("2.4.0") override val uid: String) extends PowerIterationClusteringParams with DefaultParamsWritable { - setDefault( - k -> 2, - maxIter -> 20, - initMode -> "random") - @Since("2.4.0") def this() = this(Identifiable.randomUID("PowerIterationClustering")) diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala index 52be22f714981..93b66f3ab7007 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala @@ -81,8 +81,6 @@ class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override va @Since("3.0.0") def setNumBins(value: Int): this.type = set(numBins, value) - setDefault(numBins -> 1000) - /** @group setParam */ @Since("1.5.0") def setRawPredictionCol(value: String): this.type = set(rawPredictionCol, value) @@ -95,7 +93,7 @@ class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override va @Since("3.0.0") def setWeightCol(value: String): this.type = set(weightCol, value) - setDefault(metricName -> "areaUnderROC") + setDefault(metricName -> "areaUnderROC", numBins -> 1000) @Since("2.0.0") override def evaluate(dataset: Dataset[_]): Double = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala 
b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala index 3d77792c4fc88..beeefde8c5fac 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala @@ -64,8 +64,6 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid @Since("1.5.0") def setMetricName(value: String): this.type = set(metricName, value) - setDefault(metricName -> "f1") - /** @group setParam */ @Since("1.5.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) @@ -105,8 +103,6 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid @Since("3.0.0") def setMetricLabel(value: Double): this.type = set(metricLabel, value) - setDefault(metricLabel -> 0.0) - /** * The beta value, which controls precision vs recall weighting, * used in `"weightedFMeasure"`, `"fMeasureByLabel"`. @@ -128,8 +124,6 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid @Since("3.0.0") def setBeta(value: Double): this.type = set(beta, value) - setDefault(beta -> 1.0) - /** * param for eps. log-loss is undefined for p=0 or p=1, so probabilities are clipped to * max(eps, min(1 - eps, p)). Must be in range (0, 0.5). The default value is 1e-15. @@ -150,7 +144,7 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid @Since("3.0.0") def setEps(value: Double): this.type = set(eps, value) - setDefault(eps -> 1e-15) + setDefault(metricName -> "f1", eps -> 1e-15, metricLabel -> 0.0, beta -> 1.0) @Since("2.0.0") override def evaluate(dataset: Dataset[_]): Double = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala index 1a82ac7a9472f..8ed26502407a8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala @@ -65,8 +65,6 @@ class MultilabelClassificationEvaluator @Since("3.0.0") (@Since("3.0.0") overrid @Since("3.0.0") def setMetricName(value: String): this.type = set(metricName, value) - setDefault(metricName -> "f1Measure") - /** * param for the class whose metric will be computed in `"precisionByLabel"`, `"recallByLabel"`, * `"f1MeasureByLabel"`. 
@@ -86,8 +84,6 @@ class MultilabelClassificationEvaluator @Since("3.0.0") (@Since("3.0.0") overrid /** @group setParam */ def setMetricLabel(value: Double): this.type = set(metricLabel, value) - setDefault(metricLabel -> 0.0) - /** @group setParam */ @Since("3.0.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) @@ -96,6 +92,8 @@ class MultilabelClassificationEvaluator @Since("3.0.0") (@Since("3.0.0") overrid @Since("3.0.0") def setLabelCol(value: String): this.type = set(labelCol, value) + setDefault(metricLabel -> 0.0, metricName -> "f1Measure") + @Since("3.0.0") override def evaluate(dataset: Dataset[_]): Double = { val metrics = getMetrics(dataset) diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala index 82dda4109771d..01fb0599160b0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala @@ -61,8 +61,6 @@ class RankingEvaluator @Since("3.0.0") (@Since("3.0.0") override val uid: String @Since("3.0.0") def setMetricName(value: String): this.type = set(metricName, value) - setDefault(metricName -> "meanAveragePrecision") - /** * param for ranking position value used in `"meanAveragePrecisionAtK"`, `"precisionAtK"`, * `"ndcgAtK"`, `"recallAtK"`. Must be > 0. The default value is 10. @@ -83,8 +81,6 @@ class RankingEvaluator @Since("3.0.0") (@Since("3.0.0") override val uid: String @Since("3.0.0") def setK(value: Int): this.type = set(k, value) - setDefault(k -> 10) - /** @group setParam */ @Since("3.0.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) @@ -93,6 +89,8 @@ class RankingEvaluator @Since("3.0.0") (@Since("3.0.0") override val uid: String @Since("3.0.0") def setLabelCol(value: String): this.type = set(labelCol, value) + setDefault(k -> 10, metricName -> "meanAveragePrecision") + @Since("3.0.0") override def evaluate(dataset: Dataset[_]): Double = { val metrics = getMetrics(dataset) diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala index f0b7c345c3285..902869cc681b8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala @@ -80,8 +80,6 @@ final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val ui @Since("3.0.0") def setThroughOrigin(value: Boolean): this.type = set(throughOrigin, value) - setDefault(throughOrigin -> false) - /** @group setParam */ @Since("1.4.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) @@ -94,7 +92,7 @@ final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val ui @Since("3.0.0") def setWeightCol(value: String): this.type = set(weightCol, value) - setDefault(metricName -> "rmse") + setDefault(metricName -> "rmse", throughOrigin -> false) @Since("2.0.0") override def evaluate(dataset: Dataset[_]): Double = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala index f1a68edaed950..7aab4ef62c4d9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala @@ -50,7 +50,6 @@ private[fpm] trait FPGrowthParams extends Params 
with HasPredictionCol { */ @Since("2.2.0") val itemsCol: Param[String] = new Param[String](this, "itemsCol", "items column name") - setDefault(itemsCol -> "items") /** @group getParam */ @Since("2.2.0") @@ -66,7 +65,6 @@ private[fpm] trait FPGrowthParams extends Params with HasPredictionCol { val minSupport: DoubleParam = new DoubleParam(this, "minSupport", "the minimal support level of a frequent pattern", ParamValidators.inRange(0.0, 1.0)) - setDefault(minSupport -> 0.3) /** @group getParam */ @Since("2.2.0") @@ -95,12 +93,13 @@ private[fpm] trait FPGrowthParams extends Params with HasPredictionCol { val minConfidence: DoubleParam = new DoubleParam(this, "minConfidence", "minimal confidence for generating Association Rule", ParamValidators.inRange(0.0, 1.0)) - setDefault(minConfidence -> 0.8) /** @group getParam */ @Since("2.2.0") def getMinConfidence: Double = $(minConfidence) + setDefault(minSupport -> 0.3, itemsCol -> "items", minConfidence -> 0.8) + /** * Validates and transforms the input schema. * @param schema input schema diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 2c30e44b93467..f301c349a2dc7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -62,7 +62,6 @@ private[regression] trait AFTSurvivalRegressionParams extends PredictorParams /** @group getParam */ @Since("1.6.0") def getCensorCol: String = $(censorCol) - setDefault(censorCol -> "censor") /** * Param for quantile probabilities array. @@ -78,7 +77,6 @@ private[regression] trait AFTSurvivalRegressionParams extends PredictorParams /** @group getParam */ @Since("1.6.0") def getQuantileProbabilities: Array[Double] = $(quantileProbabilities) - setDefault(quantileProbabilities -> Array(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99)) /** * Param for quantiles column name. @@ -92,6 +90,10 @@ private[regression] trait AFTSurvivalRegressionParams extends PredictorParams @Since("1.6.0") def getQuantilesCol: String = $(quantilesCol) + setDefault(censorCol -> "censor", + quantileProbabilities -> Array(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99), + fitIntercept -> true, maxIter -> 100, tol -> 1E-6, aggregationDepth -> 2, blockSize -> 1) + /** Checks whether the input has quantiles column name. */ private[regression] def hasQuantilesCol: Boolean = { isDefined(quantilesCol) && $(quantilesCol).nonEmpty @@ -153,7 +155,6 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S */ @Since("1.6.0") def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) - setDefault(fitIntercept -> true) /** * Set the maximum number of iterations. @@ -162,7 +163,6 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S */ @Since("1.6.0") def setMaxIter(value: Int): this.type = set(maxIter, value) - setDefault(maxIter -> 100) /** * Set the convergence tolerance of iterations. @@ -172,7 +172,6 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S */ @Since("1.6.0") def setTol(value: Double): this.type = set(tol, value) - setDefault(tol -> 1E-6) /** * Suggested depth for treeAggregate (greater than or equal to 2). 
@@ -183,7 +182,6 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S */ @Since("2.1.0") def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) - setDefault(aggregationDepth -> 2) /** * Set block size for stacking input data in matrices. @@ -202,7 +200,6 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S */ @Since("3.1.0") def setBlockSize(value: Int): this.type = set(blockSize, value) - setDefault(blockSize -> 1) /** * Extract [[featuresCol]], [[labelCol]] and [[censorCol]] from input dataset, diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index de559142a9261..235a7f9b6ebd5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -105,6 +105,10 @@ private[regression] trait LinearRegressionParams extends PredictorParams @Since("2.3.0") def getEpsilon: Double = $(epsilon) + setDefault(regParam -> 0.0, fitIntercept -> true, standardization -> true, + elasticNetParam -> 0.0, maxIter -> 100, tol -> 1E-6, solver -> Auto, + aggregationDepth -> 2, loss -> SquaredError, epsilon -> 1.35, blockSize -> 1) + override protected def validateAndTransformSchema( schema: StructType, fitting: Boolean, @@ -191,7 +195,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.3.0") def setRegParam(value: Double): this.type = set(regParam, value) - setDefault(regParam -> 0.0) /** * Set if we should fit the intercept. @@ -201,7 +204,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.5.0") def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) - setDefault(fitIntercept -> true) /** * Whether to standardize the training features before fitting the model. @@ -217,7 +219,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.5.0") def setStandardization(value: Boolean): this.type = set(standardization, value) - setDefault(standardization -> true) /** * Set the ElasticNet mixing parameter. @@ -233,7 +234,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.4.0") def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value) - setDefault(elasticNetParam -> 0.0) /** * Set the maximum number of iterations. @@ -243,7 +243,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.3.0") def setMaxIter(value: Int): this.type = set(maxIter, value) - setDefault(maxIter -> 100) /** * Set the convergence tolerance of iterations. @@ -254,7 +253,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.4.0") def setTol(value: Double): this.type = set(tol, value) - setDefault(tol -> 1E-6) /** * Whether to over-/under-sample training instances according to the given weights in weightCol. @@ -283,7 +281,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("1.6.0") def setSolver(value: String): this.type = set(solver, value) - setDefault(solver -> Auto) /** * Suggested depth for treeAggregate (greater than or equal to 2). 
@@ -295,7 +292,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("2.1.0") def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) - setDefault(aggregationDepth -> 2) /** * Sets the value of param [[loss]]. @@ -305,7 +301,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("2.3.0") def setLoss(value: String): this.type = set(loss, value) - setDefault(loss -> SquaredError) /** * Sets the value of param [[epsilon]]. @@ -315,7 +310,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("2.3.0") def setEpsilon(value: Double): this.type = set(epsilon, value) - setDefault(epsilon -> 1.35) /** * Set block size for stacking input data in matrices. @@ -334,7 +328,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String */ @Since("3.1.0") def setBlockSize(value: Int): this.type = set(blockSize, value) - setDefault(blockSize -> 1) override protected def train(dataset: Dataset[_]): LinearRegressionModel = instrumented { instr => instr.logPipelineStage(this) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index cc8ce0567bd7f..463dbee9a3b77 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -512,6 +512,12 @@ class _LinearSVCParams(_ClassifierParams, HasRegParam, HasMaxIter, HasFitInterce " all predictions 0.0 and -Inf will make all predictions 1.0.", typeConverter=TypeConverters.toFloat) + def __init__(self): + super(_LinearSVCParams, self).__init__() + self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True, + standardization=True, threshold=0.0, aggregationDepth=2, + blockSize=1) + @inherit_doc class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadable): @@ -598,9 +604,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(LinearSVC, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.LinearSVC", self.uid) - self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True, - standardization=True, threshold=0.0, aggregationDepth=2, - blockSize=1) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -816,6 +819,11 @@ class _LogisticRegressionParams(_ProbabilisticClassifierParams, HasRegParam, "classes for multinomial regression.", typeConverter=TypeConverters.toVector) + def __init__(self): + super(_LogisticRegressionParams, self).__init__() + self._setDefault(maxIter=100, regParam=0.0, tol=1E-6, threshold=0.5, family="auto", + blockSize=1) + @since("1.4.0") def setThreshold(self, value): """ @@ -1037,8 +1045,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(LogisticRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.LogisticRegression", self.uid) - self._setDefault(maxIter=100, regParam=0.0, tol=1E-6, threshold=0.5, family="auto", - blockSize=1) kwargs = self._input_kwargs self.setParams(**kwargs) self._checkThresholdConsistency() @@ -1305,7 +1311,12 @@ class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams): """ Params for :py:class:`DecisionTreeClassifier` and :py:class:`DecisionTreeClassificationModel`. 
""" - pass + + def __init__(self): + super(_DecisionTreeClassifierParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="gini", leafCol="", minWeightFractionPerNode=0.0) @inherit_doc @@ -1404,9 +1415,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(DecisionTreeClassifier, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.DecisionTreeClassifier", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - impurity="gini", leafCol="", minWeightFractionPerNode=0.0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1540,7 +1548,14 @@ class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams): """ Params for :py:class:`RandomForestClassifier` and :py:class:`RandomForestClassificationModel`. """ - pass + + def __init__(self): + super(_RandomForestClassifierParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="gini", numTrees=20, featureSubsetStrategy="auto", + subsamplingRate=1.0, leafCol="", minWeightFractionPerNode=0.0, + bootstrap=True) @inherit_doc @@ -1634,11 +1649,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(RandomForestClassifier, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.RandomForestClassifier", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - impurity="gini", numTrees=20, featureSubsetStrategy="auto", - subsamplingRate=1.0, leafCol="", minWeightFractionPerNode=0.0, - bootstrap=True) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1882,6 +1892,14 @@ class _GBTClassifierParams(_GBTParams, _HasVarianceImpurity): "Supported options: " + ", ".join(supportedLossTypes), typeConverter=TypeConverters.toString) + def __init__(self): + super(_GBTClassifierParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="logistic", maxIter=20, stepSize=0.1, subsamplingRate=1.0, + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, + leafCol="", minWeightFractionPerNode=0.0) + @since("1.4.0") def getLossType(self): """ @@ -2009,11 +2027,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(GBTClassifier, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.GBTClassifier", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - lossType="logistic", maxIter=20, stepSize=0.1, subsamplingRate=1.0, - impurity="variance", featureSubsetStrategy="all", validationTol=0.01, - leafCol="", minWeightFractionPerNode=0.0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -2211,6 +2224,10 @@ class _NaiveBayesParams(_PredictorParams, HasWeightCol): "and gaussian.", typeConverter=TypeConverters.toString) + def __init__(self): + super(_NaiveBayesParams, self).__init__() + self._setDefault(smoothing=1.0, modelType="multinomial") + 
@since("1.5.0") def getSmoothing(self): """ @@ -2329,7 +2346,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(NaiveBayes, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.NaiveBayes", self.uid) - self._setDefault(smoothing=1.0, modelType="multinomial") kwargs = self._input_kwargs self.setParams(**kwargs) diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 54a184bc081ee..6ca413d696368 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -109,6 +109,10 @@ class _GaussianMixtureParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionC k = Param(Params._dummy(), "k", "Number of independent Gaussians in the mixture model. " + "Must be > 1.", typeConverter=TypeConverters.toInt) + def __init__(self): + super(_GaussianMixtureParams, self).__init__() + self._setDefault(k=2, tol=0.01, maxIter=100, aggregationDepth=2, blockSize=1) + @since("2.0.0") def getK(self): """ @@ -339,7 +343,6 @@ def __init__(self, featuresCol="features", predictionCol="prediction", k=2, super(GaussianMixture, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.GaussianMixture", self.uid) - self._setDefault(k=2, tol=0.01, maxIter=100, aggregationDepth=2, blockSize=1) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -500,6 +503,11 @@ class _KMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionCol, HasTo initSteps = Param(Params._dummy(), "initSteps", "The number of steps for k-means|| " + "initialization mode. Must be > 0.", typeConverter=TypeConverters.toInt) + def __init__(self): + super(_KMeansParams, self).__init__() + self._setDefault(k=2, initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20, + distanceMeasure="euclidean") + @since("1.5.0") def getK(self): """ @@ -644,8 +652,6 @@ def __init__(self, featuresCol="features", predictionCol="prediction", k=2, """ super(KMeans, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.KMeans", self.uid) - self._setDefault(k=2, initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20, - distanceMeasure="euclidean") kwargs = self._input_kwargs self.setParams(**kwargs) @@ -754,6 +760,10 @@ class _BisectingKMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionC "proportion of points (if < 1.0) of a divisible cluster.", typeConverter=TypeConverters.toFloat) + def __init__(self): + super(_BisectingKMeansParams, self).__init__() + self._setDefault(maxIter=20, k=4, minDivisibleClusterSize=1.0) + @since("2.0.0") def getK(self): """ @@ -920,7 +930,6 @@ def __init__(self, featuresCol="features", predictionCol="prediction", maxIter=2 super(BisectingKMeans, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.BisectingKMeans", self.uid) - self._setDefault(maxIter=20, k=4, minDivisibleClusterSize=1.0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1063,6 +1072,13 @@ class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): " partition is lost, so set this bit with care.", TypeConverters.toBoolean) + def __init__(self): + super(_LDAParams, self).__init__() + self._setDefault(maxIter=20, checkpointInterval=10, + k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51, + subsamplingRate=0.05, optimizeDocConcentration=True, + topicDistributionCol="topicDistribution", keepLastCheckpoint=True) + @since("2.0.0") def getK(self): """ @@ -1392,10 +1408,6 @@ def __init__(self, 
featuresCol="features", maxIter=20, seed=None, checkpointInte """ super(LDA, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.LDA", self.uid) - self._setDefault(maxIter=20, checkpointInterval=10, - k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51, - subsamplingRate=0.05, optimizeDocConcentration=True, - topicDistributionCol="topicDistribution", keepLastCheckpoint=True) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1588,6 +1600,10 @@ class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol): "Name of the input column for destination vertex IDs.", typeConverter=TypeConverters.toString) + def __init__(self): + super(_PowerIterationClusteringParams, self).__init__() + self._setDefault(k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst") + @since("2.4.0") def getK(self): """ @@ -1675,7 +1691,6 @@ def __init__(self, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst super(PowerIterationClustering, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.clustering.PowerIterationClustering", self.uid) - self._setDefault(k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst") kwargs = self._input_kwargs self.setParams(**kwargs) diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py index 7a5591f3fbf76..7a564df356f1f 100644 --- a/python/pyspark/ml/fpm.py +++ b/python/pyspark/ml/fpm.py @@ -56,6 +56,11 @@ class _FPGrowthParams(HasPredictionCol): "but will affect the association rules generation.", typeConverter=TypeConverters.toFloat) + def __init__(self): + super(_FPGrowthParams, self).__init__() + self._setDefault(minSupport=0.3, minConfidence=0.8, + itemsCol="items", predictionCol="prediction") + def getItemsCol(self): """ Gets the value of itemsCol or its default value. @@ -206,8 +211,6 @@ def __init__(self, minSupport=0.3, minConfidence=0.8, itemsCol="items", """ super(FPGrowth, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.fpm.FPGrowth", self.uid) - self._setDefault(minSupport=0.3, minConfidence=0.8, - itemsCol="items", predictionCol="prediction") kwargs = self._input_kwargs self.setParams(**kwargs) diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index e82a35c8e78f1..6fe6486c5a04a 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -104,6 +104,11 @@ class _LinearRegressionParams(_PredictorParams, HasRegParam, HasElasticNetParam, "robustness. Must be > 1.0. 
Only valid when loss is huber", typeConverter=TypeConverters.toFloat) + def __init__(self): + super(_LinearRegressionParams, self).__init__() + self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, loss="squaredError", epsilon=1.35, + blockSize=1) + @since("2.3.0") def getEpsilon(self): """ @@ -206,8 +211,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(LinearRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.LinearRegression", self.uid) - self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, loss="squaredError", epsilon=1.35, - blockSize=1) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -619,6 +622,10 @@ class _IsotonicRegressionParams(HasFeaturesCol, HasLabelCol, HasPredictionCol, H "The index of the feature if featuresCol is a vector column, no effect otherwise.", typeConverter=TypeConverters.toInt) + def __init__(self): + super(_IsotonicRegressionParams, self).__init__() + self._setDefault(isotonic=True, featureIndex=0) + def getIsotonic(self): """ Gets the value of isotonic or its default value. @@ -681,7 +688,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(IsotonicRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.IsotonicRegression", self.uid) - self._setDefault(isotonic=True, featureIndex=0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -808,7 +814,11 @@ class _DecisionTreeRegressorParams(_DecisionTreeParams, _TreeRegressorParams, Ha .. versionadded:: 3.0.0 """ - pass + def __init__(self): + super(_DecisionTreeRegressorParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="variance", leafCol="", minWeightFractionPerNode=0.0) @inherit_doc @@ -895,9 +905,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(DecisionTreeRegressor, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.DecisionTreeRegressor", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - impurity="variance", leafCol="", minWeightFractionPerNode=0.0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1052,7 +1059,14 @@ class _RandomForestRegressorParams(_RandomForestParams, _TreeRegressorParams): .. 
versionadded:: 3.0.0 """ - pass + + def __init__(self): + super(_RandomForestRegressorParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="variance", subsamplingRate=1.0, numTrees=20, + featureSubsetStrategy="auto", leafCol="", minWeightFractionPerNode=0.0, + bootstrap=True) @inherit_doc @@ -1135,11 +1149,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(RandomForestRegressor, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.RandomForestRegressor", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - impurity="variance", subsamplingRate=1.0, numTrees=20, - featureSubsetStrategy="auto", leafCol="", minWeightFractionPerNode=0.0, - bootstrap=True) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1310,6 +1319,14 @@ class _GBTRegressorParams(_GBTParams, _TreeRegressorParams): "Supported options: " + ", ".join(supportedLossTypes), typeConverter=TypeConverters.toString) + def __init__(self): + super(_GBTRegressorParams, self).__init__() + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, + checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, + leafCol="", minWeightFractionPerNode=0.0) + @since("1.4.0") def getLossType(self): """ @@ -1407,11 +1424,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred """ super(GBTRegressor, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.regression.GBTRegressor", self.uid) - self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, - checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, - impurity="variance", featureSubsetStrategy="all", validationTol=0.01, - leafCol="", minWeightFractionPerNode=0.0) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1630,6 +1642,12 @@ class _AFTSurvivalRegressionParams(_PredictorParams, HasMaxIter, HasTol, HasFitI "corresponding quantileProbabilities if it is set.", typeConverter=TypeConverters.toString) + def __init__(self): + super(_AFTSurvivalRegressionParams, self).__init__() + self._setDefault(censorCol="censor", + quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], + maxIter=100, tol=1E-6, blockSize=1) + @since("1.6.0") def getCensorCol(self): """ @@ -1722,9 +1740,6 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred super(AFTSurvivalRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.AFTSurvivalRegression", self.uid) - self._setDefault(censorCol="censor", - quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], - maxIter=100, tol=1E-6, blockSize=1) kwargs = self._input_kwargs self.setParams(**kwargs)
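Every hunk above applies the same refactoring: on the Scala side, the `setDefault` calls that previously sat next to individual setters (or inside estimator class bodies such as `KMeans` and `LDA`) are consolidated into a single `setDefault(...)` call in the shared `*Params` trait; on the Python side, the matching `_setDefault(...)` calls move from each estimator's `__init__` into the corresponding `_*Params` mixin's `__init__`. The sketch below is a minimal illustration of the resulting shape using Spark ML's `Params` API; `MyAlgoParams`, `MyAlgo`, and the `factorSize` wiring are illustrative stand-ins, not code from this patch.

```scala
import org.apache.spark.ml.param.{IntParam, ParamMap, Params}
import org.apache.spark.ml.util.Identifiable

// Shared params trait: the default is declared once here, so every class that
// mixes in the trait (estimator and model alike) resolves the same default
// via getOrDefault / explainParams, without repeating setDefault per setter.
trait MyAlgoParams extends Params {
  final val factorSize = new IntParam(this, "factorSize", "dimensionality of the factors")

  final def getFactorSize: Int = $(factorSize)

  setDefault(factorSize -> 8)
}

// The estimator-side setter no longer carries its own setDefault call.
class MyAlgo(override val uid: String) extends MyAlgoParams {
  def this() = this(Identifiable.randomUID("myAlgo"))

  def setFactorSize(value: Int): this.type = set(factorSize, value)

  override def copy(extra: ParamMap): MyAlgo = defaultCopy(extra)
}
```

This is also why, for example, the explicit `setDefault(threshold, 0.0)` in `LinearSVCModel` can be dropped in this patch: the model mixes in `LinearSVCParams`, which now carries `threshold -> 0.0` together with the other defaults, so estimator and model stay in sync without duplicating the values.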