Skip to content

Commit 5f54f4d

Browse files
committed
More MiMa excludes; added many warnings against setting impurity
1 parent d3b948b commit 5f54f4d

File tree

4 files changed

+35
-6
lines changed

4 files changed

+35
-6
lines changed

mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,8 @@ import org.apache.spark.sql.types.DoubleType
5959
* - In the case of squared error loss, variance impurity and mean leaf estimates happen
6060
* to make the SGB and TreeBoost algorithms identical.
6161
*
62-
* [[GBTClassifier]] will use the usual `"loss-based"` impurity by default, conforming to
62+
* [[GBTClassifier]] will use the `"loss-based"` impurity by default, conforming to
6363
* TreeBoost behavior. For SGB, set impurity to `"variance"`.
64-
* To use of TreeBoost, set impurity to `"loss-based"`.
6564
*
6665
* Currently, however, even TreeBoost behavior uses variance impurity for split selection for
6766
* ease and speed. This is the approach `R`'s
@@ -70,7 +69,7 @@ import org.apache.spark.sql.types.DoubleType
7069
@Since("1.4.0")
7170
class GBTClassifier @Since("1.4.0") (
7271
@Since("1.4.0") override val uid: String)
73-
extends Classifier[Vector, GBTClassifier, GBTClassificationModel]
72+
extends Predictor[Vector, GBTClassifier, GBTClassificationModel]
7473
with GBTClassifierParams with DefaultParamsWritable with Logging {
7574

7675
@Since("1.4.0")
@@ -102,6 +101,18 @@ class GBTClassifier @Since("1.4.0") (
102101
@Since("1.4.0")
103102
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
104103

104+
/**
105+
* Setting the impurity is currently offered only as a way to recover pre-2.0.2 Spark GBT
106+
* behavior (Stochastic Gradient Boosting); to do so, set impurity to `"variance"`.
107+
* @param value new impurity value
108+
* @return this
109+
*/
110+
@Since("1.4.0")
111+
@deprecated(
112+
"Control over impurity will be removed, as it is an implementation detail of GBTs",
113+
"2.0.2")
114+
override def setImpurity(value: String): this.type = super.setImpurity(value)
115+
105116
// Parameters from TreeEnsembleParams:
106117

107118
@Since("1.4.0")

mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,16 @@ class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
9898

9999
/**
100100
* Note that the loss-based impurity is currently NOT compatible with absolute loss.
101+
*
102+
* Setting the impurity is currently offered only as a way to recover pre-2.0.2 Spark GBT
103+
* behavior (Stochastic Gradient Boosting); to do so, set impurity to `"variance"`.
101104
* @param value new impurity value
102105
* @return this
103106
*/
104107
@Since("1.4.0")
108+
@deprecated(
109+
"Control over impurity will be removed, as it is an implementation detail of GBTs",
110+
"2.0.2")
105111
override def setImpurity(value: String): this.type = super.setImpurity(value)
106112

107113
// Parameters from TreeEnsembleParams:

mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -516,8 +516,11 @@ private[ml] trait GBTClassifierParams extends GBTParams with TreeClassifierParam
516516
* Also used for terminal leaf value prediction.
517517
* Supported: "loss-based" (default) and "variance"
518518
*
519-
* @group param
519+
* @group expertParam
520520
*/
521+
@deprecated(
522+
"Control over impurity will be removed, as it is an implementation detail of GBTs",
523+
"2.0.2")
521524
override val impurity: Param[String] = new Param[String](this, "impurity", "Criterion used for" +
522525
" information gain calculation (case-insensitive). Supported options:" +
523526
s" ${GBTClassifierParams.supportedImpurities.mkString(", ")}",
@@ -590,8 +593,11 @@ private[ml] trait GBTRegressorParams extends GBTParams with TreeRegressorParams
590593
* Also used for terminal leaf value prediction.
591594
* Supported: "loss-based" and "variance" (default)
592595
*
593-
* @group param
596+
* @group expertParam
594597
*/
598+
@deprecated(
599+
"Control over impurity will be removed, as it is an implementation detail of GBTs",
600+
"2.0.2")
595601
override val impurity: Param[String] = new Param[String](this, "impurity", "Criterion used for" +
596602
" information gain calculation (case-insensitive). Supported options:" +
597603
s" ${GBTRegressorParams.supportedImpurities.mkString(", ")}",

project/MimaExcludes.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -869,7 +869,13 @@ object MimaExcludes {
869869
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassifier.getOldBoostingStrategy"),
870870
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.getOldBoostingStrategy"),
871871
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressor.getOldBoostingStrategy"),
872-
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.getOldBoostingStrategy")
872+
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.getOldBoostingStrategy"),
873+
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeRegressorParamsWithDefault.org$apache$spark$ml$tree$TreeRegressorParamsWithDefault$_setter_$impurity_="),
874+
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeClassifierParamsWithDefault.org$apache$spark$ml$tree$TreeClassifierParamsWithDefault$_setter_$impurity_="),
875+
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeClassifierParamsWithDefault.org$apache$spark$ml$tree$TreeClassifierParamsWithDefault$_setter_$impurity_="),
876+
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeRegressorParamsWithDefault.org$apache$spark$ml$tree$TreeRegressorParamsWithDefault$_setter_$impurity_="),
877+
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeClassifierParamsWithDefault.org$apache$spark$ml$tree$TreeClassifierParamsWithDefault$_setter_$impurity_="),
878+
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.tree.TreeRegressorParamsWithDefault.org$apache$spark$ml$tree$TreeRegressorParamsWithDefault$_setter_$impurity_=")
873879
)
874880
}
875881

0 commit comments

Comments
 (0)