@@ -21,18 +21,17 @@ import org.apache.spark.Logging
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.tree.configuration.Algo._
 import org.apache.spark.mllib.tree.configuration.BoostingStrategy
+import org.apache.spark.mllib.tree.configuration.Algo._
 import org.apache.spark.mllib.tree.configuration.EnsembleCombiningStrategy.Sum
 import org.apache.spark.mllib.tree.impl.TimeTracker
-import org.apache.spark.mllib.tree.model.{WeightedEnsembleModel, DecisionTreeModel}
+import org.apache.spark.mllib.tree.model.{DecisionTreeModel, TreeEnsembleModel}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel

 /**
  * :: Experimental ::
- * A class that implements Stochastic Gradient Boosting
- * for regression and binary classification problems.
+ * A class that implements Stochastic Gradient Boosting for regression and binary classification.
  *
  * The implementation is based upon:
  *     J.H. Friedman.  "Stochastic Gradient Boosting."  1999.
@@ -45,146 +44,84 @@ import org.apache.spark.storage.StorageLevel
  * but weak hypothesis weights are not computed correctly for LogLoss or AbsoluteError.
  * Running with those losses will likely behave reasonably, but lacks the same guarantees.
  *
- * @param boostingStrategy Parameters for the gradient boosting algorithm
+ * @param boostingStrategy Parameters for the gradient boosting algorithm.
  */
 @Experimental
-class GradientBoosting(
+class GradientBoostedTrees(
     private val boostingStrategy: BoostingStrategy) extends Serializable with Logging {

-  boostingStrategy.weakLearnerParams.algo = Regression
-  boostingStrategy.weakLearnerParams.impurity = impurity.Variance
-
-  // Ensure values for weak learner are the same as what is provided to the boosting algorithm.
-  boostingStrategy.weakLearnerParams.numClassesForClassification =
-    boostingStrategy.numClassesForClassification
-
-  boostingStrategy.assertValid()
-
   /**
    * Method to train a gradient boosting model
    * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]].
    * @return WeightedEnsembleModel that can be used for prediction
    */
-  def train(input: RDD[LabeledPoint]): WeightedEnsembleModel = {
-    val algo = boostingStrategy.algo
+  def train(input: RDD[LabeledPoint]): TreeEnsembleModel = {
+    val algo = boostingStrategy.treeStrategy.algo
     algo match {
-      case Regression => GradientBoosting.boost(input, boostingStrategy)
+      case Regression => GradientBoostedTrees.boost(input, boostingStrategy)
       case Classification =>
         // Map labels to -1, +1 so binary classification can be treated as regression.
         val remappedInput = input.map(x => new LabeledPoint((x.label * 2) - 1, x.features))
-        GradientBoosting.boost(remappedInput, boostingStrategy)
+        GradientBoostedTrees.boost(remappedInput, boostingStrategy)
       case _ =>
         throw new IllegalArgumentException(s"$algo is not supported by the gradient boosting.")
     }
   }
-
 }


-object GradientBoosting extends Logging {
+object GradientBoostedTrees extends Logging {

   /**
    * Method to train a gradient boosting model.
    *
-   * Note: Using [[org.apache.spark.mllib.tree.GradientBoosting$#trainRegressor]]
-   *       is recommended to clearly specify regression.
-   * Using [[org.apache.spark.mllib.tree.GradientBoosting$#trainClassifier]]
-   *       is recommended to clearly specify regression.
-   *
    * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]].
    *              For classification, labels should take values {0, 1, ..., numClasses-1}.
    *              For regression, labels are real numbers.
    * @param boostingStrategy Configuration options for the boosting algorithm.
-   * @return WeightedEnsembleModel that can be used for prediction
+   * @return a tree ensemble model that can be used for prediction
    */
   def train(
       input: RDD[LabeledPoint],
-      boostingStrategy: BoostingStrategy): WeightedEnsembleModel = {
-    new GradientBoosting(boostingStrategy).train(input)
-  }
-
-  /**
-   * Method to train a gradient boosting classification model.
-   *
-   * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]].
-   *              For classification, labels should take values {0, 1, ..., numClasses-1}.
-   *              For regression, labels are real numbers.
-   * @param boostingStrategy Configuration options for the boosting algorithm.
-   * @return WeightedEnsembleModel that can be used for prediction
-   */
-  def trainClassifier(
-      input: RDD[LabeledPoint],
-      boostingStrategy: BoostingStrategy): WeightedEnsembleModel = {
-    val algo = boostingStrategy.algo
-    require(algo == Classification, s"Only Classification algo supported. Provided algo is $algo.")
-    new GradientBoosting(boostingStrategy).train(input)
-  }
-
-  /**
-   * Method to train a gradient boosting regression model.
-   *
-   * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]].
-   *              For classification, labels should take values {0, 1, ..., numClasses-1}.
-   *              For regression, labels are real numbers.
-   * @param boostingStrategy Configuration options for the boosting algorithm.
-   * @return WeightedEnsembleModel that can be used for prediction
-   */
-  def trainRegressor(
-      input: RDD[LabeledPoint],
-      boostingStrategy: BoostingStrategy): WeightedEnsembleModel = {
-    val algo = boostingStrategy.algo
-    require(algo == Regression, s"Only Regression algo supported. Provided algo is $algo.")
-    new GradientBoosting(boostingStrategy).train(input)
+      boostingStrategy: BoostingStrategy): TreeEnsembleModel = {
+    new GradientBoostedTrees(boostingStrategy).train(input)
   }

   /**
-   * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoosting$#train]]
+   * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees$#train]]
    */
   def train(
-      input: JavaRDD[LabeledPoint],
-      boostingStrategy: BoostingStrategy): WeightedEnsembleModel = {
-    train(input.rdd, boostingStrategy)
-  }
-
-  /**
-   * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoosting$#trainClassifier]]
-   */
-  def trainClassifier(
       input: JavaRDD[LabeledPoint],
-      boostingStrategy: BoostingStrategy): WeightedEnsembleModel = {
-    trainClassifier(input.rdd, boostingStrategy)
-  }
-
-  /**
-   * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoosting$#trainRegressor]]
-   */
-  def trainRegressor(
-      input: JavaRDD[LabeledPoint],
-      boostingStrategy: BoostingStrategy): WeightedEnsembleModel = {
-    trainRegressor(input.rdd, boostingStrategy)
+      boostingStrategy: BoostingStrategy): TreeEnsembleModel = {
+    train(input.rdd, boostingStrategy)
   }

   /**
    * Internal method for performing regression using trees as base learners.
    * @param input training dataset
    * @param boostingStrategy boosting parameters
-   * @return
+   * @return a tree ensemble model that can be used for prediction
    */
   private def boost(
       input: RDD[LabeledPoint],
-      boostingStrategy: BoostingStrategy): WeightedEnsembleModel = {
+      boostingStrategy: BoostingStrategy): TreeEnsembleModel = {

     val timer = new TimeTracker()
     timer.start("total")
     timer.start("init")

+    boostingStrategy.assertValid()
+
     // Initialize gradient boosting parameters
     val numIterations = boostingStrategy.numIterations
     val baseLearners = new Array[DecisionTreeModel](numIterations)
     val baseLearnerWeights = new Array[Double](numIterations)
     val loss = boostingStrategy.loss
     val learningRate = boostingStrategy.learningRate
-    val strategy = boostingStrategy.weakLearnerParams
+    val ensembleStrategy = boostingStrategy.treeStrategy.copy
+    ensembleStrategy.algo = Regression
+    ensembleStrategy.impurity = impurity.Variance
+    ensembleStrategy.assertValid()

     // Cache input
     if (input.getStorageLevel == StorageLevel.NONE) {
@@ -200,11 +137,10 @@ object GradientBoosting extends Logging {

     // Initialize tree
     timer.start("building tree 0")
-    val firstTreeModel = new DecisionTree(strategy).train(data)
+    val firstTreeModel = new DecisionTree(ensembleStrategy).train(data)
     baseLearners(0) = firstTreeModel
     baseLearnerWeights(0) = 1.0
-    val startingModel = new WeightedEnsembleModel(Array(firstTreeModel), Array(1.0), Regression,
-      Sum)
+    val startingModel = new TreeEnsembleModel(Array(firstTreeModel), Array(1.0), Regression, Sum)
     logDebug("error of gbt = " + loss.computeError(startingModel, input))
     // Note: A model of type regression is used since we require raw prediction
     timer.stop("building tree 0")
@@ -219,7 +155,7 @@ object GradientBoosting extends Logging {
       logDebug("###################################################")
       logDebug("Gradient boosting tree iteration " + m)
       logDebug("###################################################")
-      val model = new DecisionTree(strategy).train(data)
+      val model = new DecisionTree(ensembleStrategy).train(data)
       timer.stop(s"building tree $m")
       // Create partial model
       baseLearners(m) = model
@@ -228,7 +164,7 @@ object GradientBoosting extends Logging {
       // However, the behavior should be reasonable, though not optimal.
       baseLearnerWeights(m) = learningRate
       // Note: A model of type regression is used since we require raw prediction
-      val partialModel = new WeightedEnsembleModel(baseLearners.slice(0, m + 1),
+      val partialModel = new TreeEnsembleModel(baseLearners.slice(0, m + 1),
         baseLearnerWeights.slice(0, m + 1), Regression, Sum)
       logDebug("error of gbt = " + loss.computeError(partialModel, input))
       // Update data with pseudo-residuals
@@ -242,8 +178,6 @@ object GradientBoosting extends Logging {
     logInfo("Internal timing for DecisionTree:")
     logInfo(s"$timer")

-    new WeightedEnsembleModel(baseLearners, baseLearnerWeights, boostingStrategy.algo, Sum)
-
+    new TreeEnsembleModel(baseLearners, baseLearnerWeights, boostingStrategy.treeStrategy.algo, Sum)
   }
-
 }
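
For context, here is a minimal usage sketch of the renamed entry point. It is not part of this commit: the `BoostingStrategy.defaultParams` and `MLUtils.loadLibSVMFile` helpers are assumed to be available in this version of MLlib, and the dataset path, parameter values, and example object name are illustrative only.

// Sketch only, not from the commit above: shows how the renamed
// GradientBoostedTrees.train API might be called for regression.
import org.apache.spark.SparkContext
import org.apache.spark.mllib.tree.GradientBoostedTrees
import org.apache.spark.mllib.tree.configuration.BoostingStrategy
import org.apache.spark.mllib.util.MLUtils

object GradientBoostedTreesExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local", "GradientBoostedTreesExample")

    // Load training data in LIBSVM format (path is illustrative).
    val data = MLUtils.loadLibSVMFile(sc, "data/sample_libsvm_data.txt")

    // Start from assumed default regression parameters and override a few values.
    val boostingStrategy = BoostingStrategy.defaultParams("Regression")
    boostingStrategy.numIterations = 10
    boostingStrategy.treeStrategy.maxDepth = 3

    // Train with the renamed entry point; the result is a TreeEnsembleModel.
    val model = GradientBoostedTrees.train(data, boostingStrategy)

    // Evaluate on the training set with mean squared error.
    val trainMSE = data.map { point =>
      val err = model.predict(point.features) - point.label
      err * err
    }.mean()
    println(s"Training Mean Squared Error = $trainMSE")

    sc.stop()
  }
}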