Skip to content

Commit c9bb5e4

Browse files
manishamde authored and mengxr committed
[MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.
Before:
[info] GradientBoostingSuite:
[info] - Regression with continuous features: SquaredError (22 seconds, 115 milliseconds)
[info] - Regression with continuous features: Absolute Error (19 seconds, 330 milliseconds)
[info] - Binary classification with continuous features: Log Loss (19 seconds, 17 milliseconds)

After:
[info] - Regression with continuous features: SquaredError (7 seconds, 69 milliseconds)
[info] - Regression with continuous features: Absolute Error (4 seconds, 617 milliseconds)
[info] - Binary classification with continuous features: Log Loss (4 seconds, 658 milliseconds)

cc: mengxr, jkbradley

Author: Manish Amde <[email protected]>

Closes #3214 from manishamde/gbt_test_speedup and squashes the following commits:

8994552 [Manish Amde] reducing gbt test run times

(cherry picked from commit 2ef016b)
Signed-off-by: Xiangrui Meng <[email protected]>
1 parent 12f5633 commit c9bb5e4

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
3535
test("Regression with continuous features: SquaredError") {
3636
GradientBoostingSuite.testCombinations.foreach {
3737
case (numIterations, learningRate, subsamplingRate) =>
38-
val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
38+
val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
3939
val rdd = sc.parallelize(arr)
4040
val categoricalFeaturesInfo = Map.empty[Int, Int]
4141

@@ -53,7 +53,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
5353
assert(gbt.weakHypotheses.size === numIterations)
5454
val gbtTree = gbt.weakHypotheses(0)
5555

56-
EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
56+
EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
5757

5858
// Make sure trees are the same.
5959
assert(gbtTree.toString == dt.toString)
@@ -63,7 +63,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
6363
test("Regression with continuous features: Absolute Error") {
6464
GradientBoostingSuite.testCombinations.foreach {
6565
case (numIterations, learningRate, subsamplingRate) =>
66-
val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
66+
val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
6767
val rdd = sc.parallelize(arr)
6868
val categoricalFeaturesInfo = Map.empty[Int, Int]
6969

@@ -81,7 +81,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
8181
assert(gbt.weakHypotheses.size === numIterations)
8282
val gbtTree = gbt.weakHypotheses(0)
8383

84-
EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
84+
EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
8585

8686
// Make sure trees are the same.
8787
assert(gbtTree.toString == dt.toString)
@@ -91,7 +91,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
9191
test("Binary classification with continuous features: Log Loss") {
9292
GradientBoostingSuite.testCombinations.foreach {
9393
case (numIterations, learningRate, subsamplingRate) =>
94-
val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
94+
val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
9595
val rdd = sc.parallelize(arr)
9696
val categoricalFeaturesInfo = Map.empty[Int, Int]
9797

0 commit comments

Comments
 (0)