-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-31925][ML] Summary.totalIterations greater than maxIters #28786
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -266,6 +266,8 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { | |
| assert(blorModel.summary.isInstanceOf[BinaryLogisticRegressionTrainingSummary]) | ||
| assert(blorModel.summary.asBinary.isInstanceOf[BinaryLogisticRegressionSummary]) | ||
| assert(blorModel.binarySummary.isInstanceOf[BinaryLogisticRegressionTrainingSummary]) | ||
| assert(blorModel.summary.totalIterations == 1) | ||
| assert(blorModel.binarySummary.totalIterations == 1) | ||
|
|
||
| val mlorModel = lr.setFamily("multinomial").fit(smallMultinomialDataset) | ||
| assert(mlorModel.summary.isInstanceOf[LogisticRegressionTrainingSummary]) | ||
|
|
@@ -279,6 +281,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { | |
| mlorModel.summary.asBinary | ||
| } | ||
| } | ||
| assert(mlorModel.summary.totalIterations == 1) | ||
|
|
||
| val mlorBinaryModel = lr.setFamily("multinomial").fit(smallBinaryDataset) | ||
| assert(mlorBinaryModel.summary.isInstanceOf[BinaryLogisticRegressionTrainingSummary]) | ||
|
|
@@ -2570,7 +2573,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { | |
| rows.map(_.getDouble(0)).toArray === binaryExpected | ||
| } | ||
| } | ||
| assert(model2.summary.totalIterations === 1) | ||
| assert(model2.summary.totalIterations === 0) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. InitialModel is set in this case. init state is good so no training is needed. I think totalIterations should be 0 instead of 1. |
||
|
|
||
| val lr3 = new LogisticRegression().setFamily("multinomial") | ||
| val model3 = lr3.fit(smallMultinomialDataset) | ||
|
|
@@ -2585,7 +2588,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { | |
| rows.map(_.getDouble(0)).toArray === multinomialExpected | ||
| } | ||
| } | ||
| assert(model4.summary.totalIterations === 1) | ||
| assert(model4.summary.totalIterations === 0) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as L2576. I think totalIterations should be 0 instead of 1 since no training. |
||
| } | ||
|
|
||
| test("binary logistic regression with all labels the same") { | ||
|
|
@@ -2605,13 +2608,15 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { | |
| assert(allZeroInterceptModel.coefficients ~== Vectors.dense(0.0) absTol 1E-3) | ||
| assert(allZeroInterceptModel.intercept === Double.NegativeInfinity) | ||
| assert(allZeroInterceptModel.summary.totalIterations === 0) | ||
| assert(allZeroInterceptModel.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. since I change the objectiveHistory from Array.empty to Array(0.0) for no training case, summary.objectiveHistory(0) should be 0.0 here. |
||
|
|
||
| val allOneInterceptModel = lrIntercept | ||
| .setLabelCol("oneLabel") | ||
| .fit(sameLabels) | ||
| assert(allOneInterceptModel.coefficients ~== Vectors.dense(0.0) absTol 1E-3) | ||
| assert(allOneInterceptModel.intercept === Double.PositiveInfinity) | ||
| assert(allOneInterceptModel.summary.totalIterations === 0) | ||
| assert(allOneInterceptModel.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4) | ||
|
|
||
| // fitIntercept=false | ||
| val lrNoIntercept = new LogisticRegression() | ||
|
|
@@ -2647,6 +2652,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { | |
| assert(pred === 4.0) | ||
| } | ||
| assert(model.summary.totalIterations === 0) | ||
| assert(model.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4) | ||
|
|
||
| // force the model to be trained with only one class | ||
| val constantZeroData = Seq( | ||
|
|
@@ -2660,7 +2666,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { | |
| assert(prob === Vectors.dense(Array(1.0))) | ||
| assert(pred === 0.0) | ||
| } | ||
| assert(modelZeroLabel.summary.totalIterations > 0) | ||
| assert(modelZeroLabel.summary.totalIterations === 0) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No training here so I think the totalIterations should be 0 instead of 1 |
||
|
|
||
| // ensure that the correct value is predicted when numClasses passed through metadata | ||
| val labelMeta = NominalAttribute.defaultAttr.withName("label").withNumValues(6).toMetadata() | ||
|
|
@@ -2675,6 +2681,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { | |
| assert(pred === 4.0) | ||
| } | ||
| require(modelWithMetadata.summary.totalIterations === 0) | ||
| assert(model.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4) | ||
| } | ||
|
|
||
| test("compressed storage for constant label") { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -761,13 +761,15 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe | |
| .fit(datasetWithWeightConstantLabel) | ||
| if (fitIntercept) { | ||
| assert(model1.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4) | ||
| assert(model1.summary.totalIterations === 0) | ||
| } | ||
| val model2 = new LinearRegression() | ||
| .setFitIntercept(fitIntercept) | ||
| .setWeightCol("weight") | ||
| .setSolver("l-bfgs") | ||
| .fit(datasetWithWeightZeroLabel) | ||
| assert(model2.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4) | ||
| assert(model2.summary.totalIterations === 0) | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -940,6 +942,19 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe | |
| } | ||
| } | ||
|
|
||
| test("linear regression training summary totalIterations") { | ||
| Seq(1, 5, 10, 20).foreach { maxIter => | ||
| val trainer = new LinearRegression().setSolver("l-bfgs").setMaxIter(maxIter) | ||
| val model = trainer.fit(datasetWithDenseFeature) | ||
| assert(model.summary.totalIterations <= maxIter) | ||
| } | ||
| Seq("auto", "normal").foreach { solver => | ||
| val trainer = new LinearRegression().setSolver(solver) | ||
| val model = trainer.fit(datasetWithDenseFeature) | ||
| assert(model.summary.totalIterations === 0) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. before my change, summary.totalIterations is 1 for "normal". I think it should be 0 since Normal Equation is not an iterative method. totalIterations should be 0 too for "auto", since "auto" uses Normal Equation in this test (a small dataset).
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If no iterative optimizer was run, I think 0 makes sense? |
||
| } | ||
| } | ||
|
|
||
| test("linear regression with weighted samples") { | ||
| val sqlContext = spark.sqlContext | ||
| import sqlContext.implicits._ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,7 +42,7 @@ def test_linear_regression_summary(self): | |
| self.assertTrue(model.hasSummary) | ||
| s = model.summary | ||
| # test that api is callable and returns expected types | ||
| self.assertGreater(s.totalIterations, 0) | ||
| self.assertEqual(s.totalIterations, 0) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. solver="normal" in this case, so I think totalIterations should be 0. |
||
| self.assertTrue(isinstance(s.predictions, DataFrame)) | ||
| self.assertEqual(s.predictionCol, "prediction") | ||
| self.assertEqual(s.labelCol, "label") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When training is not needed, LinearRegression set objectiveHistory to Array(0.0). https://github.com/apache/spark/blob/master/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala#L511
I think LogisticRegression should have the same bahavior.