@@ -594,7 +594,7 @@ class LogisticRegression @Since("1.2.0") (
Vectors.dense(if (numClasses == 2) Double.PositiveInfinity else Double.NegativeInfinity)
}
if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist()
- return createModel(dataset, numClasses, coefMatrix, interceptVec, Array.empty)
+ return createModel(dataset, numClasses, coefMatrix, interceptVec, Array(0.0))
huaxingao (Contributor, Author) commented:
When training is not needed, LinearRegression sets objectiveHistory to Array(0.0). https://github.com/apache/spark/blob/master/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala#L511
I think LogisticRegression should have the same behavior.
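As a rough illustration of the intended behavior (a sketch, not part of the patch; it assumes this change is applied and an active SparkSession named `spark`), fitting on a constant label with fitIntercept=true skips training, so the summary should report a one-element history and zero iterations:

```scala
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.linalg.Vectors

// Constant label: the intercept alone fits the data, so no optimizer runs.
val df = spark.createDataFrame(Seq(
  (1.0, Vectors.dense(1.0)),
  (1.0, Vectors.dense(2.0)),
  (1.0, Vectors.dense(3.0))
)).toDF("label", "features")

val model = new LogisticRegression().setFitIntercept(true).fit(df)

// With this patch, the skipped-training path records Array(0.0) ...
assert(model.summary.objectiveHistory.sameElements(Array(0.0)))
// ... which corresponds to zero iterations.
assert(model.summary.totalIterations == 0)
```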

}

if (!$(fitIntercept) && isConstantLabel) {
@@ -1545,13 +1545,19 @@ sealed trait LogisticRegressionSummary extends Serializable {
*/
sealed trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary {

- /** objective function (scaled loss + regularization) at each iteration. */
+ /**
+  * Objective function (scaled loss + regularization) at each iteration.
+  * It contains one more element (the initial state) than the number of iterations.
+  */
@Since("1.5.0")
def objectiveHistory: Array[Double]

/** Number of training iterations. */
@Since("1.5.0")
- def totalIterations: Int = objectiveHistory.length
+ def totalIterations: Int = {
+   assert(objectiveHistory.length > 0, "objectiveHistory length should be greater than 0.")
+   objectiveHistory.length - 1
+ }

}
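To make the new contract concrete, here is a small illustrative sketch (plain Scala, not Spark code, under the assumption that the history always starts with the initial objective):

```scala
object ObjectiveHistoryExample extends App {
  // Hypothetical history: the initial objective followed by one value per iteration.
  val objectiveHistory = Array(0.693, 0.412, 0.305, 0.297)

  // The first element is the initial state, so the iteration count is length - 1.
  val totalIterations = objectiveHistory.length - 1
  println(s"totalIterations = $totalIterations") // 3

  // A model whose training was skipped records Array(0.0), hence 0 iterations.
  val skippedHistory = Array(0.0)
  println(s"skipped iterations = ${skippedHistory.length - 1}") // 0
}
```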

@@ -433,7 +433,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String
Vectors.dense(Array.fill(dim)(1.0))
}

val (parameters, objectiveHistory) = if ($(blockSize) == 1) {
trainOnRows(instances, yMean, yStd, featuresMean, featuresStd,
initialValues, regularization, optimizer)
} else {
@@ -939,8 +939,10 @@ class LinearRegressionTrainingSummary private[regression] (
* @see `LinearRegression.solver`
*/
@Since("1.5.0")
- val totalIterations = objectiveHistory.length
+ val totalIterations = {
+   assert(objectiveHistory.length > 0, "objectiveHistory length should be greater than 0.")
+   objectiveHistory.length - 1
+ }
}

/**
@@ -143,6 +143,6 @@ public void logisticRegressionTrainingSummary() {
LogisticRegressionModel model = lr.fit(dataset);

LogisticRegressionTrainingSummary summary = model.summary();
- Assert.assertEquals(summary.totalIterations(), summary.objectiveHistory().length);
+ Assert.assertEquals(summary.totalIterations(), summary.objectiveHistory().length - 1);
}
}
@@ -266,6 +266,8 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
assert(blorModel.summary.isInstanceOf[BinaryLogisticRegressionTrainingSummary])
assert(blorModel.summary.asBinary.isInstanceOf[BinaryLogisticRegressionSummary])
assert(blorModel.binarySummary.isInstanceOf[BinaryLogisticRegressionTrainingSummary])
+ assert(blorModel.summary.totalIterations == 1)
+ assert(blorModel.binarySummary.totalIterations == 1)

val mlorModel = lr.setFamily("multinomial").fit(smallMultinomialDataset)
assert(mlorModel.summary.isInstanceOf[LogisticRegressionTrainingSummary])
@@ -279,6 +281,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
mlorModel.summary.asBinary
}
}
+ assert(mlorModel.summary.totalIterations == 1)

val mlorBinaryModel = lr.setFamily("multinomial").fit(smallBinaryDataset)
assert(mlorBinaryModel.summary.isInstanceOf[BinaryLogisticRegressionTrainingSummary])
@@ -2570,7 +2573,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
rows.map(_.getDouble(0)).toArray === binaryExpected
}
}
- assert(model2.summary.totalIterations === 1)
+ assert(model2.summary.totalIterations === 0)
huaxingao (Contributor, Author) commented on Jun 10, 2020:
An initial model is set in this case, and the initial state is already good, so no training is needed. I think totalIterations should be 0 instead of 1.


val lr3 = new LogisticRegression().setFamily("multinomial")
val model3 = lr3.fit(smallMultinomialDataset)
@@ -2585,7 +2588,7 @@
rows.map(_.getDouble(0)).toArray === multinomialExpected
}
}
- assert(model4.summary.totalIterations === 1)
+ assert(model4.summary.totalIterations === 0)
huaxingao (Contributor, Author) commented:
Same as L2576: I think totalIterations should be 0 instead of 1, since no training happens here.

}

test("binary logistic regression with all labels the same") {
@@ -2605,13 +2608,15 @@
assert(allZeroInterceptModel.coefficients ~== Vectors.dense(0.0) absTol 1E-3)
assert(allZeroInterceptModel.intercept === Double.NegativeInfinity)
assert(allZeroInterceptModel.summary.totalIterations === 0)
+ assert(allZeroInterceptModel.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4)
huaxingao (Contributor, Author) commented on Jun 10, 2020:
Since I changed objectiveHistory from Array.empty to Array(0.0) for the no-training case, summary.objectiveHistory(0) should be 0.0 here.


val allOneInterceptModel = lrIntercept
.setLabelCol("oneLabel")
.fit(sameLabels)
assert(allOneInterceptModel.coefficients ~== Vectors.dense(0.0) absTol 1E-3)
assert(allOneInterceptModel.intercept === Double.PositiveInfinity)
assert(allOneInterceptModel.summary.totalIterations === 0)
+ assert(allOneInterceptModel.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4)

// fitIntercept=false
val lrNoIntercept = new LogisticRegression()
@@ -2647,6 +2652,7 @@
assert(pred === 4.0)
}
assert(model.summary.totalIterations === 0)
+ assert(model.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4)

// force the model to be trained with only one class
val constantZeroData = Seq(
@@ -2660,7 +2666,7 @@
assert(prob === Vectors.dense(Array(1.0)))
assert(pred === 0.0)
}
- assert(modelZeroLabel.summary.totalIterations > 0)
+ assert(modelZeroLabel.summary.totalIterations === 0)
huaxingao (Contributor, Author) commented:
No training happens here, so I think totalIterations should be 0 instead of 1.


// ensure that the correct value is predicted when numClasses passed through metadata
val labelMeta = NominalAttribute.defaultAttr.withName("label").withNumValues(6).toMetadata()
@@ -2675,6 +2681,7 @@
assert(pred === 4.0)
}
require(modelWithMetadata.summary.totalIterations === 0)
+ assert(model.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4)
}

test("compressed storage for constant label") {
@@ -761,13 +761,15 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe
.fit(datasetWithWeightConstantLabel)
if (fitIntercept) {
assert(model1.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4)
+ assert(model1.summary.totalIterations === 0)
}
val model2 = new LinearRegression()
.setFitIntercept(fitIntercept)
.setWeightCol("weight")
.setSolver("l-bfgs")
.fit(datasetWithWeightZeroLabel)
assert(model2.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4)
+ assert(model2.summary.totalIterations === 0)
}
}
}
@@ -940,6 +942,19 @@
}
}

test("linear regression training summary totalIterations") {
Seq(1, 5, 10, 20).foreach { maxIter =>
val trainer = new LinearRegression().setSolver("l-bfgs").setMaxIter(maxIter)
val model = trainer.fit(datasetWithDenseFeature)
assert(model.summary.totalIterations <= maxIter)
}
Seq("auto", "normal").foreach { solver =>
val trainer = new LinearRegression().setSolver(solver)
val model = trainer.fit(datasetWithDenseFeature)
assert(model.summary.totalIterations === 0)
huaxingao (Contributor, Author) commented on Jun 10, 2020:
Before my change, summary.totalIterations is 1 for "normal". I think it should be 0, since the normal equation is not an iterative method. totalIterations should also be 0 for "auto", since "auto" uses the normal equation in this test (a small dataset).
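For reference (a generic sketch, not the exact form Spark uses; its scaling and standardization details are omitted), the "normal" solver obtains the coefficients in closed form via weighted least squares, so there is no per-iteration objective to record. With ridge-style regularization strength λ, sample weights W, design matrix X, and targets y:

$$
\hat{\beta} = (X^{\top} W X + \lambda I)^{-1} X^{\top} W y
$$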

Contributor replied:
I think summary.totalIterations being 1 is reasonable, since it needs one pass over the dataset.

Member replied:
If no iterative optimizer was run, I think 0 makes sense?

+   }
+ }

test("linear regression with weighted samples") {
val sqlContext = spark.sqlContext
import sqlContext.implicits._
python/pyspark/ml/classification.py (3 changes: 2 additions & 1 deletion)
@@ -1119,7 +1119,8 @@ class LogisticRegressionTrainingSummary(LogisticRegressionSummary):
def objectiveHistory(self):
"""
Objective function (scaled loss + regularization) at each
- iteration.
+ iteration. It contains one more element (the initial state)
+ than the number of iterations.
"""
return self._call_java("objectiveHistory")

python/pyspark/ml/tests/test_training_summary.py (2 changes: 1 addition & 1 deletion)
@@ -42,7 +42,7 @@ def test_linear_regression_summary(self):
self.assertTrue(model.hasSummary)
s = model.summary
# test that api is callable and returns expected types
- self.assertGreater(s.totalIterations, 0)
+ self.assertEqual(s.totalIterations, 0)
huaxingao (Contributor, Author) commented on Jun 10, 2020:
solver="normal" in this case, so I think totalIterations should be 0.

self.assertTrue(isinstance(s.predictions, DataFrame))
self.assertEqual(s.predictionCol, "prediction")
self.assertEqual(s.labelCol, "label")