Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1431,7 +1431,12 @@ private class LogisticAggregator(
private var weightSum = 0.0
private var lossSum = 0.0

private val gradientSumArray = Array.fill[Double](coefficientSize)(0.0D)
/**
 * Backing array of the broadcast coefficients, resolved on first access (lazy) and
 * excluded from serialization (`@transient`).
 *
 * Only dense coefficient vectors are supported: the array is taken directly from the
 * `DenseVector` without copying.
 *
 * @throws IllegalArgumentException if the broadcast coefficients are not a `DenseVector`.
 */
@transient private lazy val coefficientsArray: Array[Double] = bcCoefficients.value match {
  case DenseVector(values) => values
  // Bind the unexpected value once so the message reports exactly what was matched.
  case other => throw new IllegalArgumentException("coefficients only supports dense vector but " +
    s"got type ${other.getClass}.")
}
private val gradientSumArray = new Array[Double](coefficientSize)

if (multinomial && numClasses <= 2) {
logInfo(s"Multinomial logistic regression for binary classification yields separate " +
Expand All @@ -1447,7 +1452,7 @@ private class LogisticAggregator(
label: Double): Unit = {

val localFeaturesStd = bcFeaturesStd.value
val localCoefficients = bcCoefficients.value
val localCoefficients = coefficientsArray
val localGradientArray = gradientSumArray
val margin = - {
var sum = 0.0
Expand Down Expand Up @@ -1491,7 +1496,7 @@ private class LogisticAggregator(
logistic regression without pivoting.
*/
val localFeaturesStd = bcFeaturesStd.value
val localCoefficients = bcCoefficients.value
val localCoefficients = coefficientsArray
val localGradientArray = gradientSumArray

// marginOfLabel is margins(label) in the formula
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,32 @@ class LogisticRegressionSuite
assert(blrModel.intercept !== 0.0)
}

// Checks that LogisticAggregator.add throws IllegalArgumentException when the broadcast
// coefficients are a sparse vector, for both the binary and the multinomial configuration.
test("sparse coefficients in LogisticAggregator") {
// Coefficients are deliberately built with Vectors.sparse so that add() is expected to throw.
val bcCoefficientsBinary = spark.sparkContext.broadcast(Vectors.sparse(2, Array(0), Array(1.0)))
val bcFeaturesStd = spark.sparkContext.broadcast(Array(1.0))
val binaryAgg = new LogisticAggregator(bcCoefficientsBinary, bcFeaturesStd, 2,
fitIntercept = true, multinomial = false)
// Binary case: the exception is expected from add(), not from the constructor call above.
val thrownBinary = withClue("binary logistic aggregator cannot handle sparse coefficients") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should handle sparse coefficients for further performance improvement. But not in this PR.

intercept[IllegalArgumentException] {
binaryAgg.add(Instance(1.0, 1.0, Vectors.dense(1.0)))
}
}
assert(thrownBinary.getMessage.contains("coefficients only supports dense"))

// Multinomial case: same check with 3 classes and a sparse coefficient vector of size 6.
val bcCoefficientsMulti = spark.sparkContext.broadcast(Vectors.sparse(6, Array(0), Array(1.0)))
val multinomialAgg = new LogisticAggregator(bcCoefficientsMulti, bcFeaturesStd, 3,
fitIntercept = true, multinomial = true)
val thrown = withClue("multinomial logistic aggregator cannot handle sparse coefficients") {
intercept[IllegalArgumentException] {
multinomialAgg.add(Instance(1.0, 1.0, Vectors.dense(1.0)))
}
}
assert(thrown.getMessage.contains("coefficients only supports dense"))
// Release the broadcast variables created by this test (non-blocking destroy).
bcCoefficientsBinary.destroy(blocking = false)
bcFeaturesStd.destroy(blocking = false)
bcCoefficientsMulti.destroy(blocking = false)
}

test("overflow prediction for multiclass") {
val model = new LogisticRegressionModel("mLogReg",
Matrices.dense(3, 2, Array(0.0, 0.0, 0.0, 1.0, 2.0, 3.0)),
Expand Down