Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,12 @@ sealed trait Vector extends Serializable {
@Since("2.0.0")
def toSparse: SparseVector

/**
* Converts this vector to a sparse vector with all explicit zeros removed, when the number of nonzeros (nnz) is already known.
*/
@Since("2.3.0")
private[linalg] def toSparse(nnz: Int): SparseVector

/**
* Converts this vector to a dense vector.
*/
Expand All @@ -152,7 +158,7 @@ sealed trait Vector extends Serializable {
val nnz = numNonzeros
// A dense vector needs 8 * size + 8 bytes, while a sparse vector needs 12 * nnz + 20 bytes.
if (1.5 * (nnz + 1.0) < size) {
toSparse
toSparse(nnz)
} else {
toDense
}
Expand Down Expand Up @@ -495,8 +501,9 @@ class DenseVector @Since("2.0.0") ( @Since("2.0.0") val values: Array[Double]) e
nnz
}

override def toSparse: SparseVector = {
val nnz = numNonzeros
override def toSparse: SparseVector = toSparse(numNonzeros)

private[linalg] override def toSparse(nnz: Int): SparseVector = {
val ii = new Array[Int](nnz)
val vv = new Array[Double](nnz)
var k = 0
Expand Down Expand Up @@ -635,8 +642,9 @@ class SparseVector @Since("2.0.0") (
nnz
}

override def toSparse: SparseVector = {
val nnz = numNonzeros
override def toSparse: SparseVector = toSparse(numNonzeros)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't need to be overridden. Just define it in the superclass

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we define
def toSparse: SparseVector = toSparse(numNonzeros)
in the superclass, then calling dv.toSparse (there are calls of this kind in the code) produces the error message:
Both toSparse in the DenseVector of type (nnz:Int) org.apache.spark.ml.linalg.SparseVector and toSparse in trait Vector of type =>org.apache.spark.ml.linalg.SparseVector match .
So we should change the name of toSparse(nnz: Int), maybe to toSparseWithSize(nnz: Int).


private[linalg] override def toSparse(nnz: Int): SparseVector = {
if (nnz == numActives) {
this
} else {
Expand Down
20 changes: 15 additions & 5 deletions mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,12 @@ sealed trait Vector extends Serializable {
@Since("1.4.0")
def toSparse: SparseVector

/**
* Converts this vector to a sparse vector with all explicit zeros removed, when the number of nonzeros (nnz) is already known.
*/
@Since("2.3.0")
private[linalg] def toSparse(nnz: Int): SparseVector

/**
* Converts this vector to a dense vector.
*/
Expand All @@ -165,7 +171,7 @@ sealed trait Vector extends Serializable {
val nnz = numNonzeros
// A dense vector needs 8 * size + 8 bytes, while a sparse vector needs 12 * nnz + 20 bytes.
if (1.5 * (nnz + 1.0) < size) {
toSparse
toSparse(nnz)
} else {
toDense
}
Expand Down Expand Up @@ -670,8 +676,10 @@ class DenseVector @Since("1.0.0") (
}

@Since("1.4.0")
override def toSparse: SparseVector = {
val nnz = numNonzeros
override def toSparse: SparseVector = toSparse(numNonzeros)

@Since("2.3.0")
private[linalg] override def toSparse(nnz: Int): SparseVector = {
val ii = new Array[Int](nnz)
val vv = new Array[Double](nnz)
var k = 0
Expand Down Expand Up @@ -823,8 +831,10 @@ class SparseVector @Since("1.0.0") (
}

@Since("1.4.0")
override def toSparse: SparseVector = {
val nnz = numNonzeros
override def toSparse: SparseVector = toSparse(numNonzeros)

@Since("2.3.0")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does not need Since because it is private

private[linalg] override def toSparse(nnz: Int): SparseVector = {
if (nnz == numActives) {
this
} else {
Expand Down
4 changes: 4 additions & 0 deletions project/MimaExcludes.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1012,6 +1012,10 @@ object MimaExcludes {
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setFeatureSubsetStrategy"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.numTrees"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setFeatureSubsetStrategy")
) ++ Seq(
// [SPARK-21680][ML][MLLIB] Optimize Vector compress
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, does this really cause a MiMa failure? what's the message, is it about adding the new method to the interface? I think it could be OK because it's a sealed trait that user code can't implement. CC maybe @MLnick or @sethah or @jkbradley for a thought on that

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message is "method toSparse(nnz: Int) in trait is present only in current version".

ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.toSparse"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Vector.toSparse")
)
}

Expand Down