Skip to content

Commit 3fbdb61

Browse files
committed
update qr to indirect and add ut
1 parent 0d913d3 commit 3fbdb61

File tree

3 files changed

+55
-49
lines changed

3 files changed

+55
-49
lines changed

mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,11 @@ import org.apache.spark.annotation.Experimental
2525
*/
2626
@Experimental
2727
// Bundles the three SVD factors: U and V are generic in type, s is held as a Vector.
case class SingularValueDecomposition[UType, VType](
    U: UType,
    s: Vector,
    V: VType)
28+
29+
/**
30+
* :: Experimental ::
31+
* Represents QR factors.
32+
*/
33+
@Experimental
34+
// QType is the type of the Q factor and RType the type of the R factor; the type
// parameters are named after the fields they describe rather than after the SVD factors.
case class QRDecomposition[QType, RType](Q: QType, R: RType)
35+

mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala

Lines changed: 34 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import java.util.Arrays
2222
import scala.collection.mutable.ListBuffer
2323

2424
import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, SparseVector => BSV, axpy => brzAxpy,
25-
svd => brzSvd}
25+
svd => brzSvd, MatrixSingularException, inv}
2626
import breeze.numerics.{sqrt => brzSqrt}
2727
import com.github.fommil.netlib.BLAS.{getInstance => blas}
2828

@@ -498,62 +498,47 @@ class RowMatrix(
498498
}
499499

500500
/**
501-
* Compute QR decomposition for rowMatrix. The implementation is designed to optimize the QR
502-
* decomposition (factorizations) for the RowMatrix of a tall and skinny shape, yet it applies
503-
* to RowMatrix in general.
504-
*
501+
* Compute QR decomposition for [[RowMatrix]]. The implementation is designed to optimize the QR
502+
* decomposition (factorization) for the [[RowMatrix]] of a tall and skinny shape.
505503
* Reference:
506-
* Austin R. Benson, David F. Gleich, James Demmel. "Direct QR factorizations for tall-and
507-
* -skinny matrices in MapReduce architectures", 2013 IEEE International Conference on Big Data
508-
* @param computeQ: whether to computeQ, which is quite expensive.
509-
* @return the decomposition result as (Option[Q], R), where Q is a RowMatrix and R is Matrix.
504+
* Paul G. Constantine, David F. Gleich. "Tall and skinny QR factorizations in MapReduce
505+
* architectures" ([[http://dx.doi.org/10.1145/1996092.1996103]])
506+
*
507+
* @param computeQ whether to compute the Q factor
508+
* @return QRDecomposition(Q, R); Q is null if computeQ is false or if R is not invertible.
510509
*/
511-
def TSQR(computeQ: Boolean = false): (Option[RowMatrix], Matrix) = {
510+
def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = {
// Builds R from per-partition reduced QR factorizations; when computeQ is true,
// Q is then derived as this * inv(R).
512511
val col = numCols().toInt
513-
514512
// split rows horizontally into smaller matrices, and compute QR for each of them
515-
val blockQRs = rows.mapPartitions(rowsIterator =>{
516-
val partRows = rowsIterator.toArray
517-
val rowCount = partRows.size
518-
var bdm = BDM.zeros[Double](partRows.size, col)
513+
// NOTE(review): a partition with no rows yields a 0 x col matrix below; whether
// qr.reduced accepts that is not shown here -- confirm, or filter out empty partitions.
val blockQRs = rows.glom().map{ partRows =>
514+
val bdm = BDM.zeros[Double](partRows.length, col)
519515
var i = 0
520-
partRows.foreach(row =>{
516+
partRows.foreach{ row =>
521517
bdm(i, ::) := row.toBreeze.t
522518
i += 1
523-
})
524-
525-
val blockQR = breeze.linalg.qr.reduced(bdm)
526-
Iterator((blockQR.r, blockQR.q))
527-
}).cache
528-
529-
// combine the R part from previous results horizontally into a tall matrix
530-
val blockRsRdd = blockQRs.map(_._1).collect()
531-
val CombinedR = blockRsRdd.reduceLeft((r1, r2) => BDM.vertcat(r1, r2))
532-
533-
val CombinedRDecomposition = breeze.linalg.qr.reduced(CombinedR)
534-
val finalR = Matrices.fromBreeze(CombinedRDecomposition.r.toDenseMatrix)
535-
536-
val finalQ = if(computeQ){
537-
val blockQ = blockQRs.map(_._2)
538-
val rightPartQ = CombinedRDecomposition.q
539-
val rightQArray = (0 until blockQ.count().toInt)
540-
.map(i => rightPartQ(i * col until (i + 1) * col, ::))
541-
.toArray
542-
val rightQrdd = blockQ.context.parallelize(rightQArray)
543-
544-
val qProducts = blockQ.zip(rightQrdd).map(m => m._1 * m._2)
545-
val newRows = qProducts.flatMap(m => {
546-
val row = m.rows
547-
(0 until row).map(i =>{
548-
val bv = m(i, ::).t
549-
Vectors.fromBreeze(bv)
550-
})
551-
})
552-
Some(new RowMatrix(newRows))
519+
}
520+
// Only the R factor of each block is kept; the block Q factors are discarded.
breeze.linalg.qr.reduced(bdm).r
521+
// NOTE(review): blockQRs is read only once below (by treeReduce) and is never
// unpersisted in this method -- the cache() looks unnecessary; confirm.
}.cache()
522+
523+
// combine the R part from previous results vertically into a tall matrix
524+
// NOTE(review): the block R factors are only stacked here, not re-factorized between
// merges, so combinedR grows with the number of partitions -- confirm this is acceptable.
val combinedR = blockQRs.treeReduce((r1, r2) => BDM.vertcat(r1, r2))
525+
val breezeR = breeze.linalg.qr.reduced(combinedR).r.toDenseMatrix
526+
val finalR = Matrices.fromBreeze(breezeR)
527+
val finalQ = if (computeQ) {
528+
try {
529+
// Q is recovered indirectly as this * inv(R) rather than from the block Q factors.
val invR = inv(breezeR)
530+
this.multiply(Matrices.fromBreeze(invR))
531+
}
532+
catch {
533+
case err: MatrixSingularException =>
534+
logWarning("R is not invertible and return Q as null")
535+
// A singular R is reported through a warning and a null Q instead of an exception.
null
536+
}
553537
}
554-
else None
555-
556-
(finalQ, finalR)
538+
else {
539+
// computeQ = false: Q is not computed and is returned as null.
null
540+
}
541+
QRDecomposition(finalQ, finalR)
557542
}
558543

559544
/**

mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package org.apache.spark.mllib.linalg.distributed
1919

2020
import scala.util.Random
2121

22+
import breeze.numerics.abs
2223
import breeze.linalg.{DenseVector => BDV, DenseMatrix => BDM, norm => brzNorm, svd => brzSvd}
2324

2425
import org.apache.spark.SparkFunSuite
@@ -238,6 +239,18 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext {
238239
}
239240
}
240241
}
242+
243+
test("QR Decomposition") {
// Checks tallSkinnyQR (with Q requested) against Breeze's local reduced QR
// for both denseMat and sparseMat.
244+
for (mat <- Seq(denseMat, sparseMat)) {
245+
// NOTE(review): only computeQ = true is exercised; the computeQ = false path and the
// singular-R path (both of which return a null Q) are not covered by this test.
val result = mat.tallSkinnyQR(true)
246+
val expected = breeze.linalg.qr.reduced(mat.toBreeze())
247+
val calcQ = result.Q
248+
val calcR = result.R
249+
// Factors are compared on absolute values -- presumably because QR factors are only
// unique up to sign; confirm.
assert(closeToZero(abs(expected.q) - abs(calcQ.toBreeze())))
250+
assert(closeToZero(abs(expected.r) - abs(calcR.toBreeze.asInstanceOf[BDM[Double]])))
251+
// The product Q * R must reproduce the input matrix.
assert(closeToZero(calcQ.multiply(calcR).toBreeze - mat.toBreeze()))
252+
}
253+
}
241254
}
242255

243256
class RowMatrixClusterSuite extends SparkFunSuite with LocalClusterSparkContext {

0 commit comments

Comments
 (0)