@@ -22,7 +22,7 @@ import java.util.Arrays
2222import scala .collection .mutable .ListBuffer
2323
2424import breeze .linalg .{DenseMatrix => BDM , DenseVector => BDV , SparseVector => BSV , axpy => brzAxpy ,
25- svd => brzSvd }
25+ svd => brzSvd , MatrixSingularException , inv }
2626import breeze .numerics .{sqrt => brzSqrt }
2727import com .github .fommil .netlib .BLAS .{getInstance => blas }
2828
@@ -498,62 +498,47 @@ class RowMatrix(
498498 }
499499
500500 /**
501- * Compute QR decomposition for rowMatrix. The implementation is designed to optimize the QR
502- * decomposition (factorizations) for the RowMatrix of a tall and skinny shape, yet it applies
503- * to RowMatrix in general.
504- *
501+ * Compute QR decomposition for [[RowMatrix ]]. The implementation is designed to optimize the QR
502+ * decomposition (factorization) for the [[RowMatrix ]] of a tall and skinny shape.
505503 * Reference:
506- * Austin R. Benson, David F. Gleich, James Demmel. "Direct QR factorizations for tall-and
507- * -skinny matrices in MapReduce architectures", 2013 IEEE International Conference on Big Data
508- * @param computeQ: whether to computeQ, which is quite expensive.
509- * @return the decomposition result as (Option[Q], R), where Q is a RowMatrix and R is Matrix.
504+ * Paul G. Constantine, David F. Gleich. "Tall and skinny QR factorizations in MapReduce
505+ * architectures" ([[http://dx.doi.org/10.1145/1996092.1996103 ]])
506+ *
507+ * @param computeQ whether to compute Q
508+ * @return QRDecomposition(Q, R); Q is null if computeQ = false or if R is not invertible.
510509 */
511- def TSQR (computeQ : Boolean = false ): ( Option [RowMatrix ] , Matrix ) = {
510+ def tallSkinnyQR (computeQ : Boolean = false ): QRDecomposition [RowMatrix , Matrix ] = {
512511 val col = numCols().toInt
513-
514512 // split rows horizontally into smaller matrices, and compute QR for each of them
515- val blockQRs = rows.mapPartitions(rowsIterator => {
516- val partRows = rowsIterator.toArray
517- val rowCount = partRows.size
518- var bdm = BDM .zeros[Double ](partRows.size, col)
513+ val blockQRs = rows.glom().map{ partRows =>
514+ val bdm = BDM .zeros[Double ](partRows.length, col)
519515 var i = 0
520- partRows.foreach( row => {
516+ partRows.foreach{ row =>
521517 bdm(i, :: ) := row.toBreeze.t
522518 i += 1
523- })
524-
525- val blockQR = breeze.linalg.qr.reduced(bdm)
526- Iterator ((blockQR.r, blockQR.q))
527- }).cache
528-
529- // combine the R part from previous results horizontally into a tall matrix
530- val blockRsRdd = blockQRs.map(_._1).collect()
531- val CombinedR = blockRsRdd.reduceLeft((r1, r2) => BDM .vertcat(r1, r2))
532-
533- val CombinedRDecomposition = breeze.linalg.qr.reduced(CombinedR )
534- val finalR = Matrices .fromBreeze(CombinedRDecomposition .r.toDenseMatrix)
535-
536- val finalQ = if (computeQ){
537- val blockQ = blockQRs.map(_._2)
538- val rightPartQ = CombinedRDecomposition .q
539- val rightQArray = (0 until blockQ.count().toInt)
540- .map(i => rightPartQ(i * col until (i + 1 ) * col, :: ))
541- .toArray
542- val rightQrdd = blockQ.context.parallelize(rightQArray)
543-
544- val qProducts = blockQ.zip(rightQrdd).map(m => m._1 * m._2)
545- val newRows = qProducts.flatMap(m => {
546- val row = m.rows
547- (0 until row).map(i => {
548- val bv = m(i, :: ).t
549- Vectors .fromBreeze(bv)
550- })
551- })
552- Some (new RowMatrix (newRows))
519+ }
520+ breeze.linalg.qr.reduced(bdm).r
521+ }.cache()
522+
523+ // combine the R part from previous results vertically into a tall matrix
524+ val combinedR = blockQRs.treeReduce((r1, r2) => BDM .vertcat(r1, r2))
525+ val breezeR = breeze.linalg.qr.reduced(combinedR).r.toDenseMatrix
526+ val finalR = Matrices .fromBreeze(breezeR)
527+ val finalQ = if (computeQ) {
528+ try {
529+ val invR = inv(breezeR)
530+ this .multiply(Matrices .fromBreeze(invR))
531+ }
532+ catch {
533+ case err : MatrixSingularException =>
534+ logWarning(" R is not invertible and return Q as null" )
535+ null
536+ }
553537 }
554- else None
555-
556- (finalQ, finalR)
538+ else {
539+ null
540+ }
541+ QRDecomposition (finalQ, finalR)
557542 }
558543
559544 /**
0 commit comments