-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-20109][MLlib] Rewrote toBlockMatrix method on IndexedRowMatrix #17459
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
0614ebc
c3b9b8a
25f4989
6adb585
06c2b3a
12e78bf
4582a7e
a38851c
3fe21cf
d692d30
994b457
a7a03dc
289dbdb
f9c5506
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -98,6 +98,7 @@ class IndexedRowMatrix @Since("1.0.0") ( | |
| toBlockMatrix(1024, 1024) | ||
| } | ||
|
|
||
|
|
||
| /** | ||
| * Converts to BlockMatrix. Creates blocks of `SparseMatrix`. | ||
| * @param rowsPerBlock The number of rows of each block. The blocks at the bottom edge may have | ||
|
|
@@ -112,6 +113,67 @@ class IndexedRowMatrix @Since("1.0.0") ( | |
| toCoordinateMatrix().toBlockMatrix(rowsPerBlock, colsPerBlock) | ||
| } | ||
|
|
||
/**
 * Converts to BlockMatrix using the default block shape. Creates blocks of `DenseMatrix`
 * with size 1024 x 1024 by delegating to the two-argument overload.
 * @return a [[BlockMatrix]]
 */
def toBlockMatrixDense(): BlockMatrix = toBlockMatrixDense(1024, 1024)
|
|
||
/**
 * Converts to BlockMatrix. Creates blocks of `DenseMatrix`.
 *
 * Every row is cut into slices of at most `colsPerBlock` values. The slices are grouped by
 * their (block row, block column) coordinate and copied into a single array per block,
 * which then backs that block's `DenseMatrix`.
 *
 * @param rowsPerBlock The number of rows of each block. The blocks at the bottom edge may have
 *                     a smaller value. Must be an integer value greater than 0.
 * @param colsPerBlock The number of columns of each block. The blocks at the right edge may have
 *                     a smaller value. Must be an integer value greater than 0.
 * @return a [[BlockMatrix]]
 * @throws IllegalArgumentException if `rowsPerBlock` or `colsPerBlock` is not positive, or if
 *                                  the resulting number of row/column blocks does not fit in
 *                                  an `Int`.
 */
def toBlockMatrixDense(rowsPerBlock: Int, colsPerBlock: Int): BlockMatrix = {
  require(rowsPerBlock > 0,
    s"rowsPerBlock needs to be greater than 0. rowsPerBlock: $rowsPerBlock")
  require(colsPerBlock > 0,
    s"colsPerBlock needs to be greater than 0. colsPerBlock: $colsPerBlock")

  val m = numRows()
  val n = numCols()

  // Block coordinates are Ints. Without these guards the `.toInt` conversions below would
  // silently saturate or wrap for very large matrices and scatter rows into wrong blocks.
  require(math.ceil(m.toDouble / rowsPerBlock) <= Int.MaxValue,
    "Number of rows divided by rowsPerBlock cannot exceed maximum integer.")
  require(math.ceil(n.toDouble / colsPerBlock) <= Int.MaxValue,
    "Number of columns divided by colsPerBlock cannot exceed maximum integer.")

  // Index and size of the ragged trailing block along each dimension. When a dimension
  // divides evenly, the index is one past the final block, so it never matches below and
  // the zero remainder is never used as a block size.
  val lastRowBlockIndex = m / rowsPerBlock
  val lastColBlockIndex = n / colsPerBlock
  val lastRowBlockSize = (m % rowsPerBlock).toInt
  val lastColBlockSize = (n % colsPerBlock).toInt
  val numRowBlocks = math.ceil(m.toDouble / rowsPerBlock).toInt
  val numColBlocks = math.ceil(n.toDouble / colsPerBlock).toInt

  val blocks: RDD[((Int, Int), Matrix)] = rows.flatMap { ir =>
    val blockRow = ir.index / rowsPerBlock
    val rowInBlock = ir.index % rowsPerBlock

    // One slice per block column; the slice position is the block column index.
    ir.vector.toArray
      .grouped(colsPerBlock)
      .zipWithIndex
      .map { case (values, blockColumn) =>
        ((blockRow.toInt, blockColumn), (rowInBlock.toInt, values))
      }
  // NOTE(review): the last two arguments are the per-block dimensions; confirm they match
  // what GridPartitioner expects for its partition-shape parameters.
  }.groupByKey(GridPartitioner(numRowBlocks, numColBlocks, rowsPerBlock, colsPerBlock)).map {
    case ((blockRow, blockColumn), itr) =>
      val actualNumRows =
        if (blockRow == lastRowBlockIndex) lastRowBlockSize else rowsPerBlock
      val actualNumColumns =
        if (blockColumn == lastColBlockIndex) lastColBlockSize else colsPerBlock

      val matrixAsArray = new Array[Double](actualNumRows * actualNumColumns)
      itr.foreach { case (rowWithinBlock, values) =>
        // Entry (rowWithinBlock, i) is stored at i * actualNumRows + rowWithinBlock,
        // i.e. the values of one column are contiguous in the backing array.
        var i = 0
        while (i < values.length) {
          matrixAsArray(i * actualNumRows + rowWithinBlock) = values(i)
          i += 1
        }
      }
      ((blockRow, blockColumn), new DenseMatrix(actualNumRows, actualNumColumns, matrixAsArray))
  }
  new BlockMatrix(blocks, rowsPerBlock, colsPerBlock)
}
|
|
||
| /** | ||
| * Converts this matrix to a | ||
| * [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]]. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -89,11 +89,42 @@ class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { | |
|
|
||
test("toBlockMatrix") {
  val source = new IndexedRowMatrix(indexedRows)

  // Block shapes that must round-trip unchanged:
  //   (2, 2) exercises n % colsPerBlock != 0
  //   (3, 1) exercises m % rowsPerBlock != 0
  Seq((2, 2), (3, 1)).foreach { case (rowsPerBlock, colsPerBlock) =>
    val converted = source.toBlockMatrix(rowsPerBlock, colsPerBlock)
    assert(converted.numRows() === m)
    assert(converted.numCols() === n)
    assert(converted.toBreeze() === source.toBreeze())
  }

  // Non-positive block dimensions must be rejected.
  Seq((-1, 2), (2, 0)).foreach { case (rowsPerBlock, colsPerBlock) =>
    intercept[IllegalArgumentException] {
      source.toBlockMatrix(rowsPerBlock, colsPerBlock)
    }
  }
}
|
|
||
| test("toBlockMatrixDense") { | ||
|
||
| val idxRowMat = new IndexedRowMatrix(indexedRows) | ||
|
|
||
| // Tests when n % colsPerBlock != 0 | ||
| val blockMat = idxRowMat.toBlockMatrix(2, 2) | ||
| assert(blockMat.numRows() === m) | ||
| assert(blockMat.numCols() === n) | ||
| assert(blockMat.toBreeze() === idxRowMat.toBreeze()) | ||
|
|
||
| // Tests when m % rowsPerBlock != 0 | ||
| val blockMat2 = idxRowMat.toBlockMatrix(3, 1) | ||
| assert(blockMat2.numRows() === m) | ||
| assert(blockMat2.numCols() === n) | ||
| assert(blockMat2.toBreeze() === idxRowMat.toBreeze()) | ||
|
|
||
| intercept[IllegalArgumentException] { | ||
| idxRowMat.toBlockMatrix(-1, 2) | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please remove the extra line.