Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/pyspark/ml/linalg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,10 @@ def _convert_to_vector(l):
return DenseVector(l)
elif _have_scipy and scipy.sparse.issparse(l):
assert l.shape[1] == 1, "Expected column vector"
# Make sure the converted csc_matrix has sorted indices.
csc = l.tocsc()
if not csc.has_sorted_indices:
csc.sort_indices()
return SparseVector(l.shape[0], csc.indices, csc.data)
else:
raise TypeError("Cannot convert type %s into Vector" % type(l))
Expand Down
3 changes: 3 additions & 0 deletions python/pyspark/mllib/linalg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,10 @@ def _convert_to_vector(l):
return DenseVector(l)
elif _have_scipy and scipy.sparse.issparse(l):
assert l.shape[1] == 1, "Expected column vector"
# Make sure the converted csc_matrix has sorted indices.
csc = l.tocsc()
if not csc.has_sorted_indices:
csc.sort_indices()
return SparseVector(l.shape[0], csc.indices, csc.data)
else:
raise TypeError("Cannot convert type %s into Vector" % type(l))
Expand Down
11 changes: 11 additions & 0 deletions python/pyspark/mllib/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,17 @@ def serialize(l):
self.assertEqual(sv, serialize(lil.tocsr()))
self.assertEqual(sv, serialize(lil.todok()))

def test_convert_to_vector(self):
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have no SciPy-related tests in the ml submodule, so I didn't add one there. If we really need one, please let me know and I can add it quickly.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's OK; I made a separate JIRA to port the tests there.

from scipy.sparse import csc_matrix
# Create a CSC matrix with non-sorted indices
indptr = array([0, 2])
indices = array([3, 1])
data = array([2.0, 1.0])
csc = csc_matrix((data, indices, indptr))
self.assertFalse(csc.has_sorted_indices)
sv = SparseVector(4, {1: 1, 3: 2})
self.assertEqual(sv, _convert_to_vector(csc))

def test_dot(self):
from scipy.sparse import lil_matrix
lil = lil_matrix((4, 1))
Expand Down