From bf888758353dfb75da582acad06cd18eb825e88d Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Mon, 16 May 2016 16:18:55 -0700 Subject: [PATCH 1/5] add pyspark.ml.linalg to doc --- python/docs/pyspark.ml.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/docs/pyspark.ml.rst b/python/docs/pyspark.ml.rst index 86d4186a2c798..26f7415e1a423 100644 --- a/python/docs/pyspark.ml.rst +++ b/python/docs/pyspark.ml.rst @@ -41,6 +41,14 @@ pyspark.ml.clustering module :undoc-members: :inherited-members: +pyspark.ml.linalg module +---------------------------- + +.. automodule:: pyspark.ml.linalg + :members: + :undoc-members: + :inherited-members: + pyspark.ml.recommendation module -------------------------------- From 4336291ae0666aaf56164b4d7059b65b899f9ea2 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Mon, 16 May 2016 20:40:26 -0700 Subject: [PATCH 2/5] remove Vector.parse --- python/pyspark/ml/linalg/__init__.py | 88 ---------------------------- 1 file changed, 88 deletions(-) diff --git a/python/pyspark/ml/linalg/__init__.py b/python/pyspark/ml/linalg/__init__.py index 0ab35e06ca4c1..ac90892eb6484 100644 --- a/python/pyspark/ml/linalg/__init__.py +++ b/python/pyspark/ml/linalg/__init__.py @@ -278,28 +278,6 @@ def __init__(self, ar): ar = ar.astype(np.float64) self.array = ar - @staticmethod - def parse(s): - """ - Parse string representation back into the DenseVector. - - >>> DenseVector.parse(' [ 0.0,1.0,2.0, 3.0]') - DenseVector([0.0, 1.0, 2.0, 3.0]) - """ - start = s.find('[') - if start == -1: - raise ValueError("Array should start with '['.") - end = s.find(']') - if end == -1: - raise ValueError("Array should end with ']'.") - s = s[start + 1: end] - - try: - values = [float(val) for val in s.split(',') if val] - except ValueError: - raise ValueError("Unable to parse values from %s" % s) - return DenseVector(values) - def __reduce__(self): return DenseVector, (self.array.tostring(),) @@ -557,55 +535,6 @@ def __reduce__(self): SparseVector, (self.size, self.indices.tostring(), self.values.tostring())) - @staticmethod - def parse(s): - """ - Parse string representation back into the SparseVector. - - >>> SparseVector.parse(' (4, [0,1 ],[ 4.0,5.0] )') - SparseVector(4, {0: 4.0, 1: 5.0}) - """ - start = s.find('(') - if start == -1: - raise ValueError("Tuple should start with '('") - end = s.find(')') - if start == -1: - raise ValueError("Tuple should end with ')'") - s = s[start + 1: end].strip() - - size = s[: s.find(',')] - try: - size = int(size) - except ValueError: - raise ValueError("Cannot parse size %s." % size) - - ind_start = s.find('[') - if ind_start == -1: - raise ValueError("Indices array should start with '['.") - ind_end = s.find(']') - if ind_end == -1: - raise ValueError("Indices array should end with ']'") - new_s = s[ind_start + 1: ind_end] - ind_list = new_s.split(',') - try: - indices = [int(ind) for ind in ind_list if ind] - except ValueError: - raise ValueError("Unable to parse indices from %s." % new_s) - s = s[ind_end + 1:].strip() - - val_start = s.find('[') - if val_start == -1: - raise ValueError("Values array should start with '['.") - val_end = s.find(']') - if val_end == -1: - raise ValueError("Values array should end with ']'.") - val_list = s[val_start + 1: val_end].split(',') - try: - values = [float(val) for val in val_list if val] - except ValueError: - raise ValueError("Unable to parse values from %s." % s) - return SparseVector(size, indices, values) - def dot(self, other): """ Dot product with a SparseVector or 1- or 2-dimensional Numpy array. @@ -880,23 +809,6 @@ def norm(vector, p): """ return _convert_to_vector(vector).norm(p) - @staticmethod - def parse(s): - """Parse a string representation back into the Vector. - - >>> Vectors.parse('[2,1,2 ]') - DenseVector([2.0, 1.0, 2.0]) - >>> Vectors.parse(' ( 100, [0], [2])') - SparseVector(100, {0: 2.0}) - """ - if s.find('(') == -1 and s.find('[') != -1: - return DenseVector.parse(s) - elif s.find('(') != -1: - return SparseVector.parse(s) - else: - raise ValueError( - "Cannot find tokens '[' or '(' from the input string.") - @staticmethod def zeros(size): return DenseVector(np.zeros(size)) From 290ed0e0be5201eea2a8f49319d5c0df1aa2b54f Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Mon, 16 May 2016 20:50:23 -0700 Subject: [PATCH 3/5] remove stringify --- python/pyspark/ml/linalg/__init__.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/python/pyspark/ml/linalg/__init__.py b/python/pyspark/ml/linalg/__init__.py index ac90892eb6484..2dee504e5cb1c 100644 --- a/python/pyspark/ml/linalg/__init__.py +++ b/python/pyspark/ml/linalg/__init__.py @@ -774,19 +774,6 @@ def dense(*elements): elements = elements[0] return DenseVector(elements) - @staticmethod - def stringify(vector): - """ - Converts a vector into a string, which can be recognized by - Vectors.parse(). - - >>> Vectors.stringify(Vectors.sparse(2, [1], [1.0])) - '(2,[1],[1.0])' - >>> Vectors.stringify(Vectors.dense([0.0, 1.0])) - '[0.0,1.0]' - """ - return str(vector) - @staticmethod def squared_distance(v1, v2): """ From f605e7bff9d9b8c757512079420fd5094ae13a49 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Mon, 16 May 2016 20:52:10 -0700 Subject: [PATCH 4/5] remove QRDecomposition --- python/pyspark/ml/linalg/__init__.py | 31 +--------------------------- 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/python/pyspark/ml/linalg/__init__.py b/python/pyspark/ml/linalg/__init__.py index 2dee504e5cb1c..f42c589b92255 100644 --- a/python/pyspark/ml/linalg/__init__.py +++ b/python/pyspark/ml/linalg/__init__.py @@ -44,8 +44,7 @@ __all__ = ['Vector', 'DenseVector', 'SparseVector', 'Vectors', - 'Matrix', 'DenseMatrix', 'SparseMatrix', 'Matrices', - 'QRDecomposition'] + 'Matrix', 'DenseMatrix', 'SparseMatrix', 'Matrices'] if sys.version_info[:2] == (2, 7): @@ -1136,34 +1135,6 @@ def sparse(numRows, numCols, colPtrs, rowIndices, values): return SparseMatrix(numRows, numCols, colPtrs, rowIndices, values) -class QRDecomposition(object): - """ - .. note:: Experimental - - Represents QR factors. - """ - def __init__(self, Q, R): - self._Q = Q - self._R = R - - @property - @since('2.0.0') - def Q(self): - """ - An orthogonal matrix Q in a QR decomposition. - May be null if not computed. - """ - return self._Q - - @property - @since('2.0.0') - def R(self): - """ - An upper triangular matrix R in a QR decomposition. - """ - return self._R - - def _test(): import doctest (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS) From 8166baeb2ac3dd46bca04929456a00124bd53ed7 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Mon, 16 May 2016 22:39:42 -0700 Subject: [PATCH 5/5] update tests --- python/pyspark/ml/tests.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index b67097cfdb127..96b16d6fefae0 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -1348,22 +1348,6 @@ def test_dense_matrix_is_transposed(self): self.assertTrue(array_equal(sm.colPtrs, [0, 2, 5])) self.assertTrue(array_equal(sm.values, [1, 3, 4, 6, 9])) - def test_parse_vector(self): - a = DenseVector([]) - self.assertEqual(str(a), '[]') - self.assertEqual(Vectors.parse(str(a)), a) - a = DenseVector([3, 4, 6, 7]) - self.assertEqual(str(a), '[3.0,4.0,6.0,7.0]') - self.assertEqual(Vectors.parse(str(a)), a) - a = SparseVector(4, [], []) - self.assertEqual(str(a), '(4,[],[])') - self.assertEqual(SparseVector.parse(str(a)), a) - a = SparseVector(4, [0, 2], [3, 4]) - self.assertEqual(str(a), '(4,[0,2],[3.0,4.0])') - self.assertEqual(Vectors.parse(str(a)), a) - a = SparseVector(10, [0, 1], [4, 5]) - self.assertEqual(SparseVector.parse(' (10, [0,1 ],[ 4.0,5.0] )'), a) - def test_norms(self): a = DenseVector([0, 2, 3, -1]) self.assertAlmostEqual(a.norm(2), 3.742, 3)