Skip to content

Commit 60c72d9

Browse files
committed
Fixed stat.py doc test to work for Python versions printing nan or NaN.
1 parent b20d90a commit 60c72d9

File tree

1 file changed

+12
-10
lines changed

1 file changed

+12
-10
lines changed

python/pyspark/mllib/stat.py

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -118,16 +118,18 @@ def corr(x, y=None, method=None):
118 118
>>> from linalg import Vectors
119 119
>>> rdd = sc.parallelize([Vectors.dense([1, 0, 0, -2]), Vectors.dense([4, 5, 0, 3]),
120 120
... Vectors.dense([6, 7, 0, 8]), Vectors.dense([9, 0, 0, 1])])
121-
>>> Statistics.corr(rdd)
122-
array([[ 1. , 0.05564149, NaN, 0.40047142],
123-
[ 0.05564149, 1. , NaN, 0.91359586],
124-
[ NaN, NaN, 1. , NaN],
125-
[ 0.40047142, 0.91359586, NaN, 1. ]])
126-
>>> Statistics.corr(rdd, method="spearman")
127-
array([[ 1. , 0.10540926, NaN, 0.4 ],
128-
[ 0.10540926, 1. , NaN, 0.9486833 ],
129-
[ NaN, NaN, 1. , NaN],
130-
[ 0.4 , 0.9486833 , NaN, 1. ]])
121+
>>> pearsonCorr = Statistics.corr(rdd)
122+
>>> print str(pearsonCorr).replace('nan', 'NaN')
123+
[[ 1. 0.05564149 NaN 0.40047142]
124+
[ 0.05564149 1. NaN 0.91359586]
125+
[ NaN NaN 1. NaN]
126+
[ 0.40047142 0.91359586 NaN 1. ]]
127+
>>> spearmanCorr = Statistics.corr(rdd, method="spearman")
128+
>>> print str(spearmanCorr).replace('nan', 'NaN')
129+
[[ 1. 0.10540926 NaN 0.4 ]
130+
[ 0.10540926 1. NaN 0.9486833 ]
131+
[ NaN NaN 1. NaN]
132+
[ 0.4 0.9486833 NaN 1. ]]
131 133
>>> try:
132 134
... Statistics.corr(rdd, "spearman")
133 135
... print "Method name as second argument without 'method=' shouldn't be allowed."

0 commit comments

Comments
 (0)