@@ -52,6 +52,12 @@ def uniformRDD(sc, size, numPartitions=None, seed=None):
5252 C{RandomRDDs.uniformRDD(sc, n, p, seed)\
5353 .map(lambda v: a + (b - a) * v)}
5454
55+ :param sc: SparkContext used to create the RDD.
56+ :param size: Size of the RDD.
57+ :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
58+ :param seed: Random seed (default: a random long integer).
59+ :return: RDD of float comprised of i.i.d. samples ~ `U(0.0, 1.0)`.
60+
5561 >>> x = RandomRDDs.uniformRDD(sc, 100).collect()
5662 >>> len(x)
5763 100
@@ -76,6 +82,12 @@ def normalRDD(sc, size, numPartitions=None, seed=None):
7682 C{RandomRDDs.normalRDD(sc, n, p, seed)\
7783 .map(lambda v: mean + sigma * v)}
7884
85+ :param sc: SparkContext used to create the RDD.
86+ :param size: Size of the RDD.
87+ :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
88+ :param seed: Random seed (default: a random long integer).
89+ :return: RDD of float comprised of i.i.d. samples ~ N(0.0, 1.0).
90+
7991 >>> x = RandomRDDs.normalRDD(sc, 1000, seed=1L)
8092 >>> stats = x.stats()
8193 >>> stats.count()
@@ -93,6 +105,13 @@ def poissonRDD(sc, mean, size, numPartitions=None, seed=None):
93105 Generates an RDD comprised of i.i.d. samples from the Poisson
94106 distribution with the input mean.
95107
108+ :param sc: SparkContext used to create the RDD.
109+ :param mean: Mean, or lambda, for the Poisson distribution.
110+ :param size: Size of the RDD.
111+ :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
112+ :param seed: Random seed (default: a random long integer).
113+ :return: RDD of float comprised of i.i.d. samples ~ Pois(mean).
114+
96115 >>> mean = 100.0
97116 >>> x = RandomRDDs.poissonRDD(sc, mean, 1000, seed=2L)
98117 >>> stats = x.stats()
@@ -104,7 +123,7 @@ def poissonRDD(sc, mean, size, numPartitions=None, seed=None):
104123 >>> abs(stats.stdev() - sqrt(mean)) < 0.5
105124 True
106125 """
107- return callMLlibFunc ("poissonRDD" , sc ._jsc , mean , size , numPartitions , seed )
126+ return callMLlibFunc ("poissonRDD" , sc ._jsc , float ( mean ) , size , numPartitions , seed )
108127
109128 @staticmethod
110129 @toArray
@@ -113,6 +132,13 @@ def uniformVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None):
113132 Generates an RDD comprised of vectors containing i.i.d. samples drawn
114133 from the uniform distribution U(0.0, 1.0).
115134
135+ :param sc: SparkContext used to create the RDD.
136+ :param numRows: Number of Vectors in the RDD.
137+ :param numCols: Number of elements in each Vector.
138+ :param numPartitions: Number of partitions in the RDD.
139+ :param seed: Seed for the RNG that generates the seed for the generator in each partition.
140+ :return: RDD of Vector with vectors containing i.i.d. samples ~ `U(0.0, 1.0)`.
141+
116142 >>> import numpy as np
117143 >>> mat = np.matrix(RandomRDDs.uniformVectorRDD(sc, 10, 10).collect())
118144 >>> mat.shape
@@ -131,6 +157,13 @@ def normalVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None):
131157 Generates an RDD comprised of vectors containing i.i.d. samples drawn
132158 from the standard normal distribution.
133159
160+ :param sc: SparkContext used to create the RDD.
161+ :param numRows: Number of Vectors in the RDD.
162+ :param numCols: Number of elements in each Vector.
163+ :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
164+ :param seed: Random seed (default: a random long integer).
165+ :return: RDD of Vector with vectors containing i.i.d. samples ~ `N(0.0, 1.0)`.
166+
134167 >>> import numpy as np
135168 >>> mat = np.matrix(RandomRDDs.normalVectorRDD(sc, 100, 100, seed=1L).collect())
136169 >>> mat.shape
@@ -149,6 +182,14 @@ def poissonVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None):
149182 Generates an RDD comprised of vectors containing i.i.d. samples drawn
150183 from the Poisson distribution with the input mean.
151184
185+ :param sc: SparkContext used to create the RDD.
186+ :param mean: Mean, or lambda, for the Poisson distribution.
187+ :param numRows: Number of Vectors in the RDD.
188+ :param numCols: Number of elements in each Vector.
189+ :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
190+ :param seed: Random seed (default: a random long integer).
191+ :return: RDD of Vector with vectors containing i.i.d. samples ~ Pois(mean).
192+
152193 >>> import numpy as np
153194 >>> mean = 100.0
154195 >>> rdd = RandomRDDs.poissonVectorRDD(sc, mean, 100, 100, seed=1L)
@@ -161,7 +202,7 @@ def poissonVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None):
161202 >>> abs(mat.std() - sqrt(mean)) < 0.5
162203 True
163204 """
164- return callMLlibFunc ("poissonVectorRDD" , sc ._jsc , mean , numRows , numCols ,
205+ return callMLlibFunc ("poissonVectorRDD" , sc ._jsc , float ( mean ) , numRows , numCols ,
165206 numPartitions , seed )
166207
167208
0 commit comments