1818import array
1919from collections import namedtuple
2020
21- from pyspark import SparkContext
21+ from pyspark import SparkContext , since
2222from pyspark .rdd import RDD
2323from pyspark .mllib .common import JavaModelWrapper , callMLlibFunc , inherit_doc
2424from pyspark .mllib .util import JavaLoader , JavaSaveable
@@ -36,6 +36,8 @@ class Rating(namedtuple("Rating", ["user", "product", "rating"])):
3636 (1, 2, 5.0)
3737 >>> (r[0], r[1], r[2])
3838 (1, 2, 5.0)
39+
40+ .. versionadded:: 1.2.0
3941 """
4042
4143 def __reduce__ (self ):
@@ -111,13 +113,17 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
111113 ... rmtree(path)
112114 ... except OSError:
113115 ... pass
116+
117+ .. versionadded:: 0.9.0
114118 """
@since("0.9.0")
def predict(self, user, product):
    """
    Predict the rating for a single (user, product) pair.

    Both identifiers are coerced to ``int`` before the call is delegated
    to the wrapped Java model's ``predict``.
    """
    user_id = int(user)
    product_id = int(product)
    return self._java_model.predict(user_id, product_id)
120125
126+ @since ("0.9.0" )
121127 def predictAll (self , user_product ):
122128 """
123129 Returns a list of predicted ratings for input user and product pairs.
@@ -128,27 +134,31 @@ def predictAll(self, user_product):
128134 user_product = user_product .map (lambda u_p : (int (u_p [0 ]), int (u_p [1 ])))
129135 return self .call ("predict" , user_product )
130136
@since("1.2.0")
def userFeatures(self):
    """
    Return a paired RDD keyed by user.

    Each value is the feature vector for that user, converted to an
    ``array.array`` of doubles (type code ``'d'``).
    """
    def as_double_array(values):
        # Pack the raw feature values into a typed array of doubles.
        return array.array('d', values)

    user_features = self.call("getUserFeatures")
    return user_features.mapValues(as_double_array)
137144
@since("1.2.0")
def productFeatures(self):
    """
    Return a paired RDD keyed by product.

    Each value is the feature vector for that product, converted to an
    ``array.array`` of doubles (type code ``'d'``).
    """
    def as_double_array(values):
        # Pack the raw feature values into a typed array of doubles.
        return array.array('d', values)

    product_features = self.call("getProductFeatures")
    return product_features.mapValues(as_double_array)
144152
@since("1.4.0")
def recommendUsers(self, product, num):
    """
    Recommend the top ``num`` users for the given product.

    The result is returned as a list of Rating objects, sorted by
    predicted rating in descending order.
    """
    recommendations = self.call("recommendUsers", product, num)
    return list(recommendations)
151160
161+ @since ("1.4.0" )
152162 def recommendProducts (self , user , num ):
153163 """
154164 Recommends the top "num" number of products for a given user and returns a list
@@ -157,17 +167,25 @@ def recommendProducts(self, user, num):
157167 return list (self .call ("recommendProducts" , user , num ))
158168
@property
@since("1.3.1")
def rank(self):
    """Rank of the feature vectors in this model, as reported by the wrapped model."""
    model_rank = self.call("rank")
    return model_rank
162174
@classmethod
@since("1.3.1")
def load(cls, sc, path):
    """
    Load a previously saved model from ``path``.

    The Java model is loaded through ``cls._load_java`` and wrapped in a
    JVM-side ``MatrixFactorizationModelWrapper`` obtained via ``sc._jvm``.
    The wrapper is then used to build the returned
    ``MatrixFactorizationModel``.
    """
    java_model = cls._load_java(sc, path)
    wrapped = sc._jvm.MatrixFactorizationModelWrapper(java_model)
    return MatrixFactorizationModel(wrapped)
168182
169183
170184class ALS (object ):
185+ """Alternating Least Squares matrix factorization
186+
187+ .. versionadded:: 1.1.0
188+ """
171189
172190 @classmethod
173191 def _prepare (cls , ratings ):
@@ -188,15 +206,31 @@ def _prepare(cls, ratings):
188206 return ratings
189207
@classmethod
@since("1.0.0")
def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, nonnegative=False,
          seed=None):
    """
    Train a matrix factorization model from explicit ratings.

    ``ratings`` is an RDD of ratings given by users to products, in the
    form of (userID, productID, rating) pairs. The ratings matrix is
    approximated as the product of two lower-rank matrices of the given
    ``rank`` (number of features). The features are solved for by running
    ``iterations`` rounds of ALS, using a level of parallelism given by
    ``blocks``.

    ``lambda_``, ``nonnegative`` and ``seed`` are forwarded unchanged to
    the backend "trainALSModel" routine.
    """
    # Normalise the input ratings before handing them to the backend.
    prepared = cls._prepare(ratings)
    java_model = callMLlibFunc("trainALSModel", prepared, rank, iterations,
                               lambda_, blocks, nonnegative, seed)
    return MatrixFactorizationModel(java_model)
196222
@classmethod
@since("1.0.0")
def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alpha=0.01,
                  nonnegative=False, seed=None):
    """
    Train a matrix factorization model from implicit preferences.

    ``ratings`` is an RDD of 'implicit preferences' given by users to
    products, in the form of (userID, productID, preference) pairs. The
    ratings matrix is approximated as the product of two lower-rank
    matrices of the given ``rank`` (number of features). The features are
    solved for by running ``iterations`` rounds of ALS, using a level of
    parallelism given by ``blocks``.

    ``lambda_``, ``alpha``, ``nonnegative`` and ``seed`` are forwarded
    unchanged to the backend "trainImplicitALSModel" routine.
    """
    # Normalise the input ratings before handing them to the backend.
    prepared = cls._prepare(ratings)
    java_model = callMLlibFunc("trainImplicitALSModel", prepared, rank,
                               iterations, lambda_, blocks, alpha, nonnegative, seed)
    return MatrixFactorizationModel(java_model)
0 commit comments