apache · davies · Jul 25, 2014 · Jul 30, 2014 · Jul 30, 2014 · Jul 30, 2014
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
@@ -311,9 +311,9 @@ def map(self, f, preservesPartitioning=False):
         >>> sorted(rdd.map(lambda x: (x, 1)).collect())
         [('a', 1), ('b', 1), ('c', 1)]
         """
-        def func(split, iterator):
+        def func(_, iterator):
             return imap(f, iterator)
-        return PipelinedRDD(self, func, preservesPartitioning)
+        return self.mapPartitionsWithIndex(func, preservesPartitioning)
 
     def flatMap(self, f, preservesPartitioning=False):
         """
@@ -1070,7 +1070,7 @@ def func(split, iterator):
                 if not isinstance(x, basestring):
                     x = unicode(x)
                 yield x.encode("utf-8")
-        keyed = PipelinedRDD(self, func)
+        keyed = self.mapPartitionsWithIndex(func)
         keyed._bypass_serializer = True
         keyed._jrdd.map(self.ctx._jvm.BytesToString()).saveAsTextFile(path)
 
@@ -1268,7 +1268,7 @@ def add_shuffle_key(split, iterator):
                 yield pack_long(split)
                 yield outputSerializer.dumps(items)
 
-        keyed = PipelinedRDD(self, add_shuffle_key)
+        keyed = self.mapPartitionsWithIndex(add_shuffle_key)
         keyed._bypass_serializer = True
         with _JavaStackTrace(self.context) as st:
             pairRDD = self.ctx._jvm.PairwiseRDD(