@@ -1727,8 +1727,8 @@ def zipWithIndex(self):
         This method needs to trigger a spark job when this RDD contains
         more than one partition.

-        >>> sc.parallelize(range(4), 2).zipWithIndex().collect()
-        [(0, 0), (1, 1), (2, 2), (3, 3)]
+        >>> sc.parallelize(["a", "b", "c", "d"], 3).zipWithIndex().collect()
+        [('a', 0), ('b', 1), ('c', 2), ('d', 3)]
         """
         starts = [0]
         if self.getNumPartitions() > 1:
@@ -1737,7 +1737,8 @@ def zipWithIndex(self):
                 starts.append(starts[-1] + nums[i])

         def func(k, it):
-            return enumerate(it, starts[k])
+            for i, v in enumerate(it, starts[k]):
+                yield v, i

         return self.mapPartitionsWithIndex(func)

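The old `func` returned `enumerate(it, starts[k])`, which yields `(index, item)` pairs; the fix swaps the order so the result matches the documented `(item, index)` contract. A minimal sanity check of the new ordering, assuming an already-running local `SparkContext` named `sc`:

```python
# Minimal sketch, assuming a local SparkContext `sc` already exists.
rdd = sc.parallelize(["a", "b", "c", "d"], 3)

# zipWithIndex() first runs a job to count the elements per partition
# (the `starts` offsets above), then each partition k enumerates from
# starts[k], yielding (item, index) pairs with consecutive indices.
print(rdd.zipWithIndex().collect())
# [('a', 0), ('b', 1), ('c', 2), ('d', 3)]
```
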
@@ -1750,14 +1751,14 @@ def zipWithUniqueId(self):
         method won't trigger a spark job, which is different from
         L{zipWithIndex}

-        >>> sc.parallelize(range(4), 2).zipWithUniqueId().collect()
-        [(0, 0), (2, 1), (1, 2), (3, 3)]
+        >>> sc.parallelize(["a", "b", "c", "d", "e"], 3).zipWithUniqueId().collect()
+        [('a', 0), ('b', 1), ('c', 4), ('d', 2), ('e', 5)]
         """
         n = self.getNumPartitions()

         def func(k, it):
             for i, v in enumerate(it):
-                yield i * n + k, v
+                yield v, i * n + k

         return self.mapPartitionsWithIndex(func)
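`zipWithUniqueId` gets the same `(item, id)` ordering fix. Since partition k hands out ids `k, n + k, 2n + k, ...` (with `n` the number of partitions), no counting job is needed, but the ids are only unique, not consecutive. A short sketch under the same assumption of a local `sc`:

```python
# Minimal sketch, assuming a local SparkContext `sc`; 3 partitions.
rdd = sc.parallelize(["a", "b", "c", "d", "e"], 3)

# Partition 0 holds ["a"], partition 1 ["b", "c"], partition 2 ["d", "e"];
# element i of partition k gets id i * n + k (n = 3 here).
print(rdd.zipWithUniqueId().collect())
# [('a', 0), ('b', 1), ('c', 4), ('d', 2), ('e', 5)]
```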