Commit cebe5bf

improve test cases, reverse the order of index
1 parent 0d2a128 commit cebe5bf

1 file changed: +7 −6 lines changed

python/pyspark/rdd.py

Lines changed: 7 additions & 6 deletions
@@ -1727,8 +1727,8 @@ def zipWithIndex(self):
         This method needs to trigger a spark job when this RDD contains
         more than one partitions.

-        >>> sc.parallelize(range(4), 2).zipWithIndex().collect()
-        [(0, 0), (1, 1), (2, 2), (3, 3)]
+        >>> sc.parallelize(["a", "b", "c", "d"], 3).zipWithIndex().collect()
+        [('a', 0), ('b', 1), ('c', 2), ('d', 3)]
         """
         starts = [0]
         if self.getNumPartitions() > 1:
@@ -1737,7 +1737,8 @@ def zipWithIndex(self):
             starts.append(starts[-1] + nums[i])

         def func(k, it):
-            return enumerate(it, starts[k])
+            for i, v in enumerate(it, starts[k]):
+                yield v, i

         return self.mapPartitionsWithIndex(func)

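Note: the old func returned enumerate(it, starts[k]), which pairs elements as (index, value); the new generator flips this to (value, index), matching the (element, index) order of Scala's RDD.zipWithIndex. A minimal local sketch of the offset logic, with plain Python lists standing in for partitions (the partitions data and the helper name zip_with_index are illustrative, not part of the patch):

    from itertools import accumulate

    # Plain lists standing in for an RDD's partitions (illustrative data).
    partitions = [["a", "b"], ["c"], ["d"]]

    # As in the patch: partition k starts numbering at the total number of
    # elements in partitions 0..k-1, which is why a Spark job is needed to
    # count them when there is more than one partition.
    counts = [len(p) for p in partitions]
    starts = [0] + list(accumulate(counts[:-1]))

    def zip_with_index(k, it):
        # Yield (value, index), the tuple order introduced by this commit.
        for i, v in enumerate(it, starts[k]):
            yield v, i

    print([p for k, part in enumerate(partitions) for p in zip_with_index(k, part)])
    # [('a', 0), ('b', 1), ('c', 2), ('d', 3)]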
@@ -1750,14 +1751,14 @@ def zipWithUniqueId(self):
         method won't trigger a spark job, which is different from
         L{zipWithIndex}

-        >>> sc.parallelize(range(4), 2).zipWithUniqueId().collect()
-        [(0, 0), (2, 1), (1, 2), (3, 3)]
+        >>> sc.parallelize(["a", "b", "c", "d", "e"], 3).zipWithUniqueId().collect()
+        [('a', 0), ('b', 1), ('c', 4), ('d', 2), ('e', 5)]
         """
         n = self.getNumPartitions()

         def func(k, it):
             for i, v in enumerate(it):
-                yield i * n + k, v
+                yield v, i * n + k

         return self.mapPartitionsWithIndex(func)

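Note: zipWithUniqueId assigns element i of partition k the ID i * n + k, where n is the number of partitions. The IDs are unique without counting other partitions (so no Spark job is triggered), but they are not contiguous. A local sketch reproducing the new doctest output, with plain lists standing in for the three partitions parallelize would create for five elements (illustrative only, no SparkContext involved):

    # Five elements split across 3 partitions the way parallelize slices them.
    partitions = [["a"], ["b", "c"], ["d", "e"]]
    n = len(partitions)

    def zip_with_unique_id(k, it):
        # Element i of partition k gets ID i * n + k: unique across all
        # partitions, since IDs in partition k are congruent to k mod n.
        for i, v in enumerate(it):
            yield v, i * n + k

    print([p for k, part in enumerate(partitions) for p in zip_with_unique_id(k, part)])
    # [('a', 0), ('b', 1), ('c', 4), ('d', 2), ('e', 5)]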