@@ -1727,8 +1727,8 @@ def zipWithIndex(self):
         This method needs to trigger a spark job when this RDD contains
         more than one partition.

-        >>> sc.parallelize(range(4), 2).zipWithIndex().collect()
-        [(0, 0), (1, 1), (2, 2), (3, 3)]
+        >>> sc.parallelize(["a", "b", "c", "d"], 3).zipWithIndex().collect()
+        [('a', 0), ('b', 1), ('c', 2), ('d', 3)]
         """
         starts = [0]
         if self.getNumPartitions() > 1:
@@ -1737,7 +1737,8 @@ def zipWithIndex(self):
                 starts.append(starts[-1] + nums[i])

         def func(k, it):
-            return enumerate(it, starts[k])
+            for i, v in enumerate(it, starts[k]):
+                yield v, i

         return self.mapPartitionsWithIndex(func)

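The old `func` returned `enumerate(it, starts[k])`, which yields `(index, item)` pairs; the fix swaps the order so the result matches the documented `(item, index)` contract. A minimal sanity check of the new ordering, assuming an already-running local `SparkContext` named `sc`:

```python
# Minimal sketch, assuming a local SparkContext `sc` already exists.
rdd = sc.parallelize(["a", "b", "c", "d"], 3)

# zipWithIndex() first runs a job to count the elements per partition
# (the `starts` offsets above), then each partition k enumerates from
# starts[k], yielding (item, index) pairs with consecutive indices.
print(rdd.zipWithIndex().collect())
# [('a', 0), ('b', 1), ('c', 2), ('d', 3)]
```
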
@@ -1750,14 +1751,14 @@ def zipWithUniqueId(self):
         method won't trigger a spark job, which is different from
         L{zipWithIndex}

-        >>> sc.parallelize(range(4), 2).zipWithUniqueId().collect()
-        [(0, 0), (2, 1), (1, 2), (3, 3)]
+        >>> sc.parallelize(["a", "b", "c", "d", "e"], 3).zipWithUniqueId().collect()
+        [('a', 0), ('b', 1), ('c', 4), ('d', 2), ('e', 5)]
         """
         n = self.getNumPartitions()

         def func(k, it):
             for i, v in enumerate(it):
-                yield i * n + k, v
+                yield v, i * n + k

         return self.mapPartitionsWithIndex(func)
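`zipWithUniqueId` gets the same `(item, id)` ordering fix. Since partition k hands out ids `k, n + k, 2n + k, ...` (with `n` the number of partitions), no counting job is needed, but the ids are only unique, not consecutive. A short sketch under the same assumption of a local `sc`:

```python
# Minimal sketch, assuming a local SparkContext `sc`; 3 partitions.
rdd = sc.parallelize(["a", "b", "c", "d", "e"], 3)

# Partition 0 holds ["a"], partition 1 ["b", "c"], partition 2 ["d", "e"];
# element i of partition k gets id i * n + k (n = 3 here).
print(rdd.zipWithUniqueId().collect())
# [('a', 0), ('b', 1), ('c', 4), ('d', 2), ('e', 5)]
```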