Skip to content

Commit e3b8eab

Browse files
author
Davies Liu
committed
fix narrow dependency
1 parent 2a1857a commit e3b8eab

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

python/pyspark/rdd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1802,7 +1802,7 @@ def combine(iterator):
18021802
merger.mergeValues(iterator)
18031803
return merger.iteritems()
18041804

1805-
locally_combined = self.mapPartitions(combine)
1805+
locally_combined = self.mapPartitions(combine, preservesPartitioning=True)
18061806
shuffled = locally_combined.partitionBy(numPartitions)
18071807

18081808
def groupByKey(it):

python/pyspark/tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -795,9 +795,9 @@ def test_narrow_dependency_in_join(self):
795795
self.assertEqual(rdd.getNumPartitions() + 2, parted.union(rdd).getNumPartitions())
796796
self.assertEqual(rdd.getNumPartitions() + 2, rdd.union(parted).getNumPartitions())
797797

798-
self.sc.setJobGroup("test1", "test", True)
799798
tracker = self.sc.statusTracker()
800799

800+
self.sc.setJobGroup("test1", "test", True)
801801
d = sorted(parted.join(parted).collect())
802802
self.assertEqual(10, len(d))
803803
self.assertEqual((0, (0, 0)), d[0])

0 commit comments

Comments
 (0)