[SPARK-2774] Set preferred locations for reduce tasks #6652
@@ -137,6 +137,22 @@ class DAGScheduler(
   private[scheduler] val eventProcessLoop = new DAGSchedulerEventProcessLoop(this)
   taskScheduler.setDAGScheduler(this)
 
+  // Flag to control if reduce tasks are assigned preferred locations
+  private val shuffleLocalityEnabled =
+    sc.getConf.getBoolean("spark.shuffle.reduceLocality.enabled", true)
+  // Number of map and reduce tasks above which we do not assign preferred locations
Contributor: Could you add a comment here saying that we limit the size because of scalability issues with sorting the best locations?

Contributor (Author): Done
+  // based on map output sizes. We limit the size of jobs for which we assign preferred
+  // locations as computing the top locations by size becomes expensive.
+  private[this] val SHUFFLE_PREF_MAP_THRESHOLD = 1000
+  // NOTE: This should be less than 2000 as we use HighlyCompressedMapStatus beyond that
+  private[this] val SHUFFLE_PREF_REDUCE_THRESHOLD = 1000
+
+  // Fraction of total map output that must be at a location for it to be considered as a
+  // preferred location for a reduce task.
+  // Making this larger will focus on fewer locations where most data can be read locally,
+  // but may lead to more delay in scheduling if those locations are busy.
+  private[scheduler] val REDUCER_PREF_LOCS_FRACTION = 0.2
+
   // Called by TaskScheduler to report task's starting.
   def taskStarted(task: Task[_], taskInfo: TaskInfo) {
     eventProcessLoop.post(BeginEvent(task, taskInfo))
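The feature is gated on the new `spark.shuffle.reduceLocality.enabled` flag above, which defaults to true. A minimal sketch of how a user could opt out (standard SparkConf usage; the app name and master here are placeholders, not part of this patch):

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Disable reduce-task locality preferences; the default is true per the
// getBoolean call above. The app name and "local[2]" are placeholders.
val conf = new SparkConf()
  .setAppName("reduce-locality-demo")
  .setMaster("local[2]")
  .set("spark.shuffle.reduceLocality.enabled", "false")
val sc = new SparkContext(conf)
```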
@@ -1384,17 +1400,32 @@ class DAGScheduler(
     if (rddPrefs.nonEmpty) {
       return rddPrefs.map(TaskLocation(_))
     }
-    // If the RDD has narrow dependencies, pick the first partition of the first narrow dep
-    // that has any placement preferences. Ideally we would choose based on transfer sizes,
-    // but this will do for now.
+
     rdd.dependencies.foreach {
       case n: NarrowDependency[_] =>
+        // If the RDD has narrow dependencies, pick the first partition of the first narrow dep
+        // that has any placement preferences. Ideally we would choose based on transfer sizes,
+        // but this will do for now.
         for (inPart <- n.getParents(partition)) {
           val locs = getPreferredLocsInternal(n.rdd, inPart, visited)
           if (locs != Nil) {
             return locs
           }
         }
+      case s: ShuffleDependency[_, _, _] =>
Contributor: Can you add a high-level comment here explaining what this is doing? Something like "For reduce tasks, return the 5 locations with the largest map outputs as preferred locations".

Contributor (Author): Added. I also moved the comment for narrow dependencies closer to that case.
+        // For shuffle dependencies, pick locations which have at least REDUCER_PREF_LOCS_FRACTION
+        // of the data as preferred locations
+        if (shuffleLocalityEnabled &&
+            rdd.partitions.size < SHUFFLE_PREF_REDUCE_THRESHOLD &&
+            s.rdd.partitions.size < SHUFFLE_PREF_MAP_THRESHOLD) {
+          // Get the preferred map output locations for this reducer
+          val topLocsForReducer = mapOutputTracker.getLocationsWithLargestOutputs(s.shuffleId,
+            partition, rdd.partitions.size, REDUCER_PREF_LOCS_FRACTION)
+          if (topLocsForReducer.nonEmpty) {
+            return topLocsForReducer.get.map(loc => TaskLocation(loc.host, loc.executorId))
+          }
+        }
+
       case _ =>
Contributor: I think you can delete this pattern now; it shouldn't ever occur. Incidentally, it also seems that this code will break if they are ever given a ...
     }
     Nil
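To make the REDUCER_PREF_LOCS_FRACTION cutoff concrete, here is a minimal self-contained sketch of the idea. This is an illustration only, not Spark's MapOutputTracker.getLocationsWithLargestOutputs, which works from the per-reducer block sizes recorded in registered MapStatus objects:

```scala
// Minimal sketch of the fraction cutoff: keep every host that holds at least
// `fraction` of the total map output destined for one reducer.
// `topLocationsByFraction` is a hypothetical helper, not a Spark API.
def topLocationsByFraction(bytesByHost: Map[String, Long], fraction: Double): Seq[String] = {
  val total = bytesByHost.values.sum.toDouble
  bytesByHost.collect { case (host, bytes) if bytes >= fraction * total => host }.toSeq
}

// Mirrors the test below: hostA holds 4 of 7 bytes, hostB holds 3 of 7.
topLocationsByFraction(Map("hostA" -> 4L, "hostB" -> 3L), 0.5) // Seq("hostA")
topLocationsByFraction(Map("hostA" -> 4L, "hostB" -> 3L), 0.2) // Seq("hostA", "hostB")
```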
@@ -205,4 +205,39 @@ class MapOutputTrackerSuite extends SparkFunSuite {
     // masterTracker.stop() // this throws an exception
     rpcEnv.shutdown()
   }
+
+  test("getLocationsWithLargestOutputs with multiple outputs in same machine") {
+    val rpcEnv = createRpcEnv("test")
+    val tracker = new MapOutputTrackerMaster(conf)
+    tracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME,
+      new MapOutputTrackerMasterEndpoint(rpcEnv, tracker, conf))
+    // Setup 3 map tasks
+    // on hostA with output size 2
+    // on hostA with output size 2
+    // on hostB with output size 3
+    tracker.registerShuffle(10, 3)
+    tracker.registerMapOutput(10, 0, MapStatus(BlockManagerId("a", "hostA", 1000),
+      Array(2L)))
+    tracker.registerMapOutput(10, 1, MapStatus(BlockManagerId("a", "hostA", 1000),
+      Array(2L)))
+    tracker.registerMapOutput(10, 2, MapStatus(BlockManagerId("b", "hostB", 1000),
+      Array(3L)))
+
+    // When the threshold is 50%, only host A should be returned as a preferred location
+    // as it has 4 out of 7 bytes of output.
+    val topLocs50 = tracker.getLocationsWithLargestOutputs(10, 0, 1, 0.5)
Contributor: Can you add comments to this test -- just one here saying "When the threshold is 50%, only host A should be returned as a preferred location" and then below, "When the threshold is only 20%, both hosts should be returned".

Contributor (Author): Done
+    assert(topLocs50.nonEmpty)
+    assert(topLocs50.get.size === 1)
+    assert(topLocs50.get.head === BlockManagerId("a", "hostA", 1000))
+
+    // When the threshold is 20%, both hosts should be returned as preferred locations.
+    val topLocs20 = tracker.getLocationsWithLargestOutputs(10, 0, 1, 0.2)
+    assert(topLocs20.nonEmpty)
+    assert(topLocs20.get.size === 2)
+    assert(topLocs20.get.toSet ===
+      Seq(BlockManagerId("a", "hostA", 1000), BlockManagerId("b", "hostB", 1000)).toSet)
+
+    tracker.stop()
+    rpcEnv.shutdown()
+  }
 }
Contributor: Can you save statuses(mapIdx) here and then re-use it in the 3 places below? I just find it a little harder to read with the continued reference into the array.

Contributor (Author): Good idea. Done
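The exchange above refers to a readability refactor inside MapOutputTracker.getLocationsWithLargestOutputs, whose diff is not shown in this excerpt. A hedged sketch of the pattern the reviewer asked for, using hypothetical stand-in names since the actual method body is not reproduced here:

```scala
// Hypothetical stand-in for Spark's MapStatus; only what the sketch needs.
case class MapStatusLike(host: String, sizesByReducer: Array[Long])

// Sum output bytes per host for one reducer. The local `val status` is saved
// once and reused, instead of indexing statuses(mapIdx) repeatedly.
def bytesPerHost(statuses: Array[MapStatusLike], reducerId: Int): Map[String, Long] = {
  var acc = Map.empty[String, Long]
  var mapIdx = 0
  while (mapIdx < statuses.length) {
    val status = statuses(mapIdx) // saved once, reused below
    val host = status.host
    val bytes = status.sizesByReducer(reducerId)
    acc = acc.updated(host, acc.getOrElse(host, 0L) + bytes)
    mapIdx += 1
  }
  acc
}
```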