[SPARK-8857][SPARK-8859][Core]Add an internal flag to Accumulable and send internal accumulator updates to the driver via heartbeats #7448
In `TaskContext.scala`:

```diff
@@ -152,4 +152,10 @@ abstract class TaskContext extends Serializable {
    * Returns the manager for this task's managed memory.
    */
   private[spark] def taskMemoryManager(): TaskMemoryManager
+
+  private[spark] def registerAccumulator(a: Accumulable[_, _]): Unit
+
+  private[spark] def collectInternalAccumulators(): Map[Long, Any]
+
+  private[spark] def collectAccumulators(): Map[Long, Any]
 }
```

**Contributor** (on `collectAccumulators()`): can you add scaladoc for these methods? In particular, does this one contain all accumulators, including internal ones?

**Contributor**: also make sure you document what the semantics are for the key (`Long`) and value (`Any`).
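For reference, a sketch of the scaladoc the reviewers are asking for might read as follows. The semantics here are assumptions inferred from the PR, not confirmed by it: the `Long` key is taken to be the accumulator's ID, the `Any` value its local per-task value, and `collectAccumulators()` is taken to cover both internal and user-defined accumulators.

```scala
/** Registers an accumulator so that its per-task updates can be collected. */
private[spark] def registerAccumulator(a: Accumulable[_, _]): Unit

/**
 * Returns updates of the accumulators flagged as internal (e.g. those backing
 * task metrics), keyed by accumulator ID. Each value is the accumulator's
 * local value for this task, not the merged driver-side value.
 */
private[spark] def collectInternalAccumulators(): Map[Long, Any]

/**
 * Returns updates of all accumulators registered with this task, both
 * internal and user-defined, keyed by accumulator ID.
 */
private[spark] def collectAccumulators(): Map[Long, Any]
```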
In `Task.scala`:

```diff
@@ -45,14 +45,16 @@ import org.apache.spark.util.Utils
  */
 private[spark] abstract class Task[T](val stageId: Int, var partitionId: Int) extends Serializable {
 
+  type AccumulatorUpdates = Map[Long, Any]
+
   /**
    * Called by [[Executor]] to run this task.
    *
    * @param taskAttemptId an identifier for this task attempt that is unique within a SparkContext.
    * @param attemptNumber how many times this task has been attempted (0 for the first attempt)
    * @return the result of the task
    */
-  final def run(taskAttemptId: Long, attemptNumber: Int): T = {
+  final def run(taskAttemptId: Long, attemptNumber: Int): (T, AccumulatorUpdates) = {
     context = new TaskContextImpl(
       stageId = stageId,
       partitionId = partitionId,
```

**Contributor** (on the `@return` line): update the documentation here.

```diff
@@ -62,12 +64,13 @@ private[spark] abstract class Task[T](val stageId: Int, var partitionId: Int) extends Serializable {
       runningLocally = false)
     TaskContext.setTaskContext(context)
     context.taskMetrics.setHostname(Utils.localHostName())
+    context.taskMetrics.setAccumulatorsUpdater(context.collectInternalAccumulators)
     taskThread = Thread.currentThread()
     if (_killed) {
       kill(interruptThread = false)
     }
     try {
-      runTask(context)
+      (runTask(context), context.collectAccumulators())
     } finally {
       context.markTaskCompleted()
       TaskContext.unset()
```
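To make the data flow concrete, below is a minimal, self-contained Scala sketch of the pattern this change introduces: a task returns its result together with a map of accumulator updates (accumulator ID → local value), and the internal updates are the ones a heartbeat would carry to the driver. All names here (`Acc`, `runTask`, `AccumulatorUpdatesSketch`) are illustrative stand-ins, not Spark's actual classes.

```scala
object AccumulatorUpdatesSketch {
  type AccumulatorUpdates = Map[Long, Any]

  // Simplified stand-in for an Accumulable carrying the new `internal` flag.
  final case class Acc(id: Long, internal: Boolean, var value: Long) {
    def add(v: Long): Unit = value += v
  }

  // A "task" that bumps its accumulators and returns (result, updates),
  // mirroring the new run signature `(T, AccumulatorUpdates)`.
  def runTask(accums: Seq[Acc]): (String, AccumulatorUpdates) = {
    accums.foreach(_.add(1))
    ("task-result", accums.map(a => a.id -> (a.value: Any)).toMap)
  }

  def main(args: Array[String]): Unit = {
    val accums = Seq(Acc(id = 1L, internal = true, value = 0L),
                     Acc(id = 2L, internal = false, value = 0L))
    val (result, updates) = runTask(accums)

    // Internal updates are what a heartbeat would ship to the driver.
    val internalIds = accums.filter(_.internal).map(_.id).toSet
    val internalUpdates = updates.filter { case (id, _) => internalIds(id) }
    println(s"result=$result all=$updates internal=$internalUpdates")
  }
}
```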
**Contributor** (on the new constructor parameter): add `internal` to the scaladoc params to explain what it is.
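A sketch of what that `@param` doc could say, assuming (per the PR title) that the flag marks accumulators Spark creates for internal use and that their updates are reported to the driver through executor heartbeats while the task runs:

```scala
/**
 * @param internal whether this accumulator is created by Spark for internal
 *                 use (e.g. to back task metrics). Updates to internal
 *                 accumulators are additionally sent to the driver via
 *                 executor heartbeats while the task is running.
 */
```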