[SPARK-3885] Provide mechanism to remove accumulators once they are no longer used #4021
Changes from all commits
core/src/main/scala/org/apache/spark/Accumulators.scala

```diff
@@ -23,6 +23,7 @@ import java.lang.ThreadLocal
 
 import scala.collection.generic.Growable
 import scala.collection.mutable.Map
+import scala.ref.WeakReference
 import scala.reflect.ClassTag
 
 import org.apache.spark.serializer.JavaSerializer
```
```diff
@@ -280,10 +281,12 @@ object AccumulatorParam {
 // TODO: The multi-thread support in accumulators is kind of lame; check
 // if there's a more intuitive way of doing it right
 private[spark] object Accumulators {
-  // TODO: Use soft references? => need to make readObject work properly then
-  val originals = Map[Long, Accumulable[_, _]]()
-  val localAccums = new ThreadLocal[Map[Long, Accumulable[_, _]]]() {
-    override protected def initialValue() = Map[Long, Accumulable[_, _]]()
+  // Store a WeakReference instead of a StrongReference because this way accumulators can be
+  // appropriately garbage collected during long-running jobs and release memory
+  type WeakAcc = WeakReference[Accumulable[_, _]]
+  val originals = Map[Long, WeakAcc]()
+  val localAccums = new ThreadLocal[Map[Long, WeakAcc]]() {
+    override protected def initialValue() = Map[Long, WeakAcc]()
   }
   var lastId: Long = 0
 
```
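The core of this hunk is that `originals` and `localAccums` now hold `scala.ref.WeakReference` wrappers instead of strong references, so an accumulator that is no longer reachable anywhere else can be garbage collected. As a rough illustration of the semantics the patch relies on (a toy demo, not Spark code; all names below are made up):

```scala
import scala.collection.mutable.Map
import scala.ref.WeakReference

object WeakRefDemo {
  def main(args: Array[String]): Unit = {
    val registry = Map[Long, WeakReference[StringBuilder]]()
    var state = new StringBuilder("accumulator state")
    registry(1L) = new WeakReference(state)

    // While a strong reference ("state") still exists, get returns Some(...).
    println(registry(1L).get)

    // Drop the strong reference and suggest a collection; afterwards get
    // returns None (GC timing is not guaranteed, so this is best-effort).
    state = null
    System.gc()
    println(registry(1L).get)
  }
}
```

Note that a cleared `WeakReference` does not remove its map entry by itself, which is why the patch also adds an explicit `remove(accId)` method further down.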
```diff
@@ -294,9 +297,9 @@ private[spark] object Accumulators {
 
   def register(a: Accumulable[_, _], original: Boolean): Unit = synchronized {
     if (original) {
-      originals(a.id) = a
+      originals(a.id) = new WeakAcc(a)
     } else {
-      localAccums.get()(a.id) = a
+      localAccums.get()(a.id) = new WeakAcc(a)
     }
   }
 
```
```diff
@@ -307,11 +310,22 @@ private[spark] object Accumulators {
     }
   }
 
+  def remove(accId: Long) {
+    synchronized {
+      originals.remove(accId)
+    }
+  }
+
   // Get the values of the local accumulators for the current thread (by ID)
   def values: Map[Long, Any] = synchronized {
     val ret = Map[Long, Any]()
     for ((id, accum) <- localAccums.get) {
-      ret(id) = accum.localValue
+      // Since we are now storing weak references, we must check whether the underlying data
+      // is valid.
+      ret(id) = accum.get match {
+        case Some(values) => values.localValue
+        case None => None
+      }
     }
     return ret
   }
```

Contributor (on the `case None => None` branch): This was dumb of me to overlook, too: if the data structure is invalid, this just silently ignores it; even if this were a rare error condition, there should have been a warning here.
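Following up on that review note, a variant of `values` that warns instead of silently recording `None` might look like the sketch below. It is self-contained and hypothetical: `Acc` stands in for `Accumulable[_, _]`, and plain stderr output stands in for Spark's logging.

```scala
import scala.collection.mutable.Map
import scala.ref.WeakReference

object ValuesWithWarning {
  // Toy stand-in for Accumulable[_, _]; only localValue matters here.
  final class Acc(val localValue: Any)

  // Hypothetical variant of `values`: warn and skip the entry instead of
  // silently storing None when the weakly referenced accumulator is gone.
  def values(localAccums: Map[Long, WeakReference[Acc]]): Map[Long, Any] = {
    val ret = Map[Long, Any]()
    for ((id, ref) <- localAccums) {
      ref.get match {
        case Some(acc) => ret(id) = acc.localValue
        case None =>
          Console.err.println(s"WARN: accumulator $id has been garbage collected; skipping")
      }
    }
    ret
  }
}
```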
```diff
@@ -320,7 +334,13 @@ private[spark] object Accumulators {
   def add(values: Map[Long, Any]): Unit = synchronized {
     for ((id, value) <- values) {
       if (originals.contains(id)) {
-        originals(id).asInstanceOf[Accumulable[Any, Any]] ++= value
+        // Since we are now storing weak references, we must check whether the underlying data
+        // is valid.
+        originals(id).get match {
+          case Some(accum) => accum.asInstanceOf[Accumulable[Any, Any]] ++= value
+          case None =>
+            throw new IllegalAccessError("Attempted to access garbage collected Accumulator.")
+        }
       }
     }
   }
```

Contributor (on the `IllegalAccessError`): If the Accumulator is garbage collected, should we log and continue?

Contributor (reply): The exception thrown here is caught at higher levels of the stack. For example, DAGScheduler wraps calls to accumulator methods in a
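To make that reply concrete: the low-level code throws, and the caller decides how to react when the referent is gone. A rough, hypothetical illustration of that calling pattern (not the scheduler's actual code) is:

```scala
object SafeAccumulatorUpdate {
  // Wrap an accumulator update and turn an IllegalAccessError into a logged
  // warning, mirroring the "caught at higher levels" pattern described above.
  // Purely illustrative; names and handling are made up.
  def updateSafely(update: () => Unit): Unit = {
    try {
      update()
    } catch {
      case e: IllegalAccessError =>
        Console.err.println(s"WARN: accumulator update failed: ${e.getMessage}")
    }
  }
}
```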
Review comment (on the map initialization above): Guava MapMaker supports weakValues; not sure if we want to use that here, since it's not super Scala-friendly (e.g. returns nulls, etc.): http://docs.guava-libraries.googlecode.com/git-history/release/javadoc/com/google/common/collect/MapMaker.html

Reply: Hi Josh - are you suggesting replacing this snippet with a MapMaker just to simplify the initialization code? I believe the usage of either object would be the same - do you see a specific advantage to using the MapMaker?

Reply: Let's just leave this as-is; I don't think MapMaker will buy us much, now that I think about it.
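For context on that exchange, a minimal sketch of the Guava-based alternative that was considered (assuming Guava is on the classpath; `Acc` is a hypothetical stand-in for `Accumulable[_, _]`) shows the Scala-friendliness concern: lookups return `null` rather than an `Option`, which the caller then has to wrap.

```scala
import java.util.concurrent.ConcurrentMap
import com.google.common.collect.MapMaker

object MapMakerSketch {
  final class Acc // hypothetical stand-in for Accumulable[_, _]

  // weakValues() lets map values be garbage collected once no strong
  // references remain, roughly what the WeakReference-based map above
  // achieves manually.
  val originals: ConcurrentMap[java.lang.Long, Acc] =
    new MapMaker().weakValues().makeMap[java.lang.Long, Acc]()

  def lookup(id: Long): Option[Acc] =
    Option(originals.get(id)) // wrap the possible null back into an Option
}
```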