1818package org .apache .spark .util
1919
2020import java .util .concurrent .ConcurrentHashMap
21- import scala .collection .JavaConversions
22- import scala .collection .mutable .Map
23- import scala .collection .immutable
24- import org .apache .spark .scheduler .MapStatus
21+
2522import org .apache .spark .Logging
2623
/**
 * Pairs a stored value with the timestamp recorded for it.
 *
 * @param timestamp time associated with the value, in the units the creator supplies
 * @param value     the wrapped value
 * @tparam T type of the wrapped value
 */
private[util] case class TimeStampedValue[T](
    timestamp: Long,
    value: T)
25+
2726/**
28- * This is a custom implementation of scala.collection.mutable.Map which stores the insertion
29- * timestamp along with each key-value pair. If specified, the timestamp of each pair can be
30- * updated every time it is accessed. Key-value pairs whose timestamp are older than a particular
31- * threshold time can then be removed using the clearOldValues method. This is intended to
32- * be a drop-in replacement of scala.collection.mutable.HashMap.
27+ * A map that stores the timestamp of when a key was inserted along with the value. If specified,
28+ * the timestamp of each pair can be updated every time it is accessed.
29+ * Key-value pairs whose timestamps are older than a particular
30+ * threshold time can then be removed using the clearOldValues method. It exposes a
31+ * scala.collection.mutable.Map interface to allow it to be a drop-in replacement for Scala
32+ * HashMaps.
33+ *
34+ * Internally, it uses a Java ConcurrentHashMap, so all operations on this HashMap are thread-safe.
35+ *
3336 * @param updateTimeStampOnGet When enabled, the timestamp of a pair will be
3437 * updated when it is accessed
3538 */
36- class TimeStampedHashMap [A , B ](updateTimeStampOnGet : Boolean = false )
37- extends Map [A , B ]() with Logging {
38- val internalMap = new ConcurrentHashMap [A , (B , Long )]()
39-
40- def get (key : A ): Option [B ] = {
41- val value = internalMap.get(key)
42- if (value != null && updateTimeStampOnGet) {
43- internalMap.replace(key, value, (value._1, currentTime))
44- }
45- Option (value).map(_._1)
46- }
47-
48- def iterator : Iterator [(A , B )] = {
49- val jIterator = internalMap.entrySet().iterator()
50- JavaConversions .asScalaIterator(jIterator).map(kv => (kv.getKey, kv.getValue._1))
51- }
52-
53- override def + [B1 >: B ](kv : (A , B1 )): Map [A , B1 ] = {
54- val newMap = new TimeStampedHashMap [A , B1 ]
55- newMap.internalMap.putAll(this .internalMap)
56- newMap.internalMap.put(kv._1, (kv._2, currentTime))
57- newMap
58- }
59-
60- override def - (key : A ): Map [A , B ] = {
61- val newMap = new TimeStampedHashMap [A , B ]
62- newMap.internalMap.putAll(this .internalMap)
63- newMap.internalMap.remove(key)
64- newMap
65- }
39+ private [spark] class TimeStampedHashMap [A , B ](updateTimeStampOnGet : Boolean = false )
40+ extends WrappedJavaHashMap [A , B , A , TimeStampedValue [B ]] with Logging {
6641
67- override def += (kv : (A , B )): this .type = {
68- internalMap.put(kv._1, (kv._2, currentTime))
69- this
70- }
/** Backing store: maps each key to its value wrapped together with a timestamp. */
protected[util] val internalJavaMap: ConcurrentHashMap[A, TimeStampedValue[B]] =
  new ConcurrentHashMap[A, TimeStampedValue[B]]()
7143
72- // Should we return previous value directly or as Option ?
73- def putIfAbsent (key : A , value : B ): Option [B ] = {
74- val prev = internalMap.putIfAbsent(key, (value, currentTime))
75- if (prev != null ) Some (prev._1) else None
/**
 * Creates an empty `TimeStampedHashMap` for the requested key and value types.
 *
 * The new instance is always built with `updateTimeStampOnGet = false` (the constructor
 * default); the setting of this map is not carried over.
 */
protected[util] def newInstance[K1, V1](): WrappedJavaHashMap[K1, V1, _, _] =
  new TimeStampedHashMap[K1, V1](updateTimeStampOnGet = false)
7747
/** Exposes the backing `ConcurrentHashMap`; this is the same object as `internalJavaMap`. */
def internalMap: ConcurrentHashMap[A, TimeStampedValue[B]] = internalJavaMap
7849
79- override def -= (key : A ): this .type = {
80- internalMap.remove(key)
81- this
82- }
83-
84- override def update (key : A , value : B ) {
85- this += ((key, value))
/**
 * Looks up the value stored for `key`.
 *
 * When `updateTimeStampOnGet` is set and an entry exists, the entry is swapped for a copy
 * stamped with the current time. The swap uses the three-argument `replace`, so it only
 * takes effect if the entry is still the one that was just read.
 *
 * @param key the key to look up
 * @return the stored value wrapped in `Some`, or `None` when the key has no entry
 */
override def get(key: A): Option[B] = {
  val found = Option(internalMap.get(key))
  if (updateTimeStampOnGet) {
    found.foreach { stale =>
      val refreshed = TimeStampedValue(currentTime, stale.value)
      internalJavaMap.replace(key, stale, refreshed)
    }
  }
  found.map(_.value)
}
87-
88- override def apply (key : A ): B = {
89- val value = internalMap.get(key)
90- if (value == null ) throw new NoSuchElementException ()
91- value._1
/** Wraps a caller-supplied value together with the current time. */
@inline override protected def externalValueToInternalValue(v: B): TimeStampedValue[B] =
  TimeStampedValue(currentTime, v)
9360
94- override def filter ( p : (( A , B )) => Boolean ) : Map [ A , B ] = {
95- JavaConversions .mapAsScalaConcurrentMap(internalMap).map(kv => (kv._1, kv._2._1)).filter(p)
/** Unwraps a stored entry, dropping its timestamp. */
@inline override protected def internalValueToExternalValue(iv: TimeStampedValue[B]): B =
  iv.value
9764
98- override def empty : Map [A , B ] = new TimeStampedHashMap [A , B ]()
99-
100- override def size : Int = internalMap.size
101-
102- override def foreach [U ](f : ((A , B )) => U ) {
103- val iterator = internalMap.entrySet().iterator()
104- while (iterator.hasNext) {
105- val entry = iterator.next()
106- val kv = (entry.getKey, entry.getValue._1)
107- f(kv)
108- }
/**
 * Inserts `value` for `key` only if the key has no entry yet, delegating to
 * `ConcurrentHashMap.putIfAbsent` on the backing map. The inserted value is stamped with
 * the current time.
 *
 * @param key   the key to insert under
 * @param value the value to store if the key is absent
 * @return the value already stored for `key`, or `None` if there was no entry
 */
def putIfAbsent(key: A, value: B): Option[B] = {
  val stamped = TimeStampedValue(currentTime, value)
  val existing = Option(internalJavaMap.putIfAbsent(key, stamped))
  existing.map(_.value)
}
11070
111- def toMap : immutable.Map [A , B ] = iterator.toMap
112-
11371 /**
11472 * Removes old key-value pairs that have timestamp earlier than `threshTime`,
11573 * calling the supplied function on each such entry before removing.
11674 */
11775 def clearOldValues (threshTime : Long , f : (A , B ) => Unit ) {
118- val iterator = internalMap .entrySet().iterator()
76+ val iterator = internalJavaMap .entrySet().iterator()
11977 while (iterator.hasNext) {
12078 val entry = iterator.next()
121- if (entry.getValue._2 < threshTime) {
122- f(entry.getKey, entry.getValue._1 )
79+ if (entry.getValue.timestamp < threshTime) {
80+ f(entry.getKey, entry.getValue.value )
12381 logDebug(" Removing key " + entry.getKey)
12482 iterator.remove()
12583 }
@@ -134,5 +92,4 @@ class TimeStampedHashMap[A, B](updateTimeStampOnGet: Boolean = false)
13492 }
13593
/** Current time in milliseconds, as reported by `System.currentTimeMillis()`. */
private def currentTime: Long = {
  System.currentTimeMillis()
}
137-
13895}
0 commit comments