@@ -21,6 +21,8 @@ import java.lang.management.ManagementFactory
 import java.lang.reflect.{Field, Modifier}
 import java.util.{IdentityHashMap, Random}
 import java.util.concurrent.ConcurrentHashMap
+import org.apache.spark.annotation.DeveloperApi
+
 import scala.collection.mutable.ArrayBuffer
 import scala.runtime.ScalaRunTime
 
@@ -29,13 +31,27 @@ import org.apache.spark.util.collection.OpenHashSet
 
 
 /**
+ * :: DeveloperApi ::
  * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in
  * memory-aware caches.
  *
  * Based on the following JavaWorld article:
  * http://www.javaworld.com/javaworld/javaqa/2003-12/02-qa-1226-sizeof.html
  */
-private[spark] object SizeEstimator extends Logging {
+@DeveloperApi
+object SizeEstimator extends Logging {
+
+  /**
+   * Estimate the number of bytes that the given object takes up on the JVM heap. The estimate
+   * includes space taken up by objects referenced by the given object, their references, and so on
+   * and so forth.
+   *
+   * This is useful for determining the amount of heap space a broadcast variable will occupy on
+   * each executor or the amount of space each object will take when caching objects in
+   * deserialized form. This is not the same as the serialized size of the object, which will
+   * typically be much smaller.
+   */
+  def estimate(obj: AnyRef): Long = estimate(obj, new IdentityHashMap[AnyRef, AnyRef])
 
   // Sizes of primitive types
   private val BYTE_SIZE = 1
@@ -161,8 +177,6 @@ private[spark] object SizeEstimator extends Logging {
       val shellSize: Long,
       val pointerFields: List[Field]) {}
 
-  def estimate(obj: AnyRef): Long = estimate(obj, new IdentityHashMap[AnyRef, AnyRef])
-
   private def estimate(obj: AnyRef, visited: IdentityHashMap[AnyRef, AnyRef]): Long = {
     val state = new SearchState(visited)
     state.enqueue(obj)
0 commit comments