Skip to content

Commit f05fd17

Browse files
mateiz authored and markhamstra committed
Merge pull request alteryx#223 from rxin/transient
Mark the partitioner, name, and generator fields in RDD as @transient, as part of the effort to reduce serialized task size. (cherry picked from commit d6e5473) Signed-off-by: Patrick Wendell <[email protected]>
1 parent e29bcd7 commit f05fd17

File tree

1 file changed

+5
-5
lines changed
  • core/src/main/scala/org/apache/spark/rdd

1 file changed

+5
-5
lines changed

core/src/main/scala/org/apache/spark/rdd/RDD.scala

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ abstract class RDD[T: ClassManifest](
101101
protected def getPreferredLocations(split: Partition): Seq[String] = Nil
102102

103103
/** Optionally overridden by subclasses to specify how they are partitioned. */
104-
val partitioner: Option[Partitioner] = None
104+
@transient val partitioner: Option[Partitioner] = None
105105

106106
// =======================================================================
107107
// Methods and fields available on all RDDs
@@ -114,7 +114,7 @@ abstract class RDD[T: ClassManifest](
114114
val id: Int = sc.newRddId()
115115

116116
/** A friendly name for this RDD */
117-
var name: String = null
117+
@transient var name: String = null
118118

119119
/** Assign a name to this RDD */
120120
def setName(_name: String) = {
@@ -123,7 +123,7 @@ abstract class RDD[T: ClassManifest](
123123
}
124124

125125
/** User-defined generator of this RDD*/
126-
var generator = Utils.getCallSiteInfo.firstUserClass
126+
@transient var generator = Utils.getCallSiteInfo.firstUserClass
127127

128128
/** Reset generator*/
129129
def setGenerator(_generator: String) = {
@@ -925,7 +925,7 @@ abstract class RDD[T: ClassManifest](
925925
private var storageLevel: StorageLevel = StorageLevel.NONE
926926

927927
/** Record user function generating this RDD. */
928-
private[spark] val origin = Utils.formatSparkCallSite
928+
@transient private[spark] val origin = Utils.formatSparkCallSite
929929

930930
private[spark] def elementClassManifest: ClassManifest[T] = classManifest[T]
931931

@@ -940,7 +940,7 @@ abstract class RDD[T: ClassManifest](
940940
def context = sc
941941

942942
// Avoid handling doCheckpoint multiple times to prevent excessive recursion
943-
private var doCheckpointCalled = false
943+
@transient private var doCheckpointCalled = false
944944

945945
/**
946946
* Performs the checkpointing of this RDD by saving this. It is called by the DAGScheduler

0 commit comments

Comments
 (0)