@@ -34,6 +34,7 @@ import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat, Job => NewHad
 import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFormat}
 import org.apache.mesos.MesosNativeLibrary

+import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.deploy.{LocalSparkCluster, SparkHadoopUtil}
 import org.apache.spark.input.WholeTextFileInputFormat
@@ -48,22 +49,35 @@ import org.apache.spark.ui.SparkUI
 import org.apache.spark.util.{ClosureCleaner, MetadataCleaner, MetadataCleanerType, TimeStampedWeakValueHashMap, Utils}

 /**
+ * :: DeveloperApi ::
  * Main entry point for Spark functionality. A SparkContext represents the connection to a Spark
  * cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster.
  *
  * @param config a Spark Config object describing the application configuration. Any settings in
  * this config overrides the default configs as well as system properties.
- * @param preferredNodeLocationData used in YARN mode to select nodes to launch containers on. Can
- * be generated using [[org.apache.spark.scheduler.InputFormatInfo.computePreferredLocations]]
- * from a list of input files or InputFormats for the application.
  */
-class SparkContext(
-    config: SparkConf,
-    // This is used only by YARN for now, but should be relevant to other cluster types (Mesos,
-    // etc) too. This is typically generated from InputFormatInfo.computePreferredLocations. It
-    // contains a map from hostname to a list of input format splits on the host.
-    val preferredNodeLocationData: Map[String, Set[SplitInfo]] = Map())
-  extends Logging {
+
+@DeveloperApi
+class SparkContext(config: SparkConf) extends Logging {
+
+  // This is used only by YARN for now, but should be relevant to other cluster types (Mesos,
+  // etc) too. This is typically generated from InputFormatInfo.computePreferredLocations. It
+  // contains a map from hostname to a list of input format splits on the host.
+  private[spark] var preferredNodeLocationData: Map[String, Set[SplitInfo]] = Map()
+
+  /**
+   * :: DeveloperApi ::
+   * Alternative constructor for setting preferred locations where Spark will create executors.
+   *
+   * @param preferredNodeLocationData used in YARN mode to select nodes to launch containers on. Can
+   * be generated using [[org.apache.spark.scheduler.InputFormatInfo.computePreferredLocations]]
+   * from a list of input files or InputFormats for the application.
+   */
+  @DeveloperApi
+  def this(config: SparkConf, preferredNodeLocationData: Map[String, Set[SplitInfo]]) = {
+    this(config)
+    this.preferredNodeLocationData = preferredNodeLocationData
+  }
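A minimal usage sketch of the new two-argument @DeveloperApi constructor; the master URL and the empty map are placeholders, since the location data would normally come from InputFormatInfo.computePreferredLocations:

// Sketch only: the map contents stand in for data produced by
// org.apache.spark.scheduler.InputFormatInfo.computePreferredLocations.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.scheduler.SplitInfo

val conf = new SparkConf().setMaster("yarn-client").setAppName("PreferredLocationsExample")
val preferredLocations: Map[String, Set[SplitInfo]] = Map.empty  // placeholder
val sc = new SparkContext(conf, preferredLocations)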

   /**
    * Alternative constructor that allows setting common Spark properties directly
@@ -93,10 +107,45 @@ class SparkContext(
       environment: Map[String, String] = Map(),
       preferredNodeLocationData: Map[String, Set[SplitInfo]] = Map()) =
   {
-    this(SparkContext.updatedConf(new SparkConf(), master, appName, sparkHome, jars, environment),
-      preferredNodeLocationData)
+    this(SparkContext.updatedConf(new SparkConf(), master, appName, sparkHome, jars, environment))
+    this.preferredNodeLocationData = preferredNodeLocationData
   }

+  // NOTE: The below constructors could be consolidated using default arguments. Due to
+  // Scala bug SI-8479, however, this causes the compile step to fail when generating docs.
+  // Until we have a good workaround for that bug the constructors remain broken out.
+
+  /**
+   * Alternative constructor that allows setting common Spark properties directly
+   *
+   * @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).
+   * @param appName A name for your application, to display on the cluster web UI.
+   */
+  private[spark] def this(master: String, appName: String) =
+    this(master, appName, null, Nil, Map(), Map())
+
+  /**
+   * Alternative constructor that allows setting common Spark properties directly
+   *
+   * @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).
+   * @param appName A name for your application, to display on the cluster web UI.
+   * @param sparkHome Location where Spark is installed on cluster nodes.
+   */
+  private[spark] def this(master: String, appName: String, sparkHome: String) =
+    this(master, appName, sparkHome, Nil, Map(), Map())
+
+  /**
+   * Alternative constructor that allows setting common Spark properties directly
+   *
+   * @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).
+   * @param appName A name for your application, to display on the cluster web UI.
+   * @param sparkHome Location where Spark is installed on cluster nodes.
+   * @param jars Collection of JARs to send to the cluster. These can be paths on the local file
+   * system or HDFS, HTTP, HTTPS, or FTP URLs.
+   */
+  private[spark] def this(master: String, appName: String, sparkHome: String, jars: Seq[String]) =
+    this(master, appName, sparkHome, jars, Map(), Map())
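Because these string-based overloads are private[spark], application code reaches the same functionality through the public multi-argument constructor or through SparkConf. A minimal sketch (the Spark home and JAR path are made-up values for illustration):

// Sketch only: "/opt/spark" and the JAR path are hypothetical.
import org.apache.spark.SparkContext

val sc = new SparkContext(
  "local[4]",                 // master
  "ConstructorExample",       // appName
  "/opt/spark",               // sparkHome
  Seq("/path/to/app.jar"))    // jars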
+
   private[spark] val conf = config.clone()

   /**
@@ -186,7 +235,7 @@ class SparkContext(
     jars.foreach(addJar)
   }

-  def warnSparkMem(value: String): String = {
+  private def warnSparkMem(value: String): String = {
     logWarning("Using SPARK_MEM to set amount of memory to use per executor process is " +
       "deprecated, please use spark.executor.memory instead.")
     value
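The warning points users at spark.executor.memory; a minimal sketch of the SparkConf-based replacement for SPARK_MEM (the 2g value is just an example):

// Sketch: set executor memory through configuration instead of the deprecated SPARK_MEM.
val conf = new SparkConf()
  .setMaster("local[2]")
  .setAppName("MemoryExample")
  .set("spark.executor.memory", "2g")  // replaces SPARK_MEM
val sc = new SparkContext(conf)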
@@ -651,6 +700,9 @@ class SparkContext(
   def union[T: ClassTag](first: RDD[T], rest: RDD[T]*): RDD[T] =
     new UnionRDD(this, Seq(first) ++ rest)

+  /** Get an RDD that has no partitions or elements. */
+  def emptyRDD[T: ClassTag] = new EmptyRDD[T](this)
+
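A quick sketch of the new emptyRDD, for example as a neutral element when unioning RDDs (assumes a running SparkContext `sc`):

// Sketch: an empty RDD has no partitions and no elements; union with it adds nothing.
val empty = sc.emptyRDD[Int]
assert(empty.count() == 0)
val combined = sc.union(empty, sc.parallelize(Seq(1, 2, 3)))
assert(combined.count() == 3)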
   // Methods for creating shared variables

   /**
@@ -714,6 +766,11 @@ class SparkContext(
     postEnvironmentUpdate()
   }

+  /**
+   * :: DeveloperApi ::
+   * Register a listener to receive up-calls from events that happen during execution.
+   */
+  @DeveloperApi
   def addSparkListener(listener: SparkListener) {
     listenerBus.addListener(listener)
   }
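A minimal sketch of a listener registered through the newly annotated addSparkListener; the StageLoggingListener name is made up for illustration, and `sc` is an existing SparkContext:

// Sketch: a SparkListener that logs stage completions.
import org.apache.spark.scheduler.{SparkListener, SparkListenerStageCompleted}

class StageLoggingListener extends SparkListener {
  override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
    println(s"Stage ${stageCompleted.stageInfo.stageId} finished")
  }
}

sc.addSparkListener(new StageLoggingListener)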
@@ -1020,8 +1077,10 @@ class SparkContext(
   }

   /**
+   * :: DeveloperApi ::
    * Run a job that can return approximate results.
    */
+  @DeveloperApi
   def runApproximateJob[T, U, R](
       rdd: RDD[T],
       func: (TaskContext, Iterator[T]) => U,
@@ -1039,6 +1098,7 @@ class SparkContext(
   /**
    * Submit a job for execution and return a FutureJob holding the result.
    */
+  @Experimental
   def submitJob[T, U, R](
       rdd: RDD[T],
       processPartition: Iterator[T] => U,
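A hedged sketch of calling the now @Experimental submitJob to count elements per partition asynchronously. The hunk above cuts off mid-signature, so the trailing parameters used here (partitions, resultHandler, resultFunc) are assumed from the full method; `sc` is an existing SparkContext:

// Sketch only: parameters after processPartition are assumed, see the full signature.
val data = sc.parallelize(1 to 100, 4)
val partitionCounts = new Array[Int](data.partitions.size)
val future = sc.submitJob(
  data,
  (iter: Iterator[Int]) => iter.size,                         // computation per partition
  0 until data.partitions.size,                               // run on every partition
  (index: Int, count: Int) => partitionCounts(index) = count, // gather per-partition results
  partitionCounts.sum)                                        // overall result once the job completes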