@@ -1758,16 +1758,13 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
 
   /**
    * Run a function on a given set of partitions in an RDD and pass the results to the given
-   * handler function. This is the main entry point for all actions in Spark. The allowLocal
-   * flag specifies whether the scheduler can run the computation on the driver rather than
-   * shipping it out to the cluster, for short actions like first().
+   * handler function. This is the main entry point for all actions in Spark.
    */
   def runJob[T, U: ClassTag](
       rdd: RDD[T],
       func: (TaskContext, Iterator[T]) => U,
       partitions: Seq[Int],
-      allowLocal: Boolean,
-      resultHandler: (Int, U) => Unit) {
+      resultHandler: (Int, U) => Unit): Unit = {
     if (stopped.get()) {
       throw new IllegalStateException("SparkContext has been shutdown")
     }
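With the flag gone, every action funnels through this four-argument form. A minimal sketch of calling the new primary entry point, assuming a live `SparkContext` named `sc` (both `sc` and `rdd` here are hypothetical, not from the patch):

```scala
import org.apache.spark.TaskContext

// Sketch only: `sc` is an assumed existing SparkContext.
// Results arrive through the handler, indexed by position in the
// `partitions` sequence (not by partition id); the handler runs on the driver.
val rdd = sc.parallelize(1 to 100, numSlices = 4)
val sums = new Array[Int](2)
sc.runJob[Int, Int](
  rdd,
  (ctx: TaskContext, it: Iterator[Int]) => it.sum,  // executed on executors
  Seq(0, 1),                                        // only the first two partitions
  (index, res) => sums(index) = res)
```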
@@ -1777,54 +1774,104 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
     if (conf.getBoolean("spark.logLineage", false)) {
       logInfo("RDD's recursive dependencies:\n" + rdd.toDebugString)
     }
-    dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, allowLocal,
-      resultHandler, localProperties.get)
+    dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, resultHandler, localProperties.get)
     progressBar.foreach(_.finishAll())
     rdd.doCheckpoint()
   }
 
   /**
-   * Run a function on a given set of partitions in an RDD and return the results as an array. The
-   * allowLocal flag specifies whether the scheduler can run the computation on the driver rather
-   * than shipping it out to the cluster, for short actions like first().
+   * Run a function on a given set of partitions in an RDD and return the results as an array.
+   */
+  def runJob[T, U: ClassTag](
+      rdd: RDD[T],
+      func: (TaskContext, Iterator[T]) => U,
+      partitions: Seq[Int]): Array[U] = {
+    val results = new Array[U](partitions.size)
+    runJob[T, U](rdd, func, partitions, (index, res) => results(index) = res)
+    results
+  }
+
+  /**
+   * Run a job on a given set of partitions of an RDD, but take a function of type
+   * `Iterator[T] => U` instead of `(TaskContext, Iterator[T]) => U`.
+   */
+  def runJob[T, U: ClassTag](
+      rdd: RDD[T],
+      func: Iterator[T] => U,
+      partitions: Seq[Int]): Array[U] = {
+    val cleanedFunc = clean(func)
+    runJob(rdd, (ctx: TaskContext, it: Iterator[T]) => cleanedFunc(it), partitions)
+  }
+
+
+  /**
+   * Run a function on a given set of partitions in an RDD and pass the results to the given
+   * handler function. This is the main entry point for all actions in Spark.
+   *
+   * The allowLocal flag is deprecated as of Spark 1.5.0+.
+   */
+  @deprecated("use the version of runJob without the allowLocal parameter", "1.5.0")
+  def runJob[T, U: ClassTag](
+      rdd: RDD[T],
+      func: (TaskContext, Iterator[T]) => U,
+      partitions: Seq[Int],
+      allowLocal: Boolean,
+      resultHandler: (Int, U) => Unit): Unit = {
+    if (allowLocal) {
+      logWarning("sc.runJob with allowLocal=true is deprecated in Spark 1.5.0+")
+    }
+    runJob(rdd, func, partitions, resultHandler)
+  }
+
+  /**
+   * Run a function on a given set of partitions in an RDD and return the results as an array.
+   *
+   * The allowLocal flag is deprecated as of Spark 1.5.0+.
    */
+  @deprecated("use the version of runJob without the allowLocal parameter", "1.5.0")
   def runJob[T, U: ClassTag](
       rdd: RDD[T],
       func: (TaskContext, Iterator[T]) => U,
       partitions: Seq[Int],
       allowLocal: Boolean
       ): Array[U] = {
-    val results = new Array[U](partitions.size)
-    runJob[T, U](rdd, func, partitions, allowLocal, (index, res) => results(index) = res)
-    results
+    if (allowLocal) {
+      logWarning("sc.runJob with allowLocal=true is deprecated in Spark 1.5.0+")
+    }
+    runJob(rdd, func, partitions)
   }
 
   /**
    * Run a job on a given set of partitions of an RDD, but take a function of type
    * `Iterator[T] => U` instead of `(TaskContext, Iterator[T]) => U`.
+   *
+   * The allowLocal argument is deprecated as of Spark 1.5.0+.
    */
+  @deprecated("use the version of runJob without the allowLocal parameter", "1.5.0")
   def runJob[T, U: ClassTag](
       rdd: RDD[T],
      func: Iterator[T] => U,
      partitions: Seq[Int],
      allowLocal: Boolean
      ): Array[U] = {
-    val cleanedFunc = clean(func)
-    runJob(rdd, (ctx: TaskContext, it: Iterator[T]) => cleanedFunc(it), partitions, allowLocal)
+    if (allowLocal) {
+      logWarning("sc.runJob with allowLocal=true is deprecated in Spark 1.5.0+")
+    }
+    runJob(rdd, func, partitions)
   }
 
   /**
    * Run a job on all partitions in an RDD and return the results in an array.
    */
   def runJob[T, U: ClassTag](rdd: RDD[T], func: (TaskContext, Iterator[T]) => U): Array[U] = {
-    runJob(rdd, func, 0 until rdd.partitions.size, false)
+    runJob(rdd, func, 0 until rdd.partitions.length)
   }
 
   /**
    * Run a job on all partitions in an RDD and return the results in an array.
   */
   def runJob[T, U: ClassTag](rdd: RDD[T], func: Iterator[T] => U): Array[U] = {
-    runJob(rdd, func, 0 until rdd.partitions.size, false)
+    runJob(rdd, func, 0 until rdd.partitions.length)
   }
 
   /**
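The deprecated overloads keep old call sites source-compatible: `allowLocal` is ignored apart from a log warning, and each one delegates to the corresponding flag-free overload. A sketch of what a legacy call site looks like after this patch, reusing the hypothetical `sc` and `rdd` from the sketch above:

```scala
// Compiles with a deprecation warning; allowLocal = true no longer runs
// anything on the driver -- the job still goes through the DAGScheduler.
val heads: Array[Array[Int]] =
  sc.runJob(rdd, (it: Iterator[Int]) => it.take(1).toArray, Seq(0), allowLocal = true)
```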
@@ -1835,7 +1882,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       processPartition: (TaskContext, Iterator[T]) => U,
       resultHandler: (Int, U) => Unit)
   {
-    runJob[T, U](rdd, processPartition, 0 until rdd.partitions.size, false, resultHandler)
+    runJob[T, U](rdd, processPartition, 0 until rdd.partitions.length, resultHandler)
   }
 
   /**
@@ -1847,7 +1894,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       resultHandler: (Int, U) => Unit)
   {
     val processFunc = (context: TaskContext, iter: Iterator[T]) => processPartition(iter)
-    runJob[T, U](rdd, processFunc, 0 until rdd.partitions.size, false, resultHandler)
+    runJob[T, U](rdd, processFunc, 0 until rdd.partitions.length, resultHandler)
   }
 
   /**
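Same mechanical change in the handler-based convenience overloads: they now enumerate `0 until rdd.partitions.length` and drop the hard-coded `false`. A sketch of the streaming-handler form (again with an assumed `sc`), which pushes each partition's result to the driver as its task finishes instead of materializing an `Array[U]` at the end:

```scala
// Count elements per partition and handle each result as it arrives.
val lines = sc.parallelize(Seq("a", "b", "c", "d", "e"), numSlices = 2)
sc.runJob(lines,
  (it: Iterator[String]) => it.size,
  (index: Int, count: Int) => println(s"partition $index: $count elements"))
```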
@@ -1892,7 +1939,6 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       (context: TaskContext, iter: Iterator[T]) => cleanF(iter),
       partitions,
       callSite,
-      allowLocal = false,
       resultHandler,
       localProperties.get)
     new SimpleFutureAction(waiter, resultFunc)
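`submitJob` follows the same path asynchronously; with the `allowLocal = false` argument removed from the `dagScheduler.submitJob` call, even a single-partition job is scheduled on the cluster. A sketch under the same assumptions (`sc` and `rdd` as before):

```scala
import scala.concurrent.Await
import scala.concurrent.duration.Duration

// Asynchronous per-partition counts; the final resultFunc just yields Unit.
val future = sc.submitJob[Int, Long, Unit](
  rdd,
  (it: Iterator[Int]) => it.size.toLong,
  0 until rdd.partitions.length,
  (index, count) => println(s"partition $index: $count"),
  ())
Await.ready(future, Duration.Inf)  // SimpleFutureAction is a scala.concurrent.Future
```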