-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-24005][CORE] Remove usage of Scala’s parallel collection #21913
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ac26dea
506fa93
515d393
e3df464
3601550
c711d80
592b5a7
a863bd4
ad03004
72cdfeb
6a5f2ae
701901c
cf48d8d
610154c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,8 +19,12 @@ package org.apache.spark.util | |
|
|
||
| import java.util.concurrent._ | ||
|
|
||
| import scala.collection.TraversableLike | ||
| import scala.collection.generic.CanBuildFrom | ||
| import scala.language.higherKinds | ||
|
|
||
| import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder} | ||
| import scala.concurrent.{Awaitable, ExecutionContext, ExecutionContextExecutor} | ||
| import scala.concurrent.{Awaitable, ExecutionContext, ExecutionContextExecutor, Future} | ||
| import scala.concurrent.duration.{Duration, FiniteDuration} | ||
| import scala.concurrent.forkjoin.{ForkJoinPool => SForkJoinPool, ForkJoinWorkerThread => SForkJoinWorkerThread} | ||
| import scala.util.control.NonFatal | ||
|
|
@@ -254,4 +258,62 @@ private[spark] object ThreadUtils { | |
| executor.shutdownNow() | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Transforms the input collection by applying the given function to each element in parallel. | ||
| * Compared to the map() method of Scala parallel collections, this method can be interrupted | ||
| * at any time. This is useful when canceling task execution, for example. | ||
| * | ||
| * @param in - the input collection which should be transformed in parallel. | ||
| * @param prefix - the prefix assigned to the underlying thread pool. | ||
| * @param maxThreads - the maximum number of threads that can be created during execution. | ||
| * @param f - the lambda function that will be applied to each element of `in`. | ||
| * @tparam I - the type of elements in the input collection. | ||
| * @tparam O - the type of elements in the resulting collection. | ||
| * @return a new collection in which each element is obtained from an element of the input | ||
| * collection `in` by applying the lambda function `f`. | ||
| */ | ||
| def parmap[I, O, Col[X] <: TraversableLike[X, Col[X]]] | ||
| (in: Col[I], prefix: String, maxThreads: Int) | ||
| (f: I => O) | ||
| (implicit | ||
| cbf: CanBuildFrom[Col[I], Future[O], Col[Future[O]]], // For in.map | ||
| cbf2: CanBuildFrom[Col[Future[O]], O, Col[O]] // for Future.sequence | ||
| ): Col[O] = { | ||
| val pool = newForkJoinPool(prefix, maxThreads) | ||
| try { | ||
| implicit val ec = ExecutionContext.fromExecutor(pool) | ||
|
|
||
| parmap(in)(f) | ||
| } finally { | ||
| pool.shutdownNow() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ConeyLiu this line interrupts the tasks in the thread pool. Scala
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @zsxwing, thanks very much for your answer. |
||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Transforms the input collection by applying the given function to each element in parallel. | ||
| * Compared to the map() method of Scala parallel collections, this method can be interrupted | ||
| * at any time. This is useful when canceling task execution, for example. | ||
| * | ||
| * @param in - the input collection which should be transformed in parallel. | ||
| * @param f - the lambda function that will be applied to each element of `in`. | ||
| * @param ec - an execution context for parallel applying of the given function `f`. | ||
| * @tparam I - the type of elements in the input collection. | ||
| * @tparam O - the type of elements in the resulting collection. | ||
| * @return a new collection in which each element is obtained from an element of the input | ||
| * collection `in` by applying the lambda function `f`. | ||
| */ | ||
| def parmap[I, O, Col[X] <: TraversableLike[X, Col[X]]] | ||
| (in: Col[I]) | ||
| (f: I => O) | ||
| (implicit | ||
| cbf: CanBuildFrom[Col[I], Future[O], Col[Future[O]]], // For in.map | ||
| cbf2: CanBuildFrom[Col[Future[O]], O, Col[O]], // for Future.sequence | ||
| ec: ExecutionContext | ||
| ): Col[O] = { | ||
| val futures = in.map(x => Future(f(x))) | ||
| val futureSeq = Future.sequence(futures) | ||
|
|
||
| awaitResult(futureSeq, Duration.Inf) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -133,4 +133,37 @@ class ThreadUtilsSuite extends SparkFunSuite { | |
| "stack trace contains unexpected references to ThreadUtils" | ||
| ) | ||
| } | ||
|
|
||
| test("parmap should be interruptible") { | ||
| val t = new Thread() { | ||
| setDaemon(true) | ||
|
|
||
| override def run() { | ||
| try { | ||
| // "par" is uninterruptible. The following will keep running even if the thread is | ||
| // interrupted. We should prefer to use "ThreadUtils.parmap". | ||
| // | ||
| // (1 to 10).par.flatMap { i => | ||
| // Thread.sleep(100000) | ||
| // 1 to i | ||
| // } | ||
| // | ||
| ThreadUtils.parmap(1 to 10, "test", 2) { i => | ||
| Thread.sleep(100000) | ||
| 1 to i | ||
| }.flatten | ||
| } catch { | ||
| case _: InterruptedException => // expected | ||
| } | ||
| } | ||
| } | ||
| t.start() | ||
| eventually(timeout(10.seconds)) { | ||
| assert(t.isAlive) | ||
| } | ||
| t.interrupt() | ||
| eventually(timeout(10.seconds)) { | ||
| assert(!t.isAlive) | ||
| } | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd still include a note in these docs about what this does differently from
.par. Just a sentence about it being interruptible. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added a comment about this.