Merged

Changes from 6 commits
34 changes: 34 additions & 0 deletions core/src/main/java/org/apache/spark/ExecutorPlugin.java
@@ -54,4 +54,38 @@ default void init() {}
* will wait for the plugin to terminate before continuing its own shutdown.</p>
*/
default void shutdown() {}

/**
* Perform any action before a task is run.
*
* <p>This method is invoked from the same thread in which the task will be executed.
* Task-specific information can be accessed via {@link TaskContext#get}.</p>
*
* <p>Users should avoid expensive operations here, as this method is called for
* every task; doing something expensive can significantly slow down a job.
* Calling a remote service, for example, is not recommended.</p>
*
* <p>Exceptions thrown from this method do not propagate; they are caught,
* logged, and suppressed, so an exception here will not cause the task or
* the job to fail.</p>
*/
default void onTaskStart() {}

/**
* Perform an action after a task completes without exceptions.
*
* <p>Because {@link #onTaskStart() onTaskStart} exceptions are suppressed, this method
* will still be invoked even if the corresponding {@link #onTaskStart} call for this
* task failed.</p>
*
* <p>The same warnings as for {@link #onTaskStart() onTaskStart} apply here.</p>
*/
default void onTaskSucceeded() {}

/**
* Perform an action after a task completes with an exception.
*
* <p>The same warnings as for {@link #onTaskStart() onTaskStart} apply here.</p>
*/
default void onTaskFailed(Throwable throwable) {}
}
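For illustration, here is a minimal sketch of a plugin that uses the new hooks to accumulate per-task timing on an executor. The package, class name, and metric handling below are hypothetical and not part of this change; only the `ExecutorPlugin` interface and its three new callbacks come from the diff above.

```java
package com.example;  // hypothetical package, not part of this PR

import java.util.concurrent.atomic.AtomicLong;

import org.apache.spark.ExecutorPlugin;

// Hypothetical example plugin: measures wall-clock time per task with the new hooks.
// The callbacks run on the task's own thread, so the work done here is kept cheap.
public class TaskTimingPlugin implements ExecutorPlugin {
  private final ThreadLocal<Long> startNanos = new ThreadLocal<>();
  private final AtomicLong totalTaskNanos = new AtomicLong();

  @Override
  public void onTaskStart() {
    startNanos.set(System.nanoTime());
  }

  @Override
  public void onTaskSucceeded() {
    record();
  }

  @Override
  public void onTaskFailed(Throwable throwable) {
    record();
  }

  private void record() {
    // onTaskSucceeded/onTaskFailed may fire even if onTaskStart failed, so guard for null.
    Long start = startNanos.get();
    if (start != null) {
      totalTaskNanos.addAndGet(System.nanoTime() - start);
      startNanos.remove();
    }
  }
}
```

If `onTaskStart` threw here, the executor would only log the failure, as documented above, and `onTaskSucceeded` or `onTaskFailed` would still be invoked for the task.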
6 changes: 4 additions & 2 deletions core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -216,7 +216,7 @@ private[spark] class Executor(
private[executor] def numRunningTasks: Int = runningTasks.size()

def launchTask(context: ExecutorBackend, taskDescription: TaskDescription): Unit = {
val tr = new TaskRunner(context, taskDescription)
val tr = new TaskRunner(context, taskDescription, executorPlugins)
runningTasks.put(taskDescription.taskId, tr)
threadPool.execute(tr)
}
@@ -292,7 +292,8 @@ private[spark] class Executor(

class TaskRunner(
execBackend: ExecutorBackend,
private val taskDescription: TaskDescription)
private val taskDescription: TaskDescription,
private val executorPlugins: Seq[ExecutorPlugin])
extends Runnable {

val taskId = taskDescription.taskId
Expand Down Expand Up @@ -435,6 +436,7 @@ private[spark] class Executor(
val res = task.run(
taskAttemptId = taskId,
attemptNumber = taskDescription.attemptNumber,
executorPlugins = executorPlugins,
metricsSystem = env.metricsSystem)
threwException = false
res
54 changes: 50 additions & 4 deletions core/src/main/scala/org/apache/spark/scheduler/Task.scala
@@ -20,8 +20,11 @@ package org.apache.spark.scheduler
import java.nio.ByteBuffer
import java.util.Properties

import com.palantir.logsafe.UnsafeArg

import org.apache.spark._
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.internal.SafeLogging
import org.apache.spark.internal.config.APP_CALLER_CONTEXT
import org.apache.spark.memory.{MemoryMode, TaskMemoryManager}
import org.apache.spark.metrics.MetricsSystem
@@ -63,7 +66,7 @@ private[spark] abstract class Task[T](
val jobId: Option[Int] = None,
val appId: Option[String] = None,
val appAttemptId: Option[String] = None,
val isBarrier: Boolean = false) extends Serializable {
val isBarrier: Boolean = false) extends Serializable with SafeLogging {

@transient lazy val metrics: TaskMetrics =
SparkEnv.get.closureSerializer.newInstance().deserialize(ByteBuffer.wrap(serializedTaskMetrics))
@@ -72,13 +75,15 @@
* Called by [[org.apache.spark.executor.Executor]] to run this task.
*
* @param taskAttemptId an identifier for this task attempt that is unique within a SparkContext.
* @param attemptNumber how many times this task has been attempted (0 for the first attempt)
* @param attemptNumber how many times this task has been attempted (0 for the first attempt).
* @param executorPlugins the plugins to be notified when this task starts, succeeds, or fails.
* @return the result of the task along with updates of Accumulators.
*/
final def run(
taskAttemptId: Long,
attemptNumber: Int,
metricsSystem: MetricsSystem): T = {
metricsSystem: MetricsSystem,
executorPlugins: Seq[ExecutorPlugin]): T = {
SparkEnv.get.blockManager.registerTask(taskAttemptId)
// TODO SPARK-24874 Allow create BarrierTaskContext based on partitions, instead of whether
// the stage is barrier.
@@ -117,8 +122,12 @@
Option(taskAttemptId),
Option(attemptNumber)).setCurrentContext()

sendTaskStartToPlugins(executorPlugins)

try {
runTask(context)
val taskResult = runTask(context)
sendTaskSucceededToPlugins(executorPlugins)
taskResult
} catch {
case e: Throwable =>
// Catch all errors; run task failure callbacks, and rethrow the exception.
@@ -129,6 +138,7 @@
e.addSuppressed(t)
}
context.markTaskCompleted(Some(e))
sendTaskFailedToPlugins(executorPlugins, e)
throw e
} finally {
try {
@@ -159,6 +169,42 @@
}
}

private def sendTaskStartToPlugins(executorPlugins: Seq[ExecutorPlugin]): Unit = {
executorPlugins.foreach { plugin =>
try {
plugin.onTaskStart()
} catch {
case e: Exception =>
safeLogWarning("Plugin onStart failed", e,
UnsafeArg.of("pluginName", plugin.getClass().getCanonicalName()))
}
}
}

private def sendTaskSucceededToPlugins(executorPlugins: Seq[ExecutorPlugin]): Unit = {
executorPlugins.foreach { plugin =>
try {
plugin.onTaskSucceeded()
} catch {
case e: Exception =>
safeLogWarning("Plugin onTaskSucceeded failed", e,
UnsafeArg.of("pluginName", plugin.getClass().getCanonicalName()))
}
}
}

private def sendTaskFailedToPlugins(executorPlugins: Seq[ExecutorPlugin], error: Throwable): Unit = {
executorPlugins.foreach { plugin =>
try {
plugin.onTaskFailed(error)
} catch {
case e: Exception =>
safeLogWarning("Plugin onTaskFailed failed", e,
UnsafeArg.of("pluginName", plugin.getClass().getCanonicalName()))
}
}
}

private var taskMemoryManager: TaskMemoryManager = _

def setTaskMemoryManager(taskMemoryManager: TaskMemoryManager): Unit = {
143 changes: 143 additions & 0 deletions core/src/test/java/org/apache/spark/ExecutorPluginTaskSuite.java
@@ -0,0 +1,143 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark;

import com.google.common.collect.ImmutableList;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.*;

public class ExecutorPluginTaskSuite {
private static final String EXECUTOR_PLUGIN_CONF_NAME = "spark.executor.plugins";
private static final String taskWellBehavedPluginName = TestWellBehavedPlugin.class.getName();
private static final String taskBadlyBehavedPluginName = TestBadlyBehavedPlugin.class.getName();

// Static values modified by the test plugins to verify that plugin callbacks are invoked correctly.
public static int numOnTaskStart = 0;
public static int numOnTaskSucceeded = 0;
public static int numOnTaskFailed = 0;
Comment on lines +34 to +37

Apologies, not following. Why is non-static insufficient to ensure that?

Author

We could; I was following the pattern of https://github.com/palantir/spark/blob/master/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java#L35. I think this is a preference thing, and I would personally rather have these be variables in each plugin. Will refactor so that this is the case.

Author

Actually, we need static variables because we don't have access to the plugin instances being instantiated here (plugins are instantiated on the Spark executors). The way upstream does it now is a bit more verbose, but it still relies on static variables... this is a bit simpler, so I will leave it like this.


private JavaSparkContext sc;

@Before
public void setUp() {
sc = null;
numOnTaskStart = 0;
numOnTaskSucceeded = 0;
numOnTaskFailed = 0;
}

@After
public void tearDown() {
if (sc != null) {
sc.stop();
sc = null;
}
}

private SparkConf initializeSparkConf(String pluginNames) {
return new SparkConf()
.setMaster("local")
.setAppName("test")
.set(EXECUTOR_PLUGIN_CONF_NAME, pluginNames);
}

@Test
public void testWellBehavedPlugin() {
SparkConf conf = initializeSparkConf(taskWellBehavedPluginName);

sc = new JavaSparkContext(conf);
JavaRDD<Integer> rdd = sc.parallelize(ImmutableList.of(1, 2));
rdd.filter(value -> value.equals(1)).collect();

assertEquals(1, numOnTaskStart);
assertEquals(1, numOnTaskSucceeded);
assertEquals(0, numOnTaskFailed);
}

@Test
public void testBadlyBehavedPluginDoesNotAffectWellBehavedPlugin() {
SparkConf conf = initializeSparkConf(
taskWellBehavedPluginName + "," + taskBadlyBehavedPluginName);

sc = new JavaSparkContext(conf);
JavaRDD<Integer> rdd = sc.parallelize(ImmutableList.of(1, 2));
rdd.filter(value -> value.equals(1)).collect();

assertEquals(1, numOnTaskStart);
assertEquals(2, numOnTaskSucceeded);
assertEquals(0, numOnTaskFailed);
}

@Test
public void testTaskWhichFails() {
SparkConf conf = initializeSparkConf(taskWellBehavedPluginName);

sc = new JavaSparkContext(conf);
JavaRDD<Integer> rdd = sc.parallelize(ImmutableList.of(1, 2));
try {
rdd.foreach(integer -> {
throw new RuntimeException();
});
} catch (Exception e) {
// ignore exception
}

assertEquals(1, numOnTaskStart);
assertEquals(0, numOnTaskSucceeded);
assertEquals(1, numOnTaskFailed);
}

public static class TestWellBehavedPlugin implements ExecutorPlugin {
@Override
public void onTaskStart() {
numOnTaskStart++;
}

@Override
public void onTaskSucceeded() {
numOnTaskSucceeded++;
}

@Override
public void onTaskFailed(Throwable throwable) {
numOnTaskFailed++;
}
}

public static class TestBadlyBehavedPlugin implements ExecutorPlugin {
@Override
public void onTaskStart() {
throw new RuntimeException();
}

@Override
public void onTaskSucceeded() {
numOnTaskSucceeded++;
}

@Override
public void onTaskFailed(Throwable throwable) {
numOnTaskFailed++;
}
}
}
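For reference, a sketch of how a plugin would be enabled outside this suite, using the same `spark.executor.plugins` setting the tests exercise. The application and plugin class names below are hypothetical; the plugin classes must be available on the executor classpath.

```java
import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class PluginEnabledApp {
  public static void main(String[] args) {
    // Comma-separated list of ExecutorPlugin implementations to load on each executor.
    // The master URL is expected to come from spark-submit.
    SparkConf conf = new SparkConf()
        .setAppName("app-with-task-plugins")
        .set("spark.executor.plugins", "com.example.TaskTimingPlugin");

    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
      // Run jobs as usual; the plugin's onTaskStart/onTaskSucceeded/onTaskFailed
      // callbacks fire around every task executed on the executors.
      sc.parallelize(Arrays.asList(1, 2, 3)).count();
    } finally {
      sc.stop();
    }
  }
}
```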
core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala
@@ -70,7 +70,7 @@ class TaskContextSuite extends SparkFunSuite with BeforeAndAfter with LocalSpark
0, 0, taskBinary, rdd.partitions(0), Seq.empty, 0, new Properties,
closureSerializer.serialize(TaskMetrics.registered).array())
intercept[RuntimeException] {
task.run(0, 0, null)
task.run(0, 0, null, Seq.empty)
}
assert(TaskContextSuite.completed === true)
}
@@ -92,7 +92,7 @@
0, 0, taskBinary, rdd.partitions(0), Seq.empty, 0, new Properties,
closureSerializer.serialize(TaskMetrics.registered).array())
intercept[RuntimeException] {
task.run(0, 0, null)
task.run(0, 0, null, Seq.empty)
}
assert(TaskContextSuite.lastError.getMessage == "damn error")
}