From 0f9e9dcdb12b4bbb583fd8647667e74b961e8e0d Mon Sep 17 00:00:00 2001 From: zhangjunfan Date: Wed, 19 Jan 2022 11:17:03 +0800 Subject: [PATCH] Make job fail fast when container starting failed --- .../src/main/java/com/linkedin/tony/ApplicationMaster.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tony-core/src/main/java/com/linkedin/tony/ApplicationMaster.java b/tony-core/src/main/java/com/linkedin/tony/ApplicationMaster.java index 498fac18..d579792d 100644 --- a/tony-core/src/main/java/com/linkedin/tony/ApplicationMaster.java +++ b/tony-core/src/main/java/com/linkedin/tony/ApplicationMaster.java @@ -51,6 +51,7 @@ import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -1022,6 +1023,8 @@ public void onContainerStarted(ContainerId containerId, Map @Override public void onStartContainerError(ContainerId containerId, Throwable t) { LOG.error("Failed to start container " + containerId, t); + processFinishedContainer(containerId, ContainerExitStatus.INVALID, + "Errors on starting container, stacktrace as follows: \n" + ExceptionUtils.getStackTrace(t)); } @Override