Skip to content

Commit cfc666b

Browse files
wakun authored and GitHub Enterprise committed
[CARMEL-7504] Log error message instead of stopping spark context (apache#274)
1 parent 12946cd commit cfc666b

File tree

1 file changed

+24
-10
lines changed

1 file changed

+24
-10
lines changed

resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala

Lines changed: 24 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -569,17 +569,31 @@ private[spark] class ApplicationMaster(
569 569
var failureCount = 0
570 570
while (!finished) {
571 571
try {
572-
if (allocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
573-
finish(FinalApplicationStatus.FAILED,
574-
ApplicationMaster.EXIT_MAX_EXECUTOR_FAILURES,
575-
s"Max number of executor failures ($maxNumExecutorFailures) reached")
576-
} else if (allocator.isAllNodeExcluded) {
577-
finish(FinalApplicationStatus.FAILED,
578-
ApplicationMaster.EXIT_MAX_EXECUTOR_FAILURES,
579-
"Due to executor failures all available nodes are excluded")
572+
if (SparkConf.isETLCluster) {
573+
if (allocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
574+
finish(FinalApplicationStatus.FAILED,
575+
ApplicationMaster.EXIT_MAX_EXECUTOR_FAILURES,
576+
s"Max number of executor failures ($maxNumExecutorFailures) reached")
577+
} else if (allocator.isAllNodeExcluded) {
578+
finish(FinalApplicationStatus.FAILED,
579+
ApplicationMaster.EXIT_MAX_EXECUTOR_FAILURES,
580+
"Due to executor failures all available nodes are excluded")
581+
} else {
582+
logDebug("Sending progress")
583+
allocator.allocateResources()
584+
}
580 585
} else {
581-
logDebug("Sending progress")
582-
allocator.allocateResources()
586+
if (allocator.isAllNodeExcluded) {
587+
finish(FinalApplicationStatus.FAILED,
588+
ApplicationMaster.EXIT_MAX_EXECUTOR_FAILURES,
589+
"Due to executor failures all available nodes are excluded")
590+
} else {
591+
if (allocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
592+
logError(s"Max number of executor failures ($maxNumExecutorFailures) reached")
593+
}
594+
logDebug("Sending progress")
595+
allocator.allocateResources()
596+
}
583 597
}
584 598
failureCount = 0
585 599
} catch {

0 commit comments

Comments
 (0)