From 3abd3f6f494875d011b114a14136319a3561a8ca Mon Sep 17 00:00:00 2001 From: Ashutosh Gupta Date: Sun, 21 Aug 2022 02:33:24 +0100 Subject: [PATCH 1/5] MAPREDUCE-7407. Avoid stopContainer() on dead node --- .../app/launcher/ContainerLauncherImpl.java | 14 +++++++++++++ .../launcher/TestContainerLauncherImpl.java | 20 +++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java index d09b3cb1e56b3..c016246b9764b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java @@ -385,6 +385,20 @@ public void run() { // TODO: Do it only once per NodeManager. ContainerId containerID = event.getContainerID(); + // If the container failed to launch earlier (due to dead node for example), + // it has been marked as FAILED and removed from containers during + // CONTAINER_REMOTE_LAUNCH event handling. + // Skip kill() such container during CONTAINER_REMOTE_CLEANUP as + // it is not necessary and could cost 15 minutes delay if the node is dead. + if (event.getType() == EventType.CONTAINER_REMOTE_CLEANUP && + !containers.containsKey(containerID)) { + LOG.info("Skip cleanup of already-removed container " + containerID); + // send killed event to task attempt regardless like in kill(). + context.getEventHandler().handle(new TaskAttemptEvent(event.getTaskAttemptID(), + TaskAttemptEventType.TA_CONTAINER_CLEANED)); + return; + } + Container c = getContainer(event); switch(event.getType()) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java index 2057cc80ff01a..7da22dea2a9e5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java @@ -283,8 +283,24 @@ public void testOutOfOrder() throws Exception { ut.handle(mockLaunchEvent); ut.waitForPoolToIdle(); - - verify(mockCM, never()).startContainers(any(StartContainersRequest.class)); + + verify(mockCM).startContainers(any(StartContainersRequest.class)); + + LOG.info("inserting cleanup event"); + ContainerLauncherEvent mockCleanupEvent2 = + mock(ContainerLauncherEvent.class); + when(mockCleanupEvent2.getType()) + .thenReturn(EventType.CONTAINER_REMOTE_CLEANUP); + when(mockCleanupEvent2.getContainerID()) + .thenReturn(contId); + when(mockCleanupEvent2.getTaskAttemptID()).thenReturn(taskAttemptId); + when(mockCleanupEvent2.getContainerMgrAddress()).thenReturn(cmAddress); + ut.handle(mockCleanupEvent2); + + ut.waitForPoolToIdle(); + + // Verifies stopContainers is called on existing container + verify(mockCM).stopContainers(any(StopContainersRequest.class)); } finally { ut.stop(); } From e3baf8b4f96fd23ad44096c61ae87e9ced1d7329 Mon Sep 17 00:00:00 2001 From: Ashutosh Gupta Date: Wed, 14 Sep 2022 18:18:43 +0100 Subject: [PATCH 2/5] Moving container check inside switch case and updating Logger --- .../app/launcher/ContainerLauncherImpl.java | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java index c016246b9764b..0d2ccc227d763 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java @@ -379,41 +379,38 @@ class EventProcessor implements Runnable { @Override public void run() { - LOG.info("Processing the event " + event.toString()); + LOG.info("Processing the event {}", event.toString()); // Load ContainerManager tokens before creating a connection. // TODO: Do it only once per NodeManager. ContainerId containerID = event.getContainerID(); - // If the container failed to launch earlier (due to dead node for example), - // it has been marked as FAILED and removed from containers during - // CONTAINER_REMOTE_LAUNCH event handling. - // Skip kill() such container during CONTAINER_REMOTE_CLEANUP as - // it is not necessary and could cost 15 minutes delay if the node is dead. - if (event.getType() == EventType.CONTAINER_REMOTE_CLEANUP && - !containers.containsKey(containerID)) { - LOG.info("Skip cleanup of already-removed container " + containerID); - // send killed event to task attempt regardless like in kill(). - context.getEventHandler().handle(new TaskAttemptEvent(event.getTaskAttemptID(), - TaskAttemptEventType.TA_CONTAINER_CLEANED)); - return; - } - - Container c = getContainer(event); switch(event.getType()) { case CONTAINER_REMOTE_LAUNCH: ContainerRemoteLaunchEvent launchEvent = (ContainerRemoteLaunchEvent) event; - c.launch(launchEvent); + getContainer(event).launch(launchEvent); break; case CONTAINER_REMOTE_CLEANUP: - c.kill(event.getDumpContainerThreads()); + // If the container failed to launch earlier (due to dead node for example), + // it has been marked as FAILED and removed from containers during + // CONTAINER_REMOTE_LAUNCH event handling. + // Skip kill() such container during CONTAINER_REMOTE_CLEANUP as + // it is not necessary and could cost 15 minutes delay if the node is dead. + if (!containers.containsKey(containerID)) { + LOG.info("Skip cleanup of already-removed container {}", containerID); + // send killed event to task attempt regardless like in kill(). + context.getEventHandler().handle(new TaskAttemptEvent(event.getTaskAttemptID(), + TaskAttemptEventType.TA_CONTAINER_CLEANED)); + return; + } + getContainer(event).kill(event.getDumpContainerThreads()); break; case CONTAINER_COMPLETED: - c.done(); + getContainer(event).done(); break; } From 81bd5b59d9fef59b5fa8d2352cafd596bbf3b5a0 Mon Sep 17 00:00:00 2001 From: Ashutosh Gupta Date: Thu, 15 Sep 2022 02:08:24 +0100 Subject: [PATCH 3/5] removing .toString() from LOG --- .../hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java index 0d2ccc227d763..d184d9be64bf8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java @@ -379,7 +379,7 @@ class EventProcessor implements Runnable { @Override public void run() { - LOG.info("Processing the event {}", event.toString()); + LOG.info("Processing the event {}", event); // Load ContainerManager tokens before creating a connection. // TODO: Do it only once per NodeManager. From 1cbbcec9dde54e77308408ce8299d182145d1116 Mon Sep 17 00:00:00 2001 From: Ashutosh Gupta Date: Thu, 15 Sep 2022 02:20:16 +0100 Subject: [PATCH 4/5] formatting lines --- .../v2/app/launcher/TestContainerLauncherImpl.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java index 7da22dea2a9e5..524c9245065da 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java @@ -209,14 +209,11 @@ public void testHandle() throws Exception { ut.waitForPoolToIdle(); verify(mockCM).startContainers(any(StartContainersRequest.class)); - + LOG.info("inserting cleanup event"); - ContainerLauncherEvent mockCleanupEvent = - mock(ContainerLauncherEvent.class); - when(mockCleanupEvent.getType()) - .thenReturn(EventType.CONTAINER_REMOTE_CLEANUP); - when(mockCleanupEvent.getContainerID()) - .thenReturn(contId); + ContainerLauncherEvent mockCleanupEvent = mock(ContainerLauncherEvent.class); + when(mockCleanupEvent.getType()).thenReturn(EventType.CONTAINER_REMOTE_CLEANUP); + when(mockCleanupEvent.getContainerID()).thenReturn(contId); when(mockCleanupEvent.getTaskAttemptID()).thenReturn(taskAttemptId); when(mockCleanupEvent.getContainerMgrAddress()).thenReturn(cmAddress); ut.handle(mockCleanupEvent); From 37be43cb1394b41ac63ade371b2892b54514a3a4 Mon Sep 17 00:00:00 2001 From: Ashutosh Gupta Date: Thu, 15 Sep 2022 02:23:26 +0100 Subject: [PATCH 5/5] formatting lines --- .../v2/app/launcher/TestContainerLauncherImpl.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java index 524c9245065da..88ba8943ceb3f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java @@ -284,12 +284,9 @@ public void testOutOfOrder() throws Exception { verify(mockCM).startContainers(any(StartContainersRequest.class)); LOG.info("inserting cleanup event"); - ContainerLauncherEvent mockCleanupEvent2 = - mock(ContainerLauncherEvent.class); - when(mockCleanupEvent2.getType()) - .thenReturn(EventType.CONTAINER_REMOTE_CLEANUP); - when(mockCleanupEvent2.getContainerID()) - .thenReturn(contId); + ContainerLauncherEvent mockCleanupEvent2 = mock(ContainerLauncherEvent.class); + when(mockCleanupEvent2.getType()).thenReturn(EventType.CONTAINER_REMOTE_CLEANUP); + when(mockCleanupEvent2.getContainerID()).thenReturn(contId); when(mockCleanupEvent2.getTaskAttemptID()).thenReturn(taskAttemptId); when(mockCleanupEvent2.getContainerMgrAddress()).thenReturn(cmAddress); ut.handle(mockCleanupEvent2);