From f2804bbeef18bbdd887dbee39b51f8a88becb3b0 Mon Sep 17 00:00:00 2001 From: Jackson Yao Date: Thu, 28 Apr 2022 08:53:17 +0800 Subject: [PATCH 1/8] add action when a CLEANUP event is fired --- .../hdds/scm/container/ContainerStateManagerImpl.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 5941ac3f44cf..6356b4f4d9fc 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -253,6 +253,12 @@ private void initialize() throws IOException { actions = new EnumMap<>(LifeCycleEvent.class); actions.put(FINALIZE, info -> pipelineManager .removeContainerFromPipeline(info.getPipelineID(), info.containerID())); + //when a CLEANUP event if fired , the state of the container will be changed + //to DELETE. at this time, we should remove the reference of the container + // from scm. the reference of a container will be removed from scm immediately + // after the state is changed to DELETED, thus DELETED is a transient state + // and hardly be captured. + actions.put(CLEANUP, info -> removeContainer(info.containerID().getProtobuf())); return actions; } From 47c8e0433359969f3d5b12f59d96ac8768ca5df5 Mon Sep 17 00:00:00 2001 From: Jackson Yao Date: Thu, 28 Apr 2022 09:07:10 +0800 Subject: [PATCH 2/8] remove DELETED container in RM --- .../hadoop/hdds/scm/container/ReplicationManager.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java index 279163b8f332..f2f000ffa593 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java @@ -514,10 +514,14 @@ private void processContainer(ContainerInfo container, } /** - * We don't need to take any action for a DELETE container - eventually - * it will be removed from SCM. + * a container reference should be removed from scm immediately + * after a CLEANUP event is fired, so DELETED is a transient + * state and hardly appears.if it appears at any unknown scenario, + * just remove it. */ if (state == LifeCycleState.DELETED) { + LOG.warn("container {} is in DELETED state, but not removed", id); + containerManager.deleteContainer(id); return; } From c7e9f8bd2c4f282f774d6b3c1b7be3021f3f5c7d Mon Sep 17 00:00:00 2001 From: Jackson Yao Date: Thu, 28 Apr 2022 09:34:00 +0800 Subject: [PATCH 3/8] add test case --- .../hdds/scm/container/TestContainerManagerImpl.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java index ee0b6378d21c..0e6a8891a935 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java @@ -123,6 +123,15 @@ public void testUpdateContainerState() throws Exception { HddsProtos.LifeCycleEvent.FORCE_CLOSE); Assert.assertEquals(HddsProtos.LifeCycleState.CLOSED, containerManager.getContainer(cid).getState()); + containerManager.updateContainerState(cid, + HddsProtos.LifeCycleEvent.DELETE); + Assert.assertEquals(HddsProtos.LifeCycleState.DELETING, + containerManager.getContainer(cid).getState()); + containerManager.updateContainerState(cid, + HddsProtos.LifeCycleEvent.CLEANUP); + //when a CLEANUP event is fired , the container should be + //removed from scm. + Assert.assertFalse(containerManager.containerExist(cid)); } @Test From 82ad2387e73a1a060b430681ec57b2c9894203f8 Mon Sep 17 00:00:00 2001 From: Jackson Yao Date: Thu, 28 Apr 2022 09:36:04 +0800 Subject: [PATCH 4/8] fix checkStyle --- .../hdds/scm/container/ContainerStateManagerImpl.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 6356b4f4d9fc..e309d781e2bf 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -255,10 +255,11 @@ private void initialize() throws IOException { .removeContainerFromPipeline(info.getPipelineID(), info.containerID())); //when a CLEANUP event if fired , the state of the container will be changed //to DELETE. at this time, we should remove the reference of the container - // from scm. the reference of a container will be removed from scm immediately - // after the state is changed to DELETED, thus DELETED is a transient state - // and hardly be captured. - actions.put(CLEANUP, info -> removeContainer(info.containerID().getProtobuf())); + // from scm. the reference of a container will be removed from scm + // immediately after the state is changed to DELETED, thus DELETED is + // a transient state and hardly be captured. + actions.put(CLEANUP, info -> + removeContainer(info.containerID().getProtobuf())); return actions; } From 22ce5f9dee57fa039bcc8da5416e9995fc8b863c Mon Sep 17 00:00:00 2001 From: Jackson Yao Date: Thu, 28 Apr 2022 11:46:50 +0800 Subject: [PATCH 5/8] fix unit test --- .../hdds/scm/container/ContainerStateManagerImpl.java | 7 +++---- .../hdds/scm/container/TestContainerManagerImpl.java | 1 + .../container/TestContainerStateManagerIntegration.java | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index e309d781e2bf..0dcdf005febd 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -254,10 +254,9 @@ private void initialize() throws IOException { actions.put(FINALIZE, info -> pipelineManager .removeContainerFromPipeline(info.getPipelineID(), info.containerID())); //when a CLEANUP event if fired , the state of the container will be changed - //to DELETE. at this time, we should remove the reference of the container - // from scm. the reference of a container will be removed from scm - // immediately after the state is changed to DELETED, thus DELETED is - // a transient state and hardly be captured. + //to DELETED. at this time, we should remove the reference of the container + // from scm. this happens immediately, thus DELETED is a transient state + // and hardly be captured. actions.put(CLEANUP, info -> removeContainer(info.containerID().getProtobuf())); return actions; diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java index 0e6a8891a935..0b3fd0f3c045 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java @@ -127,6 +127,7 @@ public void testUpdateContainerState() throws Exception { HddsProtos.LifeCycleEvent.DELETE); Assert.assertEquals(HddsProtos.LifeCycleState.DELETING, containerManager.getContainer(cid).getState()); + Assert.assertTrue(containerManager.containerExist(cid)); containerManager.updateContainerState(cid, HddsProtos.LifeCycleEvent.CLEANUP); //when a CLEANUP event is fired , the container should be diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java index ea26ad2e2a97..a97d9e75bfa6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java @@ -308,9 +308,9 @@ public void testUpdateContainerState() throws IOException, containerManager .updateContainerState(container1.getContainerInfo().containerID(), HddsProtos.LifeCycleEvent.CLEANUP); - containerList = containerStateManager - .getContainerIDs(HddsProtos.LifeCycleState.DELETED); - Assert.assertEquals(1, containerList.size()); + //when CLEANUP is fired, container will be removed from scm + Assert.assertFalse(containerManager.containerExist( + container1.getContainerInfo().containerID())); // Allocate container1 and update its state from // OPEN -> CLOSING -> CLOSED From e1da67331c6d2c2ae000ddc2f06f08e3b07b111d Mon Sep 17 00:00:00 2001 From: Jackson Yao Date: Thu, 28 Apr 2022 14:13:03 +0800 Subject: [PATCH 6/8] fix integration test --- .../scm/container/TestContainerStateManagerIntegration.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java index a97d9e75bfa6..5d162d8f3bda 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManagerIntegration.java @@ -324,9 +324,6 @@ public void testUpdateContainerState() throws IOException, containerManager .updateContainerState(container3.getContainerInfo().containerID(), HddsProtos.LifeCycleEvent.CLOSE); - containerManager - .updateContainerState(container1.getContainerInfo().containerID(), - HddsProtos.LifeCycleEvent.CLOSE); containerList = containerStateManager .getContainerIDs(HddsProtos.LifeCycleState.CLOSED); Assert.assertEquals(1, containerList.size()); From afd2dceda4c67165917452e2bbc33b4a637fd768 Mon Sep 17 00:00:00 2001 From: Jackson Yao Date: Wed, 27 Jul 2022 17:51:25 +0800 Subject: [PATCH 7/8] add config --- .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 6 +++++ .../src/main/resources/ozone-default.xml | 9 ++++++++ .../container/ContainerStateManagerImpl.java | 22 ++++++++++++------- .../replication/LegacyReplicationManager.java | 12 ++++++++-- 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 49789f4d704a..93893c003526 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -422,6 +422,12 @@ public final class ScmConfigKeys { public static final String OZONE_SCM_PIPELINE_SCRUB_INTERVAL_DEFAULT = "5m"; + public static final String OZONE_SCM_REMOVE_DELETED_CONTAINER_ENABLED = + "ozone.scm.remove.deleted.container.enabled"; + + public static final boolean + OZONE_SCM_REMOVE_DELETED_CONTAINER_ENABLED_DEFAULT = true; + // Allow SCM to auto create factor ONE ratis pipeline. public static final String OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE = diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 9fa3dd3ab823..221ce625ffc1 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -2178,6 +2178,15 @@ + + ozone.scm.remove.deleted.container.enabled + true + OZONE,SCM,OPERATION + Boolean value to enable removing container reference from scm + if its state is DELETED + + + hdds.metadata.dir diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 306608dce716..285dae653aab 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -147,7 +147,7 @@ private ContainerStateManagerImpl(final Configuration conf, this.containerSize = getConfiguredContainerSize(conf); this.containers = new ContainerStateMap(); this.lastUsedMap = new ConcurrentHashMap<>(); - this.containerStateChangeActions = getContainerStateChangeActions(); + this.containerStateChangeActions = getContainerStateChangeActions(conf); this.transactionBuffer = buffer; initialize(); @@ -250,17 +250,23 @@ private void initialize() throws IOException { } private Map> - getContainerStateChangeActions() { + getContainerStateChangeActions(final Configuration conf) { final Map> actions = new EnumMap<>(LifeCycleEvent.class); actions.put(FINALIZE, info -> pipelineManager .removeContainerFromPipeline(info.getPipelineID(), info.containerID())); - //when a CLEANUP event if fired , the state of the container will be changed - //to DELETED. at this time, we should remove the reference of the container - // from scm. this happens immediately, thus DELETED is a transient state - // and hardly be captured. - actions.put(CLEANUP, info -> - removeContainer(info.containerID().getProtobuf())); + + boolean shouldRemoveDeletedContainer = conf.getBoolean( + ScmConfigKeys.OZONE_SCM_REMOVE_DELETED_CONTAINER_ENABLED, + ScmConfigKeys.OZONE_SCM_REMOVE_DELETED_CONTAINER_ENABLED_DEFAULT); + if (shouldRemoveDeletedContainer) { + // when a CLEANUP event if fired , the state of the container will be + // changed to DELETED. at this time, we should remove the reference + // of the container from scm. this happens immediately, thus DELETED + // is a transient state and hardly be captured. + actions.put(CLEANUP, info -> + removeContainer(info.containerID().getProtobuf())); + } return actions; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java index 43de49b39ec2..f9e10ea01c37 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java @@ -333,6 +333,8 @@ public enum MoveResult { */ private final MoveScheduler moveScheduler; + private final boolean shouldRemoveDeletedContainer; + /** * Constructs ReplicationManager instance with the given configuration. @@ -377,6 +379,10 @@ public LegacyReplicationManager(final ConfigurationSource conf, .setDBTransactionBuffer(scmhaManager.getDBTransactionBuffer()) .setRatisServer(scmhaManager.getRatisServer()) .setMoveTable(moveTable).build(); + + shouldRemoveDeletedContainer = conf.getBoolean( + ScmConfigKeys.OZONE_SCM_REMOVE_DELETED_CONTAINER_ENABLED, + ScmConfigKeys.OZONE_SCM_REMOVE_DELETED_CONTAINER_ENABLED_DEFAULT); } @@ -492,8 +498,10 @@ protected void processContainer(ContainerInfo container, * just remove it. */ if (state == LifeCycleState.DELETED) { - LOG.warn("container {} is in DELETED state, but not removed", id); - containerManager.deleteContainer(id); + if (shouldRemoveDeletedContainer) { + LOG.warn("container {} is in DELETED state, but not removed", id); + containerManager.deleteContainer(id); + } return; } From 461087fd3e56185c74bcd6b137683c2e82e4d64f Mon Sep 17 00:00:00 2001 From: Jackson Yao Date: Wed, 27 Jul 2022 18:52:17 +0800 Subject: [PATCH 8/8] fix ut --- .../hadoop/hdds/scm/container/TestContainerManagerImpl.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java index 6cf46651eff1..b8bed00f3c43 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java @@ -134,14 +134,14 @@ public void testUpdateContainerState() throws Exception { containerManager.getContainer(cid).getState()); containerManager.updateContainerState(cid, HddsProtos.LifeCycleEvent.DELETE); - Assert.assertEquals(HddsProtos.LifeCycleState.DELETING, + Assertions.assertEquals(HddsProtos.LifeCycleState.DELETING, containerManager.getContainer(cid).getState()); - Assert.assertTrue(containerManager.containerExist(cid)); + Assertions.assertTrue(containerManager.containerExist(cid)); containerManager.updateContainerState(cid, HddsProtos.LifeCycleEvent.CLEANUP); //when a CLEANUP event is fired , the container should be //removed from scm. - Assert.assertFalse(containerManager.containerExist(cid)); + Assertions.assertFalse(containerManager.containerExist(cid)); } @Test