From adcfc37637a179914fbc2b4bb9b36e44b3bd1c6d Mon Sep 17 00:00:00 2001
From: S O'Donnell
Date: Mon, 12 Oct 2020 15:36:42 +0100
Subject: [PATCH 1/2] Added test to reproduce the issue and corrected the
 problem in ContainerInfo

---
 .../hdds/scm/container/ContainerInfo.java     |   2 +
 .../org/apache/hadoop/ozone/TestDataUtil.java |  13 +-
 .../hadoop/ozone/scm/TestCloseContainer.java  | 136 ++++++++++++++++++
 3 files changed, 148 insertions(+), 3 deletions(-)
 create mode 100644 hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestCloseContainer.java

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java
index a599021618c7..b8f1a926f186 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java
@@ -117,6 +117,7 @@ public static ContainerInfo fromProtobuf(HddsProtos.ContainerInfoProto info) {
         .setDeleteTransactionId(info.getDeleteTransactionId())
         .setReplicationFactor(info.getReplicationFactor())
         .setReplicationType(info.getReplicationType())
+        .setSequenceId(info.getSequenceId())
         .build();
   }
 
@@ -212,6 +213,7 @@ public HddsProtos.ContainerInfoProto getProtobuf() {
         .setReplicationFactor(getReplicationFactor())
         .setReplicationType(getReplicationType())
         .setOwner(getOwner())
+        .setSequenceId(getSequenceId())
         .build();
   }
 
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestDataUtil.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestDataUtil.java
index 115336e2ad82..362701c1b345 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestDataUtil.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestDataUtil.java
@@ -72,10 +72,17 @@ public static OzoneBucket createVolumeAndBucket(MiniOzoneCluster cluster,
   }
 
   public static void createKey(OzoneBucket bucket, String keyName,
-      String content) throws IOException {
+                               String content) throws IOException {
+    createKey(bucket, keyName, ReplicationFactor.ONE,
+        ReplicationType.STAND_ALONE, content);
+  }
+
+  public static void createKey(OzoneBucket bucket, String keyName,
+      ReplicationFactor repFactor, ReplicationType repType, String content)
+      throws IOException {
     try (OutputStream stream = bucket
-        .createKey(keyName, content.length(), ReplicationType.STAND_ALONE,
-            ReplicationFactor.ONE, new HashMap<>())) {
+        .createKey(keyName, content.length(), repType, repFactor,
+            new HashMap<>())) {
       stream.write(content.getBytes());
     }
   }
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestCloseContainer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestCloseContainer.java
new file mode 100644
index 000000000000..452b03c7eec9
--- /dev/null
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestCloseContainer.java
@@ -0,0 +1,136 @@
+package org.apache.hadoop.ozone.scm;
+
+import org.apache.hadoop.hdds.client.ReplicationFactor;
+import org.apache.hadoop.hdds.client.ReplicationType;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.TestDataUtil;
+import org.apache.hadoop.ozone.client.OzoneBucket;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_COMMAND_STATUS_REPORT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+/**
+ * Integration test to ensure a container can be closed and its replicas
+ * reported back correctly after a SCM restart.
+ */
+public class TestCloseContainer {
+
+  private static int numOfDatanodes = 3;
+  private static String bucketName = "bucket1";
+  private static String volName = "vol1";
+  private OzoneBucket bucket;
+  private MiniOzoneCluster cluster;
+
+  @Before
+  public void setUp() throws Exception {
+    OzoneConfiguration conf = new OzoneConfiguration();
+    final int interval = 100;
+
+    conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL,
+        interval, TimeUnit.MILLISECONDS);
+    conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1, SECONDS);
+    conf.setTimeDuration(HDDS_PIPELINE_REPORT_INTERVAL, 1, SECONDS);
+    conf.setTimeDuration(HDDS_COMMAND_STATUS_REPORT_INTERVAL, 1, SECONDS);
+    conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, SECONDS);
+    conf.setTimeDuration(HDDS_NODE_REPORT_INTERVAL, 1, SECONDS);
+    conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, SECONDS);
+    conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, SECONDS);
+
+    cluster = MiniOzoneCluster.newBuilder(conf)
+        .setNumDatanodes(numOfDatanodes)
+        .build();
+    cluster.waitForClusterToBeReady();
+
+    bucket = TestDataUtil.createVolumeAndBucket(cluster, volName, bucketName);
+  }
+
+  /**
+   * Shuts down the mini cluster started in {@link #setUp()}. The null check
+   * covers the case where setUp failed before the cluster was assigned.
+   */
+  @After
+  public void tearDown() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testReplicasAreReportedForClosedContainerAfterRestart()
+      throws Exception {
+    // Create some keys to write data into the open containers
+    for (int i=0; i<10; i++) {
+      TestDataUtil.createKey(bucket, "key"+i, ReplicationFactor.THREE,
+          ReplicationType.RATIS, "this is the content");
+    }
+    StorageContainerManager scm = cluster.getStorageContainerManager();
+
+    // Pick any container on the cluster, get its pipeline, close it and then
+    // wait for the container to close
+    ContainerInfo container = scm.getContainerManager().getContainers().get(0);
+    Pipeline pipeline = scm.getPipelineManager()
+        .getPipeline(container.getPipelineID());
+    scm.getPipelineManager().finalizeAndDestroyPipeline(pipeline, false);
+    GenericTestUtils.waitFor(() ->
+            container.getState() == HddsProtos.LifeCycleState.CLOSED,
+        200, 30000);
+
+    long originalSeq = container.getSequenceId();
+
+    cluster.restartStorageContainerManager(true);
+
+    scm = cluster.getStorageContainerManager();
+    ContainerInfo newContainer
+        = scm.getContainerManager().getContainer(container.containerID());
+
+    // After restarting SCM, ensure the sequenceId for the container is the
+    // same as before.
+    assertEquals(originalSeq, newContainer.getSequenceId());
+
+    // Ensure 3 replicas are reported successfully as expected.
+    GenericTestUtils.waitFor(() ->
+        getContainerReplicas(newContainer).size() == 3, 200, 30000);
+  }
+
+  /**
+   * Retrieves the containerReplica set for a given container or fails the test
+   * if the container cannot be found. This is a helper method to allow the
+   * container replica count to be checked in a lambda expression.
+   * @param c The container for which to retrieve replicas
+   * @return The replicas the SCM container manager returns for the container
+   */
+  private Set<ContainerReplica> getContainerReplicas(ContainerInfo c) {
+    Set<ContainerReplica> replicas = null;
+    try {
+      replicas = cluster.getStorageContainerManager()
+          .getContainerManager().getContainerReplicas(c.containerID());
+    } catch (ContainerNotFoundException e) {
+      fail("Unexpected ContainerNotFoundException");
+    }
+    return replicas;
+  }
+
+}

From 12eca8b61bc8167d4c5e359f8f7bbab614e4d6ec Mon Sep 17 00:00:00 2001
From: S O'Donnell
Date: Mon, 12 Oct 2020 20:52:49 +0100
Subject: [PATCH 2/2] Fix integration test and RAT failure

---
 .../hadoop/ozone/scm/TestCloseContainer.java  | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestCloseContainer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestCloseContainer.java
index 452b03c7eec9..fe058592ba49 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestCloseContainer.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestCloseContainer.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.ozone.scm;
 
 import org.apache.hadoop.hdds.client.ReplicationFactor;
@@ -7,6 +24,7 @@
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration;
 import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
 import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
@@ -17,6 +35,7 @@
 import org.junit.Before;
 import org.junit.Test;
 
+import java.time.Duration;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
@@ -59,6 +78,11 @@ public void setUp() throws Exception {
     conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, SECONDS);
     conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, SECONDS);
 
+    ReplicationManagerConfiguration replicationConf =
+        conf.getObject(ReplicationManagerConfiguration.class);
+    replicationConf.setInterval(Duration.ofSeconds(1));
+    conf.setFromObject(replicationConf);
+
     cluster = MiniOzoneCluster.newBuilder(conf)
         .setNumDatanodes(numOfDatanodes)
         .build();