From 14b6b907ad053d962a0a23355f8bc7034ccf4339 Mon Sep 17 00:00:00 2001 From: Jackson Yao Date: Fri, 8 Jul 2022 19:09:41 +0800 Subject: [PATCH 1/3] HDDS-6978. EC: Cleanup RECOVERING container on DN restarts --- .../container/ozoneimpl/ContainerReader.java | 15 +++++++++++++-- .../ozone/container/ozoneimpl/OzoneContainer.java | 2 +- .../container/ozoneimpl/TestContainerReader.java | 6 +++--- .../ozone/debug/container/ContainerCommands.java | 2 +- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java index 2a88a2fe741f..b34371ef85da 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java @@ -34,6 +34,8 @@ import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerDataProto.State.RECOVERING; import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil; import org.slf4j.Logger; @@ -73,17 +75,18 @@ public class ContainerReader implements Runnable { private final ConfigurationSource config; private final File hddsVolumeDir; private final MutableVolumeSet volumeSet; + private final boolean shouldDeleteRecovering; public ContainerReader( MutableVolumeSet volSet, HddsVolume volume, ContainerSet cset, - ConfigurationSource conf - ) { + ConfigurationSource conf, boolean shouldDeleteRecovering) { Preconditions.checkNotNull(volume); this.hddsVolume = volume; this.hddsVolumeDir = hddsVolume.getHddsRootDir(); this.containerSet = cset; this.config = conf; this.volumeSet = volSet; + this.shouldDeleteRecovering = shouldDeleteRecovering; } @Override @@ -207,6 +210,14 @@ public void verifyAndFixupContainerData(ContainerData containerData) KeyValueContainerUtil.parseKVContainerData(kvContainerData, config); KeyValueContainer kvContainer = new KeyValueContainer(kvContainerData, config); + if (kvContainer.getContainerState() == RECOVERING) { + if (shouldDeleteRecovering) { + kvContainer.delete(); + LOG.info("delete recovering container {}.", + kvContainer.getContainerData().getContainerID()); + } + return; + } containerSet.addContainer(kvContainer); } else { throw new StorageContainerException("Container File is corrupted. " + diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index df61cac960a1..1db2961ba9fc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -245,7 +245,7 @@ private void buildContainerSet() { while (volumeSetIterator.hasNext()) { StorageVolume volume = volumeSetIterator.next(); Thread thread = new Thread(new ContainerReader(volumeSet, - (HddsVolume) volume, containerSet, config)); + (HddsVolume) volume, containerSet, config, true)); thread.start(); volumeThreads.add(thread); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java index 2ab6f95cf658..76de04943205 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java @@ -217,7 +217,7 @@ private List addBlocks(KeyValueContainer keyValueContainer, @Test public void testContainerReader() throws Exception { ContainerReader containerReader = new ContainerReader(volumeSet, - hddsVolume, containerSet, conf); + hddsVolume, containerSet, conf, true); Thread thread = new Thread(containerReader); thread.start(); @@ -284,7 +284,7 @@ public void testContainerReaderWithLoadException() throws Exception { ContainerCache.getInstance(conf).shutdownCache(); ContainerReader containerReader = new ContainerReader(volumeSet1, - hddsVolume1, containerSet1, conf); + hddsVolume1, containerSet1, conf, true); containerReader.readVolume(hddsVolume1.getHddsRootDir()); Assert.assertEquals(containerCount - 1, containerSet1.containerCount()); } @@ -346,7 +346,7 @@ public void testMultipleContainerReader() throws Exception { Thread[] threads = new Thread[volumeNum]; for (int i = 0; i < volumeNum; i++) { containerReaders[i] = new ContainerReader(volumeSets, - (HddsVolume) volumes.get(i), containerSet, conf); + (HddsVolume) volumes.get(i), containerSet, conf, true); threads[i] = new Thread(containerReaders[i]); } long startTime = System.currentTimeMillis(); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ContainerCommands.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ContainerCommands.java index c5b21d604fe2..453370ed078d 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ContainerCommands.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ContainerCommands.java @@ -151,7 +151,7 @@ public void loadContainersFromVolumes() throws IOException { HddsVolume volume = volumeSetIterator.next(); LOG.info("Loading container metadata from volume " + volume.toString()); final ContainerReader reader = - new ContainerReader(volumeSet, volume, containerSet, conf); + new ContainerReader(volumeSet, volume, containerSet, conf, false); reader.run(); } From 79df6b19248dc6c0ff1d7732297ed6789374d2aa Mon Sep 17 00:00:00 2001 From: Jackson Yao Date: Fri, 8 Jul 2022 20:17:36 +0800 Subject: [PATCH 2/3] trigger CI From a341f9d7425f9c0613b082403087fdc0c8e9393a Mon Sep 17 00:00:00 2001 From: Jackson Yao Date: Sat, 9 Jul 2022 19:32:09 +0800 Subject: [PATCH 3/3] fix comments --- .../container/ozoneimpl/ContainerReader.java | 2 +- .../container/ozoneimpl/TestContainerReader.java | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java index b34371ef85da..573cee9bf501 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java @@ -213,7 +213,7 @@ public void verifyAndFixupContainerData(ContainerData containerData) if (kvContainer.getContainerState() == RECOVERING) { if (shouldDeleteRecovering) { kvContainer.delete(); - LOG.info("delete recovering container {}.", + LOG.info("Delete recovering container {}.", kvContainer.getContainerData().getContainerID()); } return; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java index 76de04943205..4f633b1d4ef1 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java @@ -22,8 +22,8 @@ import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.container.common.helpers.BlockData; @@ -57,6 +57,7 @@ import java.util.List; import java.util.UUID; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.RECOVERING; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded; import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.ArgumentMatchers.anyLong; @@ -216,6 +217,18 @@ private List addBlocks(KeyValueContainer keyValueContainer, @Test public void testContainerReader() throws Exception { + KeyValueContainerData recoveringContainerData = new KeyValueContainerData( + 10, layout, (long) StorageUnit.GB.toBytes(5), + UUID.randomUUID().toString(), datanodeId.toString()); + //create a container with recovering state + recoveringContainerData.setState(RECOVERING); + + KeyValueContainer recoveringKeyValueContainer = + new KeyValueContainer(recoveringContainerData, + conf); + recoveringKeyValueContainer.create( + volumeSet, volumeChoosingPolicy, clusterId); + ContainerReader containerReader = new ContainerReader(volumeSet, hddsVolume, containerSet, conf, true); @@ -223,6 +236,7 @@ public void testContainerReader() throws Exception { thread.start(); thread.join(); + //recovering container should be deleted, so the count should be 2 Assert.assertEquals(2, containerSet.containerCount()); for (int i = 0; i < 2; i++) {