From 7f4d56609f9b6f55191463d2f0ff6d698848670e Mon Sep 17 00:00:00 2001 From: Ritesh H Shukla Date: Wed, 8 Mar 2023 20:47:39 -0800 Subject: [PATCH 1/6] HDDS-8118. Fail container delete on non empty chunks dir Add a defensive check if the container directory is empty before deleting the container to make sure data is not lost. This may leave a small amount of orphaned data before HDDS-8117 is complete. --- .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 7 + .../src/main/resources/ozone-default.xml | 8 ++ .../common/helpers/ContainerMetrics.java | 27 ++++ .../common/interfaces/Container.java | 7 + .../container/keyvalue/KeyValueContainer.java | 5 + .../container/keyvalue/KeyValueHandler.java | 36 ++++- .../helpers/KeyValueContainerUtil.java | 21 +++ .../container/ozoneimpl/OzoneContainer.java | 9 +- .../TestDeleteContainerHandler.java | 123 +++++++++++++++++- 9 files changed, 238 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 2c80c74f2992..283f98fb83d3 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -581,6 +581,13 @@ public final class ScmConfigKeys { public static final String OZONE_AUDIT_LOG_DEBUG_CMD_LIST_SCMAUDIT = "ozone.audit.log.debug.cmd.list.scmaudit"; + + public static final String OZONE_SCM_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE = + "ozone.scm.check.empty.container.delete"; + public static final Boolean + OZONE_SCM_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE_DEFAULT = + true; + /** * Never constructed. */ diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index f525478e8f27..2e39c77bbc8c 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -3607,4 +3607,12 @@ without using the optimised DAG based pruning approach + + ozone.scm.check.empty.container.delete + true + DATANODE + + Disallow deletion of containers if on disk there are chunks still present. + + diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java index 09eb65dfe8c1..e77b0031b94f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java @@ -49,6 +49,10 @@ public class ContainerMetrics { public static final String STORAGE_CONTAINER_METRICS = "StorageContainerMetrics"; @Metric private MutableCounterLong numOps; + @Metric private MutableCounterLong containerDeleteFailedNonEmptyDir; + @Metric private MutableCounterLong containerDeleteFailedBlockCountNotZero; + @Metric private MutableCounterLong containerForceDelete; + private MutableCounterLong[] numOpsArray; private MutableCounterLong[] opsBytesArray; private MutableRate[] opsLatency; @@ -125,4 +129,27 @@ public void incContainerBytesStats(ContainerProtos.Type type, long bytes) { public long getContainerBytesMetrics(ContainerProtos.Type type) { return opsBytesArray[type.ordinal()].value(); } + + public void incContainerDeleteFailedBlockCountNotZero() { + containerDeleteFailedBlockCountNotZero.incr(); + } + public void incContainerDeleteFailedNonEmpty() { + containerDeleteFailedNonEmptyDir.incr(); + } + + public void incContainersForceDelete() { + containerForceDelete.incr(); + } + + public long getContainerDeleteFailedNonEmptyDir() { + return containerDeleteFailedNonEmptyDir.value(); + } + + public long getContainerDeleteFailedBlockCountNotZero() { + return containerDeleteFailedBlockCountNotZero.value(); + } + + public long getContainerForceDelete() { + return containerForceDelete.value(); + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java index 67a7a1677924..09a9b2f25de0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java @@ -57,6 +57,13 @@ void create(VolumeSet volumeSet, VolumeChoosingPolicy volumeChoosingPolicy, */ void delete() throws StorageContainerException; + /** + * Returns true if container is empty. + * @return true of container is empty + * @throws IOException if was unable to check container status. + */ + boolean isContainerEmpty() throws IOException; + /** * Update the container. * diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java index e1ee88114228..f169ddda594a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java @@ -308,6 +308,11 @@ public void delete() throws StorageContainerException { } } + @Override + public boolean isContainerEmpty() throws IOException { + return KeyValueContainerUtil.noBlocksInContainer(containerData); + } + @Override public void markContainerForClose() throws StorageContainerException { writeLock(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 1c60109431ed..922a37d4375a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -131,6 +131,7 @@ public class KeyValueHandler extends Handler { private final long maxContainerSize; private final Function byteBufferToByteString; private final boolean validateChunkChecksumData; + private boolean checkIfNoBlockFiles; // A striped lock that is held during container creation. private final Striped containerCreationLocks; @@ -155,6 +156,13 @@ public KeyValueHandler(ConfigurationSource config, } catch (Exception e) { throw new RuntimeException(e); } + + checkIfNoBlockFiles = + conf.getBoolean( + ScmConfigKeys.OZONE_SCM_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE, + ScmConfigKeys. + OZONE_SCM_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE_DEFAULT); + maxContainerSize = (long) config.getStorageSize( ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE, ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES); @@ -1231,19 +1239,41 @@ private void deleteInternal(Container container, boolean force) } // Safety check that the container is empty. // If the container is not empty, it should not be deleted unless the - // container is beinf forcefully deleted (which happens when + // container is being forcefully deleted (which happens when // container is unhealthy or over-replicated). if (container.getContainerData().getBlockCount() != 0) { + metrics.incContainerDeleteFailedBlockCountNotZero(); LOG.error("Received container deletion command for container {} but" + - " the container is not empty.", - container.getContainerData().getContainerID()); + " the container is not empty with blockCount {}", + container.getContainerData().getContainerID(), + container.getContainerData().getBlockCount()); throw new StorageContainerException("Non-force deletion of " + "non-empty container is not allowed.", DELETE_ON_NON_EMPTY_CONTAINER); } + + if (checkIfNoBlockFiles && !container.isContainerEmpty()) { + metrics.incContainerDeleteFailedNonEmpty(); + LOG.error("Received container deletion command for container {} but" + + " the container is not empty", + container.getContainerData().getContainerID()); + throw new StorageContainerException("Non-force deletion of " + + "non-empty container:" + + container.getContainerData().getContainerID() + + " is not allowed.", + DELETE_ON_NON_EMPTY_CONTAINER); + } + } else { + metrics.incContainersForceDelete(); } long containerId = container.getContainerData().getContainerID(); containerSet.removeContainer(containerId); + } catch (IOException e) { + LOG.error("Could not determine if the container {} is empty", + container.getContainerData().getContainerID(), e); + throw new StorageContainerException("Could not determine if container " + + container.getContainerData().getContainerID() + + " is empty", DELETE_ON_NON_EMPTY_CONTAINER); } finally { container.writeUnlock(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java index bc3d96d9b211..6d1a54b2a24a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java @@ -19,6 +19,7 @@ import java.io.File; import java.io.IOException; +import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -159,6 +160,26 @@ public static void removeContainer(KeyValueContainerData containerData, FileUtils.deleteDirectory(containerMetaDataPath.getParentFile()); } + /** + * Returns if there are no blocks in the container. + * @param containerData Container to check + * @param conf configuration + * @return true if the directory containing blocks is empty + * @throws IOException + */ + public static boolean noBlocksInContainer(KeyValueContainerData + containerData) + throws IOException { + Preconditions.checkNotNull(containerData); + File chunksPath = new File(containerData.getChunksPath()); + Preconditions.checkArgument(chunksPath.isDirectory()); + + try (DirectoryStream dir + = Files.newDirectoryStream(chunksPath.toPath())) { + return !dir.iterator().hasNext(); + } + } + /** * Parse KeyValueContainerData and verify checksum. Set block related * metadata like block commit sequence id, block count, bytes used and diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index b3db557b6fdb..a1d159ef353c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -119,6 +119,9 @@ public class OzoneContainer { private DatanodeDetails datanodeDetails; private StateContext context; + + private final ContainerMetrics metrics; + enum InitializingStatus { UNINITIALIZED, INITIALIZING, INITIALIZED } @@ -162,7 +165,7 @@ public OzoneContainer( metadataScanner = null; buildContainerSet(); - final ContainerMetrics metrics = ContainerMetrics.create(conf); + metrics = ContainerMetrics.create(conf); handlers = Maps.newHashMap(); IncrementalReportSender icrSender = container -> { @@ -521,4 +524,8 @@ public MutableVolumeSet getDbVolumeSet() { StorageVolumeChecker getVolumeChecker(ConfigurationSource conf) { return new StorageVolumeChecker(conf, new Timer()); } + + public ContainerMetrics getMetrics() { + return metrics; + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java index e39fe7ffd5f6..79e1ce8957f3 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java @@ -34,8 +34,10 @@ import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler; import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand; @@ -47,6 +49,7 @@ import org.junit.BeforeClass; import org.junit.Test; +import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.UUID; @@ -107,6 +110,120 @@ public static void shutdown() { } } + @Test(timeout = 60000) + public void testDeleteNonEmptyContainerDir() + throws Exception { + // 1. Test if a non force deletion fails if chunks are still present with + // block count set to 0 + // 2. Test if a force deletion passes even if chunks are still present + + //the easiest way to create an open container is creating a key + + String keyName = UUID.randomUUID().toString(); + + // create key + createKey(keyName); + + // get containerID of the key + ContainerID containerId = getContainerID(keyName); + + ContainerInfo container = cluster.getStorageContainerManager() + .getContainerManager().getContainer(containerId); + + Pipeline pipeline = cluster.getStorageContainerManager() + .getPipelineManager().getPipeline(container.getPipelineID()); + + // We need to close the container because delete container only happens + // on closed containers when force flag is set to false. + + HddsDatanodeService hddsDatanodeService = + cluster.getHddsDatanodes().get(0); + + Assert.assertFalse(isContainerClosed(hddsDatanodeService, + containerId.getId())); + + DatanodeDetails datanodeDetails = hddsDatanodeService.getDatanodeDetails(); + + NodeManager nodeManager = + cluster.getStorageContainerManager().getScmNodeManager(); + //send the order to close the container + SCMCommand command = new CloseContainerCommand( + containerId.getId(), pipeline.getId()); + command.setTerm( + cluster.getStorageContainerManager().getScmContext().getTermOfLeader()); + nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command); + + Container containerInternalObj = + hddsDatanodeService. + getDatanodeStateMachine(). + getContainer().getContainerSet().getContainer(containerId.getId()); + + // Write a file to the container chunks directory indicating that there + // might be a discrepancy between block count as recorded in RocksDB and + // what is actually on disk. + File lingeringBlock = + new File(containerInternalObj. + getContainerData().getChunksPath() + "/1.block"); + lingeringBlock.createNewFile(); + ContainerMetrics metrics = + hddsDatanodeService + .getDatanodeStateMachine().getContainer().getMetrics(); + GenericTestUtils.waitFor(() -> + isContainerClosed(hddsDatanodeService, containerId.getId()), + 500, 5 * 1000); + + //double check if it's really closed (waitFor also throws an exception) + Assert.assertTrue(isContainerClosed(hddsDatanodeService, + containerId.getId())); + + // Check container exists before sending delete container command + Assert.assertFalse(isContainerDeleted(hddsDatanodeService, + containerId.getId())); + + // Set container blockCount to 0 to mock that it is empty as per RocksDB + getContainerfromDN(hddsDatanodeService, containerId.getId()) + .getContainerData().setBlockCount(0); + + // send delete container to the datanode + command = new DeleteContainerCommand(containerId.getId(), false); + + // Send the delete command. It should fail as even though block count + // is zero there is a lingering block on disk. + command.setTerm( + cluster.getStorageContainerManager().getScmContext().getTermOfLeader()); + nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command); + + // Deleting a non-empty container should pass on the DN when the force flag + // is true + // Check the log for the error message when deleting non-empty containers + GenericTestUtils.LogCapturer logCapturer = + GenericTestUtils.LogCapturer.captureLogs( + LoggerFactory.getLogger(KeyValueHandler.class)); + GenericTestUtils.waitFor(() -> + logCapturer.getOutput().contains("container is not empty"), + 500, + 5 * 2000); + Assert.assertTrue(!isContainerDeleted(hddsDatanodeService, + containerId.getId())); + Assert.assertEquals(1, + metrics.getContainerDeleteFailedNonEmptyDir()); + // Send the delete command. It should pass with force flag. + long beforeForceCount = metrics.getContainerForceDelete(); + command = new DeleteContainerCommand(containerId.getId(), true); + + command.setTerm( + cluster.getStorageContainerManager().getScmContext().getTermOfLeader()); + nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command); + + GenericTestUtils.waitFor(() -> + isContainerDeleted(hddsDatanodeService, containerId.getId()), + 500, 5 * 1000); + Assert.assertTrue(isContainerDeleted(hddsDatanodeService, + containerId.getId())); + Assert.assertTrue(beforeForceCount < + metrics.getContainerForceDelete()); + } + @Test(timeout = 60000) public void testDeleteContainerRequestHandlerOnClosedContainer() throws Exception { @@ -175,7 +292,11 @@ public void testDeleteContainerRequestHandlerOnClosedContainer() GenericTestUtils.waitFor(() -> logCapturer.getOutput().contains("Non" + "-force deletion of non-empty container is not allowed"), 500, 5 * 1000); - + ContainerMetrics metrics = + hddsDatanodeService + .getDatanodeStateMachine().getContainer().getMetrics(); + Assert.assertEquals(1, + metrics.getContainerDeleteFailedBlockCountNotZero()); // Set container blockCount to 0 to mock that it is empty getContainerfromDN(hddsDatanodeService, containerId.getId()) .getContainerData().setBlockCount(0); From a6e85ecbad837933462b6fd39d83f4b9d471c836 Mon Sep 17 00:00:00 2001 From: Ritesh H Shukla Date: Wed, 8 Mar 2023 22:39:31 -0800 Subject: [PATCH 2/6] Address review comments --- .../hadoop/ozone/container/common/interfaces/Container.java | 2 +- .../hadoop/ozone/container/keyvalue/KeyValueContainer.java | 2 +- .../apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java index 09a9b2f25de0..ef7c4f742051 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java @@ -62,7 +62,7 @@ void create(VolumeSet volumeSet, VolumeChoosingPolicy volumeChoosingPolicy, * @return true of container is empty * @throws IOException if was unable to check container status. */ - boolean isContainerEmpty() throws IOException; + boolean isEmpty() throws IOException; /** * Update the container. diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java index f169ddda594a..e21664c4d3ee 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java @@ -309,7 +309,7 @@ public void delete() throws StorageContainerException { } @Override - public boolean isContainerEmpty() throws IOException { + public boolean isEmpty() throws IOException { return KeyValueContainerUtil.noBlocksInContainer(containerData); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 922a37d4375a..557c2ed35af0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1252,7 +1252,7 @@ private void deleteInternal(Container container, boolean force) DELETE_ON_NON_EMPTY_CONTAINER); } - if (checkIfNoBlockFiles && !container.isContainerEmpty()) { + if (checkIfNoBlockFiles && !container.isEmpty()) { metrics.incContainerDeleteFailedNonEmpty(); LOG.error("Received container deletion command for container {} but" + " the container is not empty", From 39a81856ce097871a4efc47596cd5f6485e0027c Mon Sep 17 00:00:00 2001 From: Ritesh H Shukla Date: Thu, 9 Mar 2023 17:35:02 -0800 Subject: [PATCH 3/6] Fix integration tests and unit tests --- .../org/apache/hadoop/hdds/scm/ScmConfigKeys.java | 6 ------ .../common/src/main/resources/ozone-default.xml | 2 +- .../container/common/helpers/ContainerMetrics.java | 1 + .../common/statemachine/DatanodeConfiguration.java | 6 ++++++ .../ozone/container/keyvalue/KeyValueHandler.java | 14 ++++++++++---- .../container/keyvalue/TestKeyValueHandler.java | 4 +--- .../commandhandler/TestDeleteContainerHandler.java | 12 +++++++++--- 7 files changed, 28 insertions(+), 17 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 283f98fb83d3..279c5e811532 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -582,12 +582,6 @@ public final class ScmConfigKeys { public static final String OZONE_AUDIT_LOG_DEBUG_CMD_LIST_SCMAUDIT = "ozone.audit.log.debug.cmd.list.scmaudit"; - public static final String OZONE_SCM_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE = - "ozone.scm.check.empty.container.delete"; - public static final Boolean - OZONE_SCM_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE_DEFAULT = - true; - /** * Never constructed. */ diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 2e39c77bbc8c..f53117d096b8 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -3608,7 +3608,7 @@ - ozone.scm.check.empty.container.delete + hdds.datanode.check.empty.container.delete true DATANODE diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java index e77b0031b94f..e20833200396 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.DFSConfigKeysLegacy; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index 74d0d19fafef..43644ab8a043 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -87,6 +87,12 @@ public class DatanodeConfiguration { ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_KEY = "hdds.datanode.rocksdb.delete_obsolete_files_period"; + public static final String OZONE_DATANODE_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE = + "hdds.datanode.check.empty.container.delete"; + public static final Boolean + OZONE_DATANODE_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE_DEFAULT = + true; + /** * Number of threads per volume that Datanode will use for chunk read. */ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 557c2ed35af0..fbc6b38e561b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -159,9 +159,10 @@ public KeyValueHandler(ConfigurationSource config, checkIfNoBlockFiles = conf.getBoolean( - ScmConfigKeys.OZONE_SCM_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE, - ScmConfigKeys. - OZONE_SCM_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE_DEFAULT); + DatanodeConfiguration. + OZONE_DATANODE_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE, + DatanodeConfiguration. + OZONE_DATANODE_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE_DEFAULT); maxContainerSize = (long) config.getStorageSize( ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE, @@ -1268,7 +1269,12 @@ private void deleteInternal(Container container, boolean force) } long containerId = container.getContainerData().getContainerID(); containerSet.removeContainer(containerId); - } catch (IOException e) { + } catch (StorageContainerException e) { + throw e; + } + catch (IOException e) { + // All other IO Exceptions should be treated as if the container is not + // empty as a defensive check. LOG.error("Could not determine if the container {} is empty", container.getContainerData().getContainerID(), e); throw new StorageContainerException("Could not determine if container " diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index a249f66467d3..1df7c7a08eac 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -363,9 +363,7 @@ public void testDeleteContainer() throws IOException { Mockito.when(volumeSet.getVolumesList()) .thenReturn(Collections.singletonList(hddsVolume)); - final int[] interval = new int[1]; - interval[0] = 2; - final ContainerMetrics metrics = new ContainerMetrics(interval); + final ContainerMetrics metrics = ContainerMetrics.create(conf); final AtomicInteger icrReceived = new AtomicInteger(0); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java index 79e1ce8957f3..bbcdd9e370e0 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone.container.common.statemachine.commandhandler; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.client.ReplicationFactor; import org.apache.hadoop.hdds.client.ReplicationType; @@ -298,9 +299,14 @@ public void testDeleteContainerRequestHandlerOnClosedContainer() Assert.assertEquals(1, metrics.getContainerDeleteFailedBlockCountNotZero()); // Set container blockCount to 0 to mock that it is empty - getContainerfromDN(hddsDatanodeService, containerId.getId()) - .getContainerData().setBlockCount(0); - + Container containerToDelete = getContainerfromDN( + hddsDatanodeService, containerId.getId()); + containerToDelete.getContainerData().setBlockCount(0); + File chunkDir = new File(containerToDelete. + getContainerData().getChunksPath()); + for (File file: chunkDir.listFiles()) { + FileUtils.delete(file); + } // Send the delete command again. It should succeed this time. command.setTerm( cluster.getStorageContainerManager().getScmContext().getTermOfLeader()); From 0428ce66ad5c91a8755ce3ae50e6d7785b3eb53e Mon Sep 17 00:00:00 2001 From: Ritesh H Shukla Date: Thu, 9 Mar 2023 22:57:29 -0800 Subject: [PATCH 4/6] Fix checkstyle --- .../ozone/container/common/helpers/ContainerMetrics.java | 1 - .../common/statemachine/DatanodeConfiguration.java | 3 ++- .../hadoop/ozone/container/keyvalue/KeyValueHandler.java | 3 +-- .../commandhandler/TestDeleteContainerHandler.java | 9 ++++++--- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java index e20833200396..e77b0031b94f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java @@ -22,7 +22,6 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.DFSConfigKeysLegacy; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index 43644ab8a043..9f3754c3ecd1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -87,7 +87,8 @@ public class DatanodeConfiguration { ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_KEY = "hdds.datanode.rocksdb.delete_obsolete_files_period"; - public static final String OZONE_DATANODE_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE = + public static final String + OZONE_DATANODE_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE = "hdds.datanode.check.empty.container.delete"; public static final Boolean OZONE_DATANODE_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE_DEFAULT = diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index fbc6b38e561b..e3396a17554c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1271,8 +1271,7 @@ private void deleteInternal(Container container, boolean force) containerSet.removeContainer(containerId); } catch (StorageContainerException e) { throw e; - } - catch (IOException e) { + } catch (IOException e) { // All other IO Exceptions should be treated as if the container is not // empty as a defensive check. LOG.error("Could not determine if the container {} is empty", diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java index bbcdd9e370e0..2b319365aaf6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java @@ -301,11 +301,14 @@ public void testDeleteContainerRequestHandlerOnClosedContainer() // Set container blockCount to 0 to mock that it is empty Container containerToDelete = getContainerfromDN( hddsDatanodeService, containerId.getId()); - containerToDelete.getContainerData().setBlockCount(0); + containerToDelete.getContainerData().setBlockCount(0); File chunkDir = new File(containerToDelete. getContainerData().getChunksPath()); - for (File file: chunkDir.listFiles()) { - FileUtils.delete(file); + File[] files = chunkDir.listFiles(); + if (files != null) { + for (File file : files) { + FileUtils.delete(file); + } } // Send the delete command again. It should succeed this time. command.setTerm( From 6e041a2357baf38c73345887046782f43ce3e329 Mon Sep 17 00:00:00 2001 From: Ritesh H Shukla Date: Fri, 10 Mar 2023 00:02:08 -0800 Subject: [PATCH 5/6] Remove extra new line Change-Id: I6e76194de6150e1da286cbbacefccd1659bdb2a5 --- .../src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 279c5e811532..2c80c74f2992 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -581,7 +581,6 @@ public final class ScmConfigKeys { public static final String OZONE_AUDIT_LOG_DEBUG_CMD_LIST_SCMAUDIT = "ozone.audit.log.debug.cmd.list.scmaudit"; - /** * Never constructed. */ From e76e593d8ef31a39758cbb64c4d6e4146fd6528b Mon Sep 17 00:00:00 2001 From: Ritesh H Shukla Date: Fri, 10 Mar 2023 11:12:50 -0800 Subject: [PATCH 6/6] The config does not need to be part of ozone-default.xml Change-Id: If410fa45b19cb22b9e3e56a4203be07a4dd36d4a --- hadoop-hdds/common/src/main/resources/ozone-default.xml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index f53117d096b8..f525478e8f27 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -3607,12 +3607,4 @@ without using the optimised DAG based pruning approach - - hdds.datanode.check.empty.container.delete - true - DATANODE - - Disallow deletion of containers if on disk there are chunks still present. - -