diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java
index e77b0031b94f..e717da65ff7f 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java
@@ -52,6 +52,7 @@ public class ContainerMetrics {
   @Metric private MutableCounterLong containerDeleteFailedNonEmptyDir;
   @Metric private MutableCounterLong containerDeleteFailedBlockCountNotZero;
   @Metric private MutableCounterLong containerForceDelete;
+  @Metric private MutableCounterLong containerDeleteFailedNonEmptyBlockDB;
 
   private MutableCounterLong[] numOpsArray;
   private MutableCounterLong[] opsBytesArray;
@@ -152,4 +153,12 @@ public long getContainerDeleteFailedBlockCountNotZero() {
   public long getContainerForceDelete() {
     return containerForceDelete.value();
   }
+
+  public void incContainerDeleteFailedNonEmptyBlocksDB() {
+    containerDeleteFailedNonEmptyBlockDB.incr();
+  }
+
+  public long getContainerDeleteFailedNonEmptyBlockDB() {
+    return containerDeleteFailedNonEmptyBlockDB.value();
+  }
 }
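The new counter follows the standard Hadoop metrics2 pattern used throughout ContainerMetrics: an @Metric-annotated MutableCounterLong is instantiated automatically when the metrics source is registered, and typed inc/get accessors wrap it. Below is a minimal, self-contained sketch of that pattern for reference; the source name, counter name, and context are illustrative only and not part of this patch.

import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;

@Metrics(about = "Example container delete metrics", context = "dfs")
final class ExampleDeleteMetrics {
  // Auto-wired by metrics2 when the source is registered; the name and
  // description here are hypothetical.
  @Metric private MutableCounterLong deleteRejected;

  static ExampleDeleteMetrics create() {
    // Registering the source makes metrics2 discover the @Metric fields.
    return DefaultMetricsSystem.instance().register(
        "ExampleDeleteMetrics", "Example source", new ExampleDeleteMetrics());
  }

  void incDeleteRejected() {
    deleteRejected.incr();
  }

  long getDeleteRejected() {
    return deleteRejected.value();
  }
}

Because the counter is a field on the shared ContainerMetrics source, the handler can bump it without any extra registration, which is why the patch only needs the field plus the two accessors.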
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index e3396a17554c..c50e9bf67f0f 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -24,6 +24,9 @@
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedList;
@@ -42,7 +45,6 @@
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State;
-import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.GetSmallFileRequestProto;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.KeyValue;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.PutSmallFileRequestProto;
@@ -64,7 +66,9 @@
 import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion;
 import org.apache.hadoop.ozone.container.common.impl.ContainerData;
 import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
+import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator;
 import org.apache.hadoop.ozone.container.common.interfaces.Container;
+import org.apache.hadoop.ozone.container.common.interfaces.DBHandle;
 import org.apache.hadoop.ozone.container.common.interfaces.Handler;
 import org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy;
 import org.apache.hadoop.ozone.container.common.report.IncrementalReportSender;
@@ -124,14 +128,13 @@ public class KeyValueHandler extends Handler {
   private static final Logger
       LOG = LoggerFactory.getLogger(KeyValueHandler.class);
 
-  private final ContainerType containerType;
   private final BlockManager blockManager;
   private final ChunkManager chunkManager;
   private final VolumeChoosingPolicy volumeChoosingPolicy;
   private final long maxContainerSize;
   private final Function<ByteBuffer, ByteString> byteBufferToByteString;
   private final boolean validateChunkChecksumData;
-  private boolean checkIfNoBlockFiles;
+  private final boolean checkIfNoBlockFiles;
 
   // A striped lock that is held during container creation.
   private final Striped<Lock> containerCreationLocks;
@@ -143,7 +146,6 @@ public KeyValueHandler(ConfigurationSource config,
                          ContainerMetrics metrics,
                          IncrementalReportSender<Container> icrSender) {
     super(config, datanodeId, contSet, volSet, metrics, icrSender);
-    containerType = ContainerType.KeyValueContainer;
     blockManager = new BlockManagerImpl(config);
     validateChunkChecksumData = conf.getObject(
         DatanodeConfiguration.class).isChunkDataValidationCheck();
@@ -398,11 +400,10 @@ ContainerCommandResponseProto handleCreateContainer(
   private void populateContainerPathFields(KeyValueContainer container,
       HddsVolume hddsVolume) throws IOException {
     volumeSet.readLock();
-    HddsVolume containerVolume = hddsVolume;
     try {
       String idDir = VersionedDatanodeFeatures.ScmHA.chooseContainerPathID(
-          containerVolume, clusterId);
-      container.populatePathFields(idDir, containerVolume);
+          hddsVolume, clusterId);
+      container.populatePathFields(idDir, hddsVolume);
     } finally {
       volumeSet.readUnlock();
     }
@@ -659,8 +660,8 @@ ContainerCommandResponseProto handleListBlock(
       }
       List<BlockData> responseData =
           blockManager.listBlock(kvContainer, startLocalId, count);
-      for (int i = 0; i < responseData.size(); i++) {
-        returnData.add(responseData.get(i).getProtoBufMessage());
+      for (BlockData responseDatum : responseData) {
+        returnData.add(responseDatum.getProtoBufMessage());
       }
     } catch (StorageContainerException ex) {
       return ContainerUtils.logAndReturnError(LOG, ex, request);
@@ -1226,11 +1227,64 @@ private String[] getFilesWithPrefix(String prefix, File chunkDir) {
     return chunkDir.list(filter);
   }
 
+  private boolean logBlocksIfNonZero(Container container)
+      throws IOException {
+    boolean nonZero = false;
+    try (DBHandle dbHandle
+        = BlockUtils.getDB(
+            (KeyValueContainerData) container.getContainerData(), conf)) {
+      BlockIterator<BlockData> blockIterator = dbHandle.getStore()
+          .getBlockIterator(container.getContainerData().getContainerID());
+      StringBuilder stringBuilder = new StringBuilder();
+      while (blockIterator.hasNext()) {
+        nonZero = true;
+        stringBuilder.append(blockIterator.nextBlock());
+        if (stringBuilder.length() > StorageUnit.KB.toBytes(32)) {
+          break;
+        }
+      }
+      if (nonZero) {
+        LOG.error("blocks in rocksDB on container delete: {}",
+            stringBuilder.toString());
+      }
+    }
+    return nonZero;
+  }
+
+  private boolean logBlocksFoundOnDisk(Container container)
+      throws IOException {
+    // List the files left over in the chunks directory.
+    File chunksPath =
+        new File(container.getContainerData().getChunksPath());
+    Preconditions.checkArgument(chunksPath.isDirectory());
+    boolean notEmpty = false;
+    try (DirectoryStream<Path> dir
+        = Files.newDirectoryStream(chunksPath.toPath())) {
+      StringBuilder stringBuilder = new StringBuilder();
+      for (Path block : dir) {
+        if (notEmpty) {
+          stringBuilder.append(",");
+        }
+        stringBuilder.append(block);
+        notEmpty = true;
+        if (stringBuilder.length() > StorageUnit.KB.toBytes(16)) {
+          break;
+        }
+      }
+      if (notEmpty) {
+        LOG.error("Files still part of the container on delete: {}",
+            stringBuilder.toString());
+      }
+    }
+    return notEmpty;
+  }
+
   private void deleteInternal(Container container, boolean force)
       throws StorageContainerException {
     container.writeLock();
     try {
-      // If force is false, we check container state. 
+      // If force is false, we check container state.
       if (!force) {
         // Check if container is open
         if (container.getContainerData().isOpen()) {
@@ -1245,7 +1299,7 @@ private void deleteInternal(Container container, boolean force)
       if (container.getContainerData().getBlockCount() != 0) {
         metrics.incContainerDeleteFailedBlockCountNotZero();
         LOG.error("Received container deletion command for container {} but" +
-            " the container is not empty with blockCount {}", 
+            " the container is not empty with blockCount {}",
             container.getContainerData().getContainerID(),
             container.getContainerData().getBlockCount());
         throw new StorageContainerException("Non-force deletion of " +
@@ -1253,16 +1307,52 @@ private void deleteInternal(Container container, boolean force)
             DELETE_ON_NON_EMPTY_CONTAINER);
       }
 
-      if (checkIfNoBlockFiles && !container.isEmpty()) {
-        metrics.incContainerDeleteFailedNonEmpty();
-        LOG.error("Received container deletion command for container {} but" +
-            " the container is not empty",
-            container.getContainerData().getContainerID());
-        throw new StorageContainerException("Non-force deletion of " +
-            "non-empty container:" +
-            container.getContainerData().getContainerID() +
-            " is not allowed.",
-            DELETE_ON_NON_EMPTY_CONTAINER);
+      // This is a defensive check to make sure there is no data loss if
+      // 1. there are one or more blocks on the filesystem, or
+      // 2. there are one or more blocks in the block table.
+      // It can produce false positives because of
+      // 1. chunks written to disk that were never recorded in RocksDB due
+      //    to failures during write,
+      // 2. blocks deleted from the block table whose underlying files could
+      //    not be deleted,
+      // 3. failures that left files deleted from disk but not yet cleaned
+      //    up in the metadata, or
+      // 4. bugs in the code.
+      // Blocks stored on disk represent data written by a client and should
+      // be treated with care, at the expense of leaving artifacts on disk
+      // that might be unreferenced.
+      // https://issues.apache.org/jira/browse/HDDS-8138 will move the
+      // implementation to depend only on the consistency of the chunks
+      // folder.
+
+      // First check if any files are in the chunks folder. If there are,
+      // also dump the block table data to help with debugging.
+      if (checkIfNoBlockFiles) {
+        if (!container.isEmpty()) {
+          metrics.incContainerDeleteFailedNonEmpty();
+          logBlocksFoundOnDisk(container);
+          // Also list the blocks from the block table.
+          logBlocksIfNonZero(container);
+          throw new StorageContainerException("Non-force deletion of " +
+              "non-empty container dir:" +
+              container.getContainerData().getContainerID() +
+              " is not allowed.",
+              DELETE_ON_NON_EMPTY_CONTAINER);
+        }
+
+        // The chunks folder is empty, now check if the block table still
+        // references any blocks. This avoids cleaning up the block table,
+        // preserving it for future debugging.
+        if (logBlocksIfNonZero(container)) {
+          LOG.error("Non-empty blocks table for container {}",
+              container.getContainerData().getContainerID());
+          metrics.incContainerDeleteFailedNonEmptyBlocksDB();
+          throw new StorageContainerException("Non-force deletion of " +
+              "non-empty container block table:" +
+              container.getContainerData().getContainerID() +
+              " is not allowed.",
+              DELETE_ON_NON_EMPTY_CONTAINER);
+        }
       }
     } else {
       metrics.incContainersForceDelete();
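The new deleteInternal logic is a two-phase safety check: a non-force delete is rejected if the chunks directory still contains files (dumping both the directory listing and the block table for debugging), and only when the directory is empty is the RocksDB block table consulted, with its contents left intact for later inspection. Below is a condensed sketch of that ordering with the Ozone types stripped away; hasDbEntries stands in for logBlocksIfNonZero, and the exception type is simplified.

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;

final class DeleteSafetyCheck {
  /** Returns true when the chunks directory still contains any entry. */
  static boolean hasFilesOnDisk(Path chunksDir) throws IOException {
    try (DirectoryStream<Path> dir = Files.newDirectoryStream(chunksDir)) {
      return dir.iterator().hasNext();
    }
  }

  /**
   * Mirrors the shape of deleteInternal: reject a non-force delete if
   * either the filesystem or the block table still references data.
   * The disk check runs first because on-disk chunks are client data,
   * while DB entries alone may be stale metadata.
   */
  static void checkBeforeDelete(Path chunksDir, boolean hasDbEntries)
      throws IOException {
    if (hasFilesOnDisk(chunksDir)) {
      throw new IOException("non-empty container dir");
    }
    if (hasDbEntries) {
      throw new IOException("non-empty container block table");
    }
  }
}

Checking the directory before the block table also matches the metrics split above: the existing containerDeleteFailedNonEmptyDir counter covers the first rejection, and the new containerDeleteFailedNonEmptyBlockDB counter covers the second.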
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java
index 2b319365aaf6..282c4e7f1462 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java
@@ -19,6 +19,7 @@
 
 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.hdds.HddsConfigKeys;
+import org.apache.hadoop.hdds.client.BlockID;
 import org.apache.hadoop.hdds.client.ReplicationFactor;
 import org.apache.hadoop.hdds.client.ReplicationType;
 import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
@@ -29,16 +30,22 @@
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
 import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
+import org.apache.hadoop.hdds.utils.db.BatchOperation;
+import org.apache.hadoop.hdds.utils.db.Table;
 import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
 import org.apache.hadoop.ozone.client.ObjectStore;
 import org.apache.hadoop.ozone.client.OzoneClient;
 import org.apache.hadoop.ozone.client.OzoneClientFactory;
 import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
+import org.apache.hadoop.ozone.container.common.helpers.BlockData;
 import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics;
 import org.apache.hadoop.ozone.container.common.impl.ContainerData;
 import org.apache.hadoop.ozone.container.common.interfaces.Container;
+import org.apache.hadoop.ozone.container.common.interfaces.DBHandle;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler;
+import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
 import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
 import org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand;
@@ -53,6 +60,7 @@
 
 import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
+import java.util.List;
 import java.util.UUID;
 
 import org.junit.Rule;
@@ -194,20 +202,171 @@ public void testDeleteNonEmptyContainerDir()
         cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
     nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
 
+    // Check the log for the error message when deleting a non-empty
+    // container dir.
+    GenericTestUtils.LogCapturer logCapturer =
+        GenericTestUtils.LogCapturer.captureLogs(
+            LoggerFactory.getLogger(KeyValueHandler.class));
+    GenericTestUtils.waitFor(() ->
+        logCapturer.getOutput()
+            .contains("Files still part of the container on delete"),
+        500,
+        5 * 2000);
+    Assert.assertFalse(isContainerDeleted(hddsDatanodeService,
+        containerId.getId()));
+    Assert.assertEquals(1,
+        metrics.getContainerDeleteFailedNonEmptyDir());
+
+    // Send the delete command again with the force flag set.
     // Deleting a non-empty container should pass on the DN when the force flag
     // is true
+    long beforeForceCount = metrics.getContainerForceDelete();
+    command = new DeleteContainerCommand(containerId.getId(), true);
+    command.setTerm(
+        cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
+    nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
+
+    GenericTestUtils.waitFor(() ->
+        isContainerDeleted(hddsDatanodeService, containerId.getId()),
+        500, 5 * 1000);
+    Assert.assertTrue(isContainerDeleted(hddsDatanodeService,
+        containerId.getId()));
+    Assert.assertTrue(beforeForceCount <
+        metrics.getContainerForceDelete());
+  }
+
+  @Test(timeout = 60000)
+  public void testDeleteNonEmptyContainerBlockTable()
+      throws Exception {
+    // 1. Test that a non-force deletion fails when the block count is 0 but
+    //    either a chunk file or block table entries are still present.
+    // 2. Test that a force deletion passes even then.
+    // The easiest way to create an open container is creating a key.
+    String keyName = UUID.randomUUID().toString();
+    // create key
+    createKey(keyName);
+    // get containerID of the key
+    ContainerID containerId = getContainerID(keyName);
+    ContainerInfo container = cluster.getStorageContainerManager()
+        .getContainerManager().getContainer(containerId);
+    Pipeline pipeline = cluster.getStorageContainerManager()
+        .getPipelineManager().getPipeline(container.getPipelineID());
+
+    // We need to close the container because container delete only happens
+    // on closed containers when the force flag is set to false.
+    HddsDatanodeService hddsDatanodeService =
+        cluster.getHddsDatanodes().get(0);
+    Assert.assertFalse(isContainerClosed(hddsDatanodeService,
+        containerId.getId()));
+
+    DatanodeDetails datanodeDetails = hddsDatanodeService.getDatanodeDetails();
+    NodeManager nodeManager =
+        cluster.getStorageContainerManager().getScmNodeManager();
+    // send the order to close the container
+    SCMCommand command = new CloseContainerCommand(
+        containerId.getId(), pipeline.getId());
+    command.setTerm(
+        cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
+    nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
+
+    Container containerInternalObj =
+        hddsDatanodeService
+            .getDatanodeStateMachine()
+            .getContainer().getContainerSet()
+            .getContainer(containerId.getId());
+
+    // Write a file to the container chunks directory, creating a
+    // discrepancy between the block count as recorded in RocksDB and what
+    // is actually on disk.
+    File lingeringBlock =
+        new File(containerInternalObj
+            .getContainerData().getChunksPath() + "/1.block");
+    lingeringBlock.createNewFile();
+    ContainerMetrics metrics =
+        hddsDatanodeService
+            .getDatanodeStateMachine().getContainer().getMetrics();
+    GenericTestUtils.waitFor(() ->
+        isContainerClosed(hddsDatanodeService, containerId.getId()),
+        500, 5 * 1000);
+
+    // double check if it's really closed (waitFor also throws an exception)
+    Assert.assertTrue(isContainerClosed(hddsDatanodeService,
+        containerId.getId()));
+
+    // Check that the container exists before sending the delete command.
+    Assert.assertFalse(isContainerDeleted(hddsDatanodeService,
+        containerId.getId()));
+
+    // Set the container blockCount to 0 to mock that it is empty as far as
+    // RocksDB is concerned.
+    getContainerfromDN(hddsDatanodeService, containerId.getId())
+        .getContainerData().setBlockCount(0);
+    // Write entries to the block table.
+    try (DBHandle dbHandle
+        = BlockUtils.getDB(
+            (KeyValueContainerData) getContainerfromDN(hddsDatanodeService,
+                containerId.getId()).getContainerData(),
+            conf)) {
+      BlockData blockData = new BlockData(new BlockID(1, 1));
+      dbHandle.getStore().getBlockDataTable().put("block1", blockData);
+    }
+
+    long containerDeleteFailedNonEmptyBefore =
+        metrics.getContainerDeleteFailedNonEmptyDir();
+    // send delete container to the datanode
+    command = new DeleteContainerCommand(containerId.getId(), false);
+
+    // Send the delete command. It should fail: even though the block count
+    // is zero, there is a lingering block file on disk.
+    command.setTerm(
+        cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
+    nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
+
+    // Check the log for the error message when deleting a non-empty
+    // container dir.
     GenericTestUtils.LogCapturer logCapturer =
         GenericTestUtils.LogCapturer.captureLogs(
             LoggerFactory.getLogger(KeyValueHandler.class));
     GenericTestUtils.waitFor(() ->
-        logCapturer.getOutput().contains("container is not empty"),
+        logCapturer.getOutput()
+            .contains("Files still part of the container on delete"),
         500,
         5 * 2000);
     Assert.assertTrue(!isContainerDeleted(hddsDatanodeService,
         containerId.getId()));
-    Assert.assertEquals(1,
+    Assert.assertTrue(containerDeleteFailedNonEmptyBefore <
         metrics.getContainerDeleteFailedNonEmptyDir());
+
+    // Now empty the container dir and retry with a non-empty block table.
+    Container containerToDelete = getContainerfromDN(
+        hddsDatanodeService, containerId.getId());
+    File chunkDir = new File(containerToDelete
+        .getContainerData().getChunksPath());
+    File[] files = chunkDir.listFiles();
+    if (files != null) {
+      for (File file : files) {
+        FileUtils.delete(file);
+      }
+    }
+    command = new DeleteContainerCommand(containerId.getId(), false);
+
+    // Send the delete command. It should still fail: the chunks dir is now
+    // empty, but the block table is not.
+    command.setTerm(
+        cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
+    nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
+
+    // Check the log for the error message when deleting a container with a
+    // non-empty block table.
+    GenericTestUtils.waitFor(() ->
+        logCapturer.getOutput()
+            .contains("Non-empty blocks table for container"),
+        500,
+        5 * 2000);
+    Assert.assertFalse(isContainerDeleted(hddsDatanodeService,
+        containerId.getId()));
+    Assert.assertEquals(1,
+        metrics.getContainerDeleteFailedNonEmptyBlockDB());
 
     // Send the delete command. It should pass with force flag.
     long beforeForceCount = metrics.getContainerForceDelete();
     command = new DeleteContainerCommand(containerId.getId(), true);
@@ -225,6 +384,31 @@ public void testDeleteNonEmptyContainerDir()
         metrics.getContainerForceDelete());
   }
 
+  private void clearBlocksTable(Container container) throws IOException {
+    try (DBHandle dbHandle
+        = BlockUtils.getDB(
+            (KeyValueContainerData) container.getContainerData(), conf)) {
+      List<? extends Table.KeyValue<String, BlockData>>
+          blocks = dbHandle.getStore().getBlockDataTable().getRangeKVs(
+              ((KeyValueContainerData) container.getContainerData())
+                  .startKeyEmpty(),
+              Integer.MAX_VALUE,
+              ((KeyValueContainerData) container.getContainerData())
+                  .containerPrefix(),
+              ((KeyValueContainerData) container.getContainerData())
+                  .getUnprefixedKeyFilter());
+      try (BatchOperation batch = dbHandle.getStore().getBatchHandler()
+          .initBatchOperation()) {
+        for (Table.KeyValue<String, BlockData> kv : blocks) {
+          String blk = kv.getKey();
+          dbHandle.getStore().getBlockDataTable().deleteWithBatch(batch, blk);
+        }
+        dbHandle.getStore().getBatchHandler().commitBatchOperation(batch);
+      }
+    }
+  }
+
   @Test(timeout = 60000)
   public void testDeleteContainerRequestHandlerOnClosedContainer()
       throws Exception {
@@ -310,6 +494,8 @@ public void testDeleteContainerRequestHandlerOnClosedContainer()
         FileUtils.delete(file);
       }
     }
+    clearBlocksTable(containerToDelete);
+
     // Send the delete command again. It should succeed this time.
     command.setTerm(
         cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
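For observability, the new containerDeleteFailedNonEmptyBlockDB counter should surface through the datanode metrics system alongside the existing delete-failure counters. Below is a hypothetical JMX probe for it; the ObjectName pattern and attribute name are assumptions based on common metrics2 naming conventions and have not been verified against a running datanode.

import java.lang.management.ManagementFactory;
import javax.management.MBeanServer;
import javax.management.ObjectName;

public final class MetricProbe {
  public static void main(String[] args) throws Exception {
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    // metrics2 sources are typically exposed as Hadoop:service=...,name=...;
    // the exact service and source names here are assumptions.
    ObjectName name =
        new ObjectName("Hadoop:service=HddsDatanode,name=ContainerMetrics");
    Object value =
        mbs.getAttribute(name, "ContainerDeleteFailedNonEmptyBlockDB");
    System.out.println("containerDeleteFailedNonEmptyBlockDB = " + value);
  }
}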