Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ public class ContainerMetrics {
public static final String STORAGE_CONTAINER_METRICS =
"StorageContainerMetrics";
@Metric private MutableCounterLong numOps;
@Metric private MutableCounterLong containerDeleteFailedNonEmptyDir;
@Metric private MutableCounterLong containerDeleteFailedBlockCountNotZero;
@Metric private MutableCounterLong containerForceDelete;

private MutableCounterLong[] numOpsArray;
private MutableCounterLong[] opsBytesArray;
private MutableRate[] opsLatency;
Expand Down Expand Up @@ -125,4 +129,27 @@ public void incContainerBytesStats(ContainerProtos.Type type, long bytes) {
public long getContainerBytesMetrics(ContainerProtos.Type type) {
return opsBytesArray[type.ordinal()].value();
}

public void incContainerDeleteFailedBlockCountNotZero() {
containerDeleteFailedBlockCountNotZero.incr();
}
public void incContainerDeleteFailedNonEmpty() {
containerDeleteFailedNonEmptyDir.incr();
}

public void incContainersForceDelete() {
containerForceDelete.incr();
}

public long getContainerDeleteFailedNonEmptyDir() {
return containerDeleteFailedNonEmptyDir.value();
}

public long getContainerDeleteFailedBlockCountNotZero() {
return containerDeleteFailedBlockCountNotZero.value();
}

public long getContainerForceDelete() {
return containerForceDelete.value();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ void create(VolumeSet volumeSet, VolumeChoosingPolicy volumeChoosingPolicy,
*/
void delete() throws StorageContainerException;

/**
* Returns true if container is empty.
* @return true of container is empty
* @throws IOException if was unable to check container status.
*/
boolean isEmpty() throws IOException;

/**
* Update the container.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@ public class DatanodeConfiguration {
ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_KEY =
"hdds.datanode.rocksdb.delete_obsolete_files_period";

public static final String
OZONE_DATANODE_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE =
"hdds.datanode.check.empty.container.delete";
public static final Boolean
OZONE_DATANODE_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE_DEFAULT =
true;

/**
* Number of threads per volume that Datanode will use for chunk read.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,11 @@ public void delete() throws StorageContainerException {
}
}

@Override
public boolean isEmpty() throws IOException {
return KeyValueContainerUtil.noBlocksInContainer(containerData);
}

@Override
public void markContainerForClose() throws StorageContainerException {
writeLock();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ public class KeyValueHandler extends Handler {
private final long maxContainerSize;
private final Function<ByteBuffer, ByteString> byteBufferToByteString;
private final boolean validateChunkChecksumData;
private boolean checkIfNoBlockFiles;

// A striped lock that is held during container creation.
private final Striped<Lock> containerCreationLocks;
Expand All @@ -155,6 +156,14 @@ public KeyValueHandler(ConfigurationSource config,
} catch (Exception e) {
throw new RuntimeException(e);
}

checkIfNoBlockFiles =
conf.getBoolean(
DatanodeConfiguration.
OZONE_DATANODE_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE,
DatanodeConfiguration.
OZONE_DATANODE_CHECK_EMPTY_CONTAINER_ON_DISK_ON_DELETE_DEFAULT);

maxContainerSize = (long) config.getStorageSize(
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
Expand Down Expand Up @@ -1231,19 +1240,45 @@ private void deleteInternal(Container container, boolean force)
}
// Safety check that the container is empty.
// If the container is not empty, it should not be deleted unless the
// container is beinf forcefully deleted (which happens when
// container is being forcefully deleted (which happens when
// container is unhealthy or over-replicated).
if (container.getContainerData().getBlockCount() != 0) {
metrics.incContainerDeleteFailedBlockCountNotZero();
LOG.error("Received container deletion command for container {} but" +
" the container is not empty.",
container.getContainerData().getContainerID());
" the container is not empty with blockCount {}",
container.getContainerData().getContainerID(),
container.getContainerData().getBlockCount());
throw new StorageContainerException("Non-force deletion of " +
"non-empty container is not allowed.",
DELETE_ON_NON_EMPTY_CONTAINER);
}

if (checkIfNoBlockFiles && !container.isEmpty()) {
metrics.incContainerDeleteFailedNonEmpty();
LOG.error("Received container deletion command for container {} but" +
" the container is not empty",
container.getContainerData().getContainerID());
throw new StorageContainerException("Non-force deletion of " +
"non-empty container:" +
container.getContainerData().getContainerID() +
" is not allowed.",
DELETE_ON_NON_EMPTY_CONTAINER);
}
} else {
metrics.incContainersForceDelete();
}
long containerId = container.getContainerData().getContainerID();
containerSet.removeContainer(containerId);
} catch (StorageContainerException e) {
throw e;
} catch (IOException e) {
// All other IO Exceptions should be treated as if the container is not
// empty as a defensive check.
LOG.error("Could not determine if the container {} is empty",
container.getContainerData().getContainerID(), e);
throw new StorageContainerException("Could not determine if container "
+ container.getContainerData().getContainerID() +
" is empty", DELETE_ON_NON_EMPTY_CONTAINER);
} finally {
container.writeUnlock();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import java.io.File;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand Down Expand Up @@ -159,6 +160,26 @@ public static void removeContainer(KeyValueContainerData containerData,
FileUtils.deleteDirectory(containerMetaDataPath.getParentFile());
}

/**
* Returns if there are no blocks in the container.
* @param containerData Container to check
* @param conf configuration
* @return true if the directory containing blocks is empty
* @throws IOException
*/
public static boolean noBlocksInContainer(KeyValueContainerData
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we want to check RocksDB and not chunks on disk. The latter may always be present due to orphaned writes. We only care about committed blocks which have entries in RocksDB.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The likelihood of having blocks written to disk but not in DN RocksDB will go down with https://issues.apache.org/jira/browse/HDDS-8117
We will still have the issue of having blocks on DN that are not on OM which needs to be dealt with separately.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should check RocksDB in this PR and switch to using the directory check as part of HDDS-8117, or maybe wait until after HDDS-8138. Otherwise we may get some alarming false positives. In order to tell whether the exception is a false positive or not we would need to check RocksDB anyways, so I don't think the error message is useful without a RocksDB check given how the rest of the code works right now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The read data is the block. We should work towards making sure blocks are written only if valid, and if they are on disk, we should err towards not cleaning them up. Even with HDDS-8138, I would like to keep this check in place. We can get HDDS-8117 in but while that is being worked on we should check in this fix to avoid any data loss which is the main motivation for this PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After offline discussion, we decided to keep this PR as is, since it is better than no check at all. A follow up PR will be coming shortly that will add a RocksDB check and log message here as well to help with debugging.

containerData)
throws IOException {
Preconditions.checkNotNull(containerData);
File chunksPath = new File(containerData.getChunksPath());
Preconditions.checkArgument(chunksPath.isDirectory());

try (DirectoryStream<Path> dir
= Files.newDirectoryStream(chunksPath.toPath())) {
return !dir.iterator().hasNext();
}
}

/**
* Parse KeyValueContainerData and verify checksum. Set block related
* metadata like block commit sequence id, block count, bytes used and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ public class OzoneContainer {
private DatanodeDetails datanodeDetails;
private StateContext context;


private final ContainerMetrics metrics;

enum InitializingStatus {
UNINITIALIZED, INITIALIZING, INITIALIZED
}
Expand Down Expand Up @@ -162,7 +165,7 @@ public OzoneContainer(
metadataScanner = null;

buildContainerSet();
final ContainerMetrics metrics = ContainerMetrics.create(conf);
metrics = ContainerMetrics.create(conf);
handlers = Maps.newHashMap();

IncrementalReportSender<Container> icrSender = container -> {
Expand Down Expand Up @@ -521,4 +524,8 @@ public MutableVolumeSet getDbVolumeSet() {
StorageVolumeChecker getVolumeChecker(ConfigurationSource conf) {
return new StorageVolumeChecker(conf, new Timer());
}

public ContainerMetrics getMetrics() {
return metrics;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,7 @@ public void testDeleteContainer() throws IOException {
Mockito.when(volumeSet.getVolumesList())
.thenReturn(Collections.singletonList(hddsVolume));

final int[] interval = new int[1];
interval[0] = 2;
final ContainerMetrics metrics = new ContainerMetrics(interval);
final ContainerMetrics metrics = ContainerMetrics.create(conf);

final AtomicInteger icrReceived = new AtomicInteger(0);

Expand Down
Loading