Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ private DiskCheckUtil() { }
// to inject failures.
private static DiskChecks impl = new DiskChecksImpl();

/** Enum for disk read/write status during volume check. */
public enum ReadWriteStatus {
WRITE_FAIL, READ_FAIL, READ_WRITE_OK
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This naming is more consistent with the container scanner, which uses ScanResult. FWIW we moved the scan result to a separate class in the reconciliation branch. Volume scan is simpler than container scan though so I think enum is good for now.

Suggested change
public enum ReadWriteStatus {
WRITE_FAIL, READ_FAIL, READ_WRITE_OK
}
public enum DiskCheckResult {
WRITE_FAILURE, READ_FAILURE, OK
}


@VisibleForTesting
public static void setTestImpl(DiskChecks diskChecks) {
impl = diskChecks;
Expand All @@ -57,11 +62,11 @@ public static boolean checkExistence(File storageDir) {
return impl.checkExistence(storageDir);
}

public static boolean checkPermissions(File storageDir) {
public static ReadWriteStatus checkPermissions(File storageDir) {
return impl.checkPermissions(storageDir);
}

public static boolean checkReadWrite(File storageDir, File testFileDir,
public static ReadWriteStatus checkReadWrite(File storageDir, File testFileDir,
int numBytesToWrite) {
return impl.checkReadWrite(storageDir, testFileDir, numBytesToWrite);
}
Expand All @@ -75,12 +80,12 @@ public interface DiskChecks {
default boolean checkExistence(File storageDir) {
return true;
}
default boolean checkPermissions(File storageDir) {
return true;
default ReadWriteStatus checkPermissions(File storageDir) {
return ReadWriteStatus.READ_WRITE_OK;
}
default boolean checkReadWrite(File storageDir, File testFileDir,
default ReadWriteStatus checkReadWrite(File storageDir, File testFileDir,
int numBytesToWrite) {
return true;
return ReadWriteStatus.READ_WRITE_OK;
}
}

Expand All @@ -105,31 +110,29 @@ public boolean checkExistence(File diskDir) {
}

@Override
public boolean checkPermissions(File storageDir) {
public ReadWriteStatus checkPermissions(File storageDir) {
// Check all permissions on the volume. If there are multiple permission
// errors, count it as one failure so the admin can fix them all at once.
boolean permissionsCorrect = true;
if (!storageDir.canRead()) {
logError(storageDir,
"Datanode does not have read permission on volume.");
permissionsCorrect = false;
}
if (!storageDir.canWrite()) {
logError(storageDir,
"Datanode does not have write permission on volume.");
permissionsCorrect = false;
return ReadWriteStatus.READ_FAIL;
}
if (!storageDir.canExecute()) {
logError(storageDir, "Datanode does not have execute" +
"permission on volume.");
permissionsCorrect = false;
return ReadWriteStatus.READ_FAIL;
}

return permissionsCorrect;
if (!storageDir.canWrite()) {
logError(storageDir,
"Datanode does not have write permission on volume.");
return ReadWriteStatus.WRITE_FAIL;
}
return ReadWriteStatus.READ_WRITE_OK;
}

@Override
public boolean checkReadWrite(File storageDir,
public ReadWriteStatus checkReadWrite(File storageDir,
File testFileDir, int numBytesToWrite) {
File testFile = new File(testFileDir, "disk-check-" + UUID.randomUUID());
byte[] writtenBytes = new byte[numBytesToWrite];
Expand All @@ -140,15 +143,15 @@ public boolean checkReadWrite(File storageDir,
} catch (FileNotFoundException notFoundEx) {
logError(storageDir, String.format("Could not find file %s for " +
"volume check.", testFile.getAbsolutePath()), notFoundEx);
return false;
return ReadWriteStatus.WRITE_FAIL;
} catch (SyncFailedException syncEx) {
logError(storageDir, String.format("Could sync file %s to disk.",
testFile.getAbsolutePath()), syncEx);
return false;
return ReadWriteStatus.WRITE_FAIL;
} catch (IOException ioEx) {
logError(storageDir, String.format("Could not write file %s " +
"for volume check.", testFile.getAbsolutePath()), ioEx);
return false;
return ReadWriteStatus.WRITE_FAIL;
}

// Read data back from the test file.
Expand All @@ -159,35 +162,35 @@ public boolean checkReadWrite(File storageDir,
logError(storageDir, String.format("%d bytes written to file %s " +
"but %d bytes were read back.", numBytesToWrite,
testFile.getAbsolutePath(), numBytesRead));
return false;
return ReadWriteStatus.READ_FAIL;
}
} catch (FileNotFoundException notFoundEx) {
logError(storageDir, String.format("Could not find file %s " +
"for volume check.", testFile.getAbsolutePath()), notFoundEx);
return false;
return ReadWriteStatus.READ_FAIL;
} catch (IOException ioEx) {
logError(storageDir, String.format("Could not read file %s " +
"for volume check.", testFile.getAbsolutePath()), ioEx);
return false;
return ReadWriteStatus.READ_FAIL;
}

// Check that test file has the expected content.
if (!Arrays.equals(writtenBytes, readBytes)) {
logError(storageDir, String.format("%d Bytes read from file " +
"%s do not match the %d bytes that were written.",
writtenBytes.length, testFile.getAbsolutePath(), readBytes.length));
return false;
return ReadWriteStatus.READ_FAIL;
}

// Delete the file.
if (!testFile.delete()) {
logError(storageDir, String.format("Could not delete file %s " +
"for volume check.", testFile.getAbsolutePath()));
return false;
return ReadWriteStatus.WRITE_FAIL;
}

// If all checks passed, the volume is healthy.
return true;
return ReadWriteStatus.READ_WRITE_OK;
}

private void logError(File storageDir, String message) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,13 @@ public HddsVolume chooseVolume(List<HddsVolume> volumes,
throw new DiskOutOfSpaceException("No more available volumes");
}

List<HddsVolume> volumesWithWriteAllowed =
volumes.stream().filter(k -> k.getStorageState() != StorageVolume.VolumeState.READ_ONLY)
.collect(Collectors.toList());

AvailableSpaceFilter filter = new AvailableSpaceFilter(maxContainerSize);

List<HddsVolume> volumesWithEnoughSpace = volumes.stream()
List<HddsVolume> volumesWithEnoughSpace = volumesWithWriteAllowed.stream()
.filter(filter)
.collect(Collectors.toList());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please combine the streams, apply both filters to the same stream. (Or move the condition to AvailableSpaceFilter.)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,16 @@ public void checkAllVolumes(StorageVolumeChecker checker)
Set<? extends StorageVolume> failedVolumes;
try {
failedVolumes = checker.checkAllVolumes(allVolumes);
if (failedVolumes.size() > 0) {
LOG.warn("checkAllVolumes got {} failed volumes - {}",
failedVolumes.size(), failedVolumes);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new IOException("Interrupted while running disk check", e);
}

if (failedVolumes.size() > 0) {
LOG.warn("checkAllVolumes got {} failed volumes - {}",
failedVolumes.size(), failedVolumes);
if (failedVolumeMap.size() > 0 || failedVolumes.size() > 0) {
handleVolumeFailures(failedVolumes);
} else {
LOG.debug("checkAllVolumes encountered no failures");
Expand Down Expand Up @@ -482,7 +484,7 @@ public StorageLocationReport[] getStorageReport() {
rootDir = volumeInfo.get().getRootDir();
SpaceUsageSource usage = volumeInfo.get().getCurrentUsage();
scmUsed = usage.getUsedSpace();
remaining = usage.getAvailable();
remaining = volume.getStorageState() == HddsVolume.VolumeState.READ_ONLY ? 0 : usage.getAvailable();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will remaining "0" confuse the usage statistics on Recon and SCM UI? Maybe we would consider including the READ only state in storage report.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes this is good suggestion. Currently in SCM usage CLI it shows aggregated DN space usage including all volume. We can show count of READ only volume.
Can we do this CLI statistics report change in another Jira?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs proto change to include volume state in report. Can be done via another JIRA to include volume status also.

capacity = usage.getCapacity();
committed = (volume instanceof HddsVolume) ?
((HddsVolume) volume).getCommittedBytes() : 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.io.IOException;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

import static org.apache.hadoop.ozone.container.common.volume.VolumeChoosingUtil.logIfSomeVolumesOutOfSpace;
import static org.apache.hadoop.ozone.container.common.volume.VolumeChoosingUtil.throwDiskOutOfSpace;
Expand All @@ -51,6 +52,10 @@ public HddsVolume chooseVolume(List<HddsVolume> volumes,
throw new DiskOutOfSpaceException("No more available volumes");
}

List<HddsVolume> volumesWithWriteAllowed =
volumes.stream().filter(k -> k.getStorageState() != StorageVolume.VolumeState.READ_ONLY)
.collect(Collectors.toList());

AvailableSpaceFilter filter = new AvailableSpaceFilter(maxContainerSize);

// since volumes could've been removed because of the failure
Expand All @@ -61,7 +66,7 @@ public HddsVolume chooseVolume(List<HddsVolume> volumes,
int startVolumeIndex = currentVolumeIndex;

while (true) {
final HddsVolume volume = volumes.get(currentVolumeIndex);
final HddsVolume volume = volumesWithWriteAllowed.get(currentVolumeIndex);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

currentVolumeIndex is calculated based on volumes.size(), so this could result in IndexOutOfBoundsException.

Volumes should not be eagerly pre-filtered, they can be filtered in the loop.

// adjust for remaining capacity in Open containers
boolean hasEnoughSpace = filter.test(volume);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ public enum VolumeState {
NON_EXISTENT,
INCONSISTENT,
NOT_FORMATTED,
NOT_INITIALIZED
NOT_INITIALIZED,
READ_ONLY
}

private volatile VolumeState state;
Expand Down Expand Up @@ -599,35 +600,47 @@ private void cleanTmpDiskCheckDir() {
@Override
public synchronized VolumeCheckResult check(@Nullable Boolean unused)
throws Exception {
DiskCheckUtil.ReadWriteStatus readWriteStatus = DiskCheckUtil.checkPermissions(storageDir);

boolean directoryChecksPassed =
DiskCheckUtil.checkExistence(storageDir) &&
DiskCheckUtil.checkPermissions(storageDir);
DiskCheckUtil.checkExistence(storageDir);
// If the directory is not present or has incorrect permissions, fail the
// volume immediately. This is not an intermittent error.
if (!directoryChecksPassed) {
if (!directoryChecksPassed || readWriteStatus == DiskCheckUtil.ReadWriteStatus.READ_FAIL) {
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException("Directory check of volume " + this +
" interrupted.");
}
return VolumeCheckResult.FAILED;
}

if (readWriteStatus == DiskCheckUtil.ReadWriteStatus.WRITE_FAIL) {
setState(VolumeState.READ_ONLY);
Copy link
Contributor

@ChenSammi ChenSammi Feb 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ashishkumar50 , thanks for working on this.
Based on above checkReadWrite() implementation, it looks like WRITE_FAIL doesn't guarantee read will pass.

Back to the container schema V2 age, when disk is full, it's not able to read blocks because rocksdb cannot be opened due to disk full. Now container V3 is used, thing will be better. But it's not sure whether read block can succeed or not.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For READ failure we are already checking in container meta scanner which runs every 3 hours, That will take care of marking container unhealthy if rocksdb doesn't open or read doesn't pass through.

return VolumeCheckResult.HEALTHY;
}

// If IO test count is set to 0, IO tests for disk health are disabled.
if (ioTestCount == 0) {
return VolumeCheckResult.HEALTHY;
}

// Since IO errors may be intermittent, volume remains healthy until the
// threshold of failures is crossed.
boolean diskChecksPassed = DiskCheckUtil.checkReadWrite(storageDir,
readWriteStatus = DiskCheckUtil.checkReadWrite(storageDir,
diskCheckDir, healthCheckFileSize);
if (readWriteStatus == DiskCheckUtil.ReadWriteStatus.WRITE_FAIL) {
// Mark volume as READ only
setState(VolumeState.READ_ONLY);
return VolumeCheckResult.HEALTHY;
}
if (Thread.currentThread().isInterrupted()) {
// Thread interrupt may have caused IO operations to abort. Do not
// consider this a failure.
throw new InterruptedException("IO check of volume " + this +
" interrupted.");
}

boolean diskChecksPassed = readWriteStatus == DiskCheckUtil.ReadWriteStatus.READ_WRITE_OK;
// Move the sliding window of IO test results forward 1 by adding the
// latest entry and removing the oldest entry from the window.
// Update the failure counter for the new window.
Expand All @@ -640,8 +653,7 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
currentIOFailureCount.decrementAndGet();
}

// If the failure threshold has been crossed, fail the volume without
// further scans.
// If the failure threshold has been crossed, mark volume as READ only
// Once the volume is failed, it will not be checked anymore.
// The failure counts can be left as is.
if (currentIOFailureCount.get() > ioFailureTolerance) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
Expand All @@ -44,21 +45,21 @@ public void testPermissions() {
assertTrue(testDir.canRead());
assertTrue(testDir.canWrite());
assertTrue(testDir.canExecute());
assertTrue(DiskCheckUtil.checkPermissions(testDir));
assertSame(DiskCheckUtil.checkPermissions(testDir), DiskCheckUtil.ReadWriteStatus.READ_WRITE_OK);

// Test failure without read permissiosns.
assertTrue(testDir.setReadable(false));
assertFalse(DiskCheckUtil.checkPermissions(testDir));
assertSame(DiskCheckUtil.checkPermissions(testDir), DiskCheckUtil.ReadWriteStatus.READ_FAIL);
assertTrue(testDir.setReadable(true));

// Test failure without write permissiosns.
assertTrue(testDir.setWritable(false));
assertFalse(DiskCheckUtil.checkPermissions(testDir));
assertSame(DiskCheckUtil.checkPermissions(testDir), DiskCheckUtil.ReadWriteStatus.WRITE_FAIL);
assertTrue(testDir.setWritable(true));

// Test failure without execute permissiosns.
assertTrue(testDir.setExecutable(false));
assertFalse(DiskCheckUtil.checkPermissions(testDir));
assertSame(DiskCheckUtil.checkPermissions(testDir), DiskCheckUtil.ReadWriteStatus.READ_FAIL);
assertTrue(testDir.setExecutable(true));
}

Expand All @@ -74,7 +75,8 @@ public void testExistence() {

@Test
public void testReadWrite() {
assertTrue(DiskCheckUtil.checkReadWrite(testDir, testDir, 10));
assertSame(DiskCheckUtil.checkReadWrite(testDir, testDir, 10),
DiskCheckUtil.ReadWriteStatus.READ_WRITE_OK);

// Test file should have been deleted.
File[] children = testDir.listFiles();
Expand Down
Loading
Loading