@@ -92,6 +92,7 @@ public class HddsVolume extends StorageVolume {
private File dbParentDir;
private File deletedContainerDir;
private AtomicBoolean dbLoaded = new AtomicBoolean(false);
private final AtomicBoolean dbLoadFailure = new AtomicBoolean(false);

/**
* Builder for HddsVolume.
@@ -257,14 +258,23 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
VolumeCheckResult result = super.check(unused);

DatanodeConfiguration df = getConf().getObject(DatanodeConfiguration.class);
if (isDbLoadFailure()) {
LOG.warn("Volume {} failed to access RocksDB: RocksDB parent directory is null, " +
"the volume might not have been loaded properly.", getStorageDir());
return VolumeCheckResult.FAILED;
}
if (result != VolumeCheckResult.HEALTHY ||
!df.getContainerSchemaV3Enabled() || !isDbLoaded()) {
return result;
}

// Check that per-volume RocksDB is present.
File dbFile = new File(dbParentDir, CONTAINER_DB_NAME);
if (!dbFile.exists() || !dbFile.canRead()) {
File dbFile = dbParentDir == null ? null : new File(dbParentDir, CONTAINER_DB_NAME);
Contributor:
If it passes the L261 check, does dbParentDir still have a chance to be null?

Contributor Author:
dbParentDir will be non-null if it passes the L261 check. I think this can be simplified:
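For reference, a minimal sketch of the simplified guard described above, assuming the earlier isDbLoadFailure() check makes a null dbParentDir unreachable at this point:

```java
// Sketch only: relies on the assumption that dbParentDir != null here,
// because check() already returned FAILED when isDbLoadFailure() was true.
File dbFile = new File(dbParentDir, CONTAINER_DB_NAME);
if (!dbFile.exists() || !dbFile.canRead()) {
  LOG.warn("Volume {} failed health check. Could not access RocksDB at {}",
      getStorageDir(), dbFile);
  return VolumeCheckResult.FAILED;
}
```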

if (dbFile == null || !dbFile.exists() || !dbFile.canRead()) {
if (dbFile == null) {
LOG.warn("Volume {} failed to access RocksDB: RocksDB parent directory is null, " +
"the volume might not have been loaded properly.", getStorageDir());
}
LOG.warn("Volume {} failed health check. Could not access RocksDB at " +
"{}", getStorageDir(), dbFile);
return VolumeCheckResult.FAILED;
@@ -326,6 +336,10 @@ public boolean isDbLoaded() {
return dbLoaded.get();
}

public boolean isDbLoadFailure() {
return dbLoadFailure.get();
}

public void loadDbStore(boolean readOnly) throws IOException {
// DN startup for the first time, not registered yet,
// so the DbVolume is not formatted.
@@ -343,35 +357,41 @@ public void loadDbStore(boolean readOnly) throws IOException {
File clusterIdDir = new File(dbVolume == null ?
getStorageDir() : dbVolume.getStorageDir(),
getClusterID());
if (!clusterIdDir.exists()) {
throw new IOException("Working dir " + clusterIdDir.getAbsolutePath() +
" not created for HddsVolume: " + getStorageDir().getAbsolutePath());
}
try {
if (!clusterIdDir.exists()) {
throw new IOException("Working dir " + clusterIdDir.getAbsolutePath() +
" not created for HddsVolume: " + getStorageDir().getAbsolutePath());
}

File storageIdDir = new File(clusterIdDir, getStorageID());
if (!storageIdDir.exists()) {
throw new IOException("Db parent dir " + storageIdDir.getAbsolutePath() +
" not found for HddsVolume: " + getStorageDir().getAbsolutePath());
}
File storageIdDir = new File(clusterIdDir, getStorageID());
if (!storageIdDir.exists()) {
throw new IOException("Db parent dir " + storageIdDir.getAbsolutePath() +
" not found for HddsVolume: " + getStorageDir().getAbsolutePath());
}

File containerDBFile = new File(storageIdDir, CONTAINER_DB_NAME);
if (!containerDBFile.exists()) {
throw new IOException("Db dir " + storageIdDir.getAbsolutePath() +
" not found for HddsVolume: " + getStorageDir().getAbsolutePath());
}
File containerDBFile = new File(storageIdDir, CONTAINER_DB_NAME);
if (!containerDBFile.exists()) {
throw new IOException("Db dir " + storageIdDir.getAbsolutePath() +
" not found for HddsVolume: " + getStorageDir().getAbsolutePath());
}

String containerDBPath = containerDBFile.getAbsolutePath();
try {
initPerDiskDBStore(containerDBPath, getConf(), readOnly);
String containerDBPath = containerDBFile.getAbsolutePath();
try {
initPerDiskDBStore(containerDBPath, getConf(), readOnly);
} catch (IOException e) {
throw new IOException("Can't init db instance under path "
+ containerDBPath + " for volume " + getStorageID(), e);
}

dbParentDir = storageIdDir;
dbLoaded.set(true);
dbLoadFailure.set(false);
LOG.info("SchemaV3 db is loaded at {} for volume {}", containerDBPath,
getStorageID());
} catch (IOException e) {
throw new IOException("Can't init db instance under path "
+ containerDBPath + " for volume " + getStorageID(), e);
dbLoadFailure.set(true);
Contributor:
@xichen01, thanks for reporting this. Can you share more detail about which operation causes the DB load failure and throws the IOException, besides the known initPerDiskDBStore? Is there an exception stack trace that can be shared?

Contributor Author:
Not sure of the root cause of the DB loading issue, but we found a very large container.db in the system home directory, as well as error logs from HddsVolume initialization.

Logs from HddsVolume initialization:
[image]

container.db in the system home directory:
[image]
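One plausible mechanism for the stray container.db, offered here as an assumption rather than a confirmed root cause: if dbParentDir is still null when the DB path is built, java.io.File treats the null parent as "no parent", so the path becomes relative and resolves against the datanode process's working directory:

```java
import java.io.File;

public class NullParentDemo {
  public static void main(String[] args) {
    File dbParentDir = null; // the volume never finished loading its DB
    // With a null parent, File(parent, child) behaves like File(child):
    // a relative path, resolved against the process working directory.
    File dbFile = new File(dbParentDir, "container.db");
    System.out.println(dbFile.getPath());         // container.db
    System.out.println(dbFile.getAbsolutePath()); // <user.dir>/container.db
  }
}
```

That would match RocksDB creating a container.db under the system home directory, and it is the case the null-parent and Preconditions.checkNotNull checks in this PR guard against.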

Contributor Author:
A similar problem can be reproduced by: 1. manually destroying the RocksDB files, then 2. restarting the DN and writing data.
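A minimal sketch of that reproduction as a test helper — RocksDbCorruptor and the dbPath layout are hypothetical names; stripping the trailing newline from RocksDB's CURRENT file yields the same open failure as in the stack trace below:

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;

final class RocksDbCorruptor {
  // dbPath would be e.g. <hddsVolume>/<clusterId>/<storageId>/container.db
  static void stripTrailingNewlineFromCurrent(Path dbPath) throws IOException {
    Path current = dbPath.resolve("CURRENT");
    byte[] bytes = Files.readAllBytes(current);
    // RocksDB requires CURRENT to end with a newline; dropping the last byte
    // reproduces "CURRENT file does not end with newline" on the next open.
    Files.write(current, Arrays.copyOf(bytes, Math.max(0, bytes.length - 1)));
  }
}
```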

Contributor Author:
 ERROR org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer: Load db store for HddsVolume /xxx/xxx/xxx/ozone/hdds failed
java.io.IOException: Can't init db instance under path /xxx/xxxx/xxx/ozone/hdds/CID-xxx/DS-xxx/container.db for volume DS-xxx
        at org.apache.hadoop.ozone.container.common.volume.HddsVolume.loadDbStore(HddsVolume.java:235)
        at org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil.loadAllHddsVolumeDbStore(HddsVolumeUtil.java:99)
        at org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer.<init>(OzoneContainer.java:146)
        at org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine.<init>(DatanodeStateMachine.java:153)
        at org.apache.hadoop.ozone.HddsDatanodeService.start(HddsDatanodeService.java:295)
        at org.apache.hadoop.ozone.HddsDatanodeService.start(HddsDatanodeService.java:227)
        at org.apache.hadoop.ozone.HddsDatanodeService.call(HddsDatanodeService.java:195)
        at org.apache.hadoop.ozone.HddsDatanodeService.call(HddsDatanodeService.java:104)
        at picocli.CommandLine.executeUserObject(CommandLine.java:1953)
        at picocli.CommandLine.access$1300(CommandLine.java:145)
        at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352)
        at picocli.CommandLine$RunLast.handle(CommandLine.java:2346)
        at picocli.CommandLine$RunLast.handle(CommandLine.java:2311)
        at picocli.CommandLine$AbstractParseResultHandler.handleParseResult(CommandLine.java:2172)
        at picocli.CommandLine.parseWithHandlers(CommandLine.java:2550)
        at picocli.CommandLine.parseWithHandler(CommandLine.java:2485)
        at org.apache.hadoop.hdds.cli.GenericCli.execute(GenericCli.java:96)
        at org.apache.hadoop.hdds.cli.GenericCli.run(GenericCli.java:87)
        at org.apache.hadoop.ozone.HddsDatanodeService.main(HddsDatanodeService.java:178)
Caused by: java.io.IOException: Failed init RocksDB, db path : /xxx/xxx/xxx/ozone/hdds/CID-xxx/DS-xxx/container.db, exception :org.rocksdb.RocksDBException CURRENT file does not end with newline
        at org.apache.hadoop.hdds.utils.db.RDBStore.<init>(RDBStore.java:130)
        at org.apache.hadoop.hdds.utils.db.DBStoreBuilder.build(DBStoreBuilder.java:191)
        at org.apache.hadoop.ozone.container.metadata.AbstractDatanodeStore.start(AbstractDatanodeStore.java:146)
        at org.apache.hadoop.ozone.container.metadata.AbstractDatanodeStore.<init>(AbstractDatanodeStore.java:99)
        at org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaThreeImpl.<init>(DatanodeStoreSchemaThreeImpl.java:54)
        at org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils.getUncachedDatanodeStore(BlockUtils.java:84)
        at org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil.initPerDiskDBStore(HddsVolumeUtil.java:79)
        at org.apache.hadoop.ozone.container.common.volume.HddsVolume.loadDbStore(HddsVolume.java:232)
        ... 18 more

Contributor:
I think we need to root-cause this issue. Could you kindly check the related DN logs to see if there are any suspicious entries? Without the root cause, this extra IOException catch may not be enough.

Contributor Author:
I have not found any other related logs that would help locate the root cause. This may be a legacy issue that has existed since the cluster was created, but with this PR we would have caught it faster.

Contributor:
I have observed another impact when the volume metadata path was removed incorrectly in a test environment. At that point the container was still being loaded and hit an NPE in another flow; that NPE was handled in #5921 by skipping the container load. Since it was a test-script issue where cleanup was not done properly, I'm mentioning it as a similar case.

Contributor:
@xichen01, in case we don't know the exact cause of the DB loading failure, let's change this catch from IOException to Throwable.
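A minimal sketch of what that widened catch could look like — loadDbStoreGuarded is a hypothetical wrapper, not code from this PR:

```java
// Sketch only: any Throwable (not just IOException) marks the volume as a
// DB load failure, so the next check() reports it as FAILED.
private void loadDbStoreGuarded(boolean readOnly) throws IOException {
  try {
    loadDbStore(readOnly); // existing loading logic
  } catch (Throwable t) {
    dbLoadFailure.set(true);
    if (t instanceof IOException) {
      throw (IOException) t;
    }
    throw new IOException("Failed to load db for volume " + getStorageID(), t);
  }
}
```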

Contributor:
@xichen01, could you please address my comment above and provide a new patch?

Contributor Author:
OK, I will.

throw e;
}

dbParentDir = storageIdDir;
dbLoaded.set(true);
LOG.info("SchemaV3 db is loaded at {} for volume {}", containerDBPath,
getStorageID());
}

/**
@@ -417,9 +437,11 @@ public void createDbStore(MutableVolumeSet dbVolumeSet) throws IOException {
try {
HddsVolumeUtil.initPerDiskDBStore(containerDBPath, getConf(), false);
dbLoaded.set(true);
dbLoadFailure.set(false);
LOG.info("SchemaV3 db is created and loaded at {} for volume {}",
containerDBPath, getStorageID());
} catch (IOException e) {
dbLoadFailure.set(true);
String errMsg = "Can't create db instance under path "
+ containerDBPath + " for volume " + getStorageID();
LOG.error(errMsg, e);
@@ -448,6 +470,7 @@ private void closeDbStore() {
.getAbsolutePath();
DatanodeStoreCache.getInstance().removeDB(containerDBPath);
dbLoaded.set(false);
dbLoadFailure.set(false);
LOG.info("SchemaV3 db is stopped at {} for volume {}", containerDBPath,
getStorageID());
}
@@ -442,12 +442,20 @@ public Map<StorageType, List<StorageVolume>> getVolumeStateMap() {
public boolean hasEnoughVolumes() {
// Max number of bad volumes allowed, should have at least
// 1 good volume
boolean hasEnoughVolumes;
if (maxVolumeFailuresTolerated ==
StorageVolumeChecker.MAX_VOLUME_FAILURE_TOLERATED_LIMIT) {
return getVolumesList().size() >= 1;
hasEnoughVolumes = getVolumesList().size() >= 1;
} else {
return getFailedVolumesList().size() <= maxVolumeFailuresTolerated;
hasEnoughVolumes = getFailedVolumesList().size() <= maxVolumeFailuresTolerated;
}
if (!hasEnoughVolumes) {
LOG.error("Not enough volumes in MutableVolumeSet. DatanodeUUID: {}, VolumeType: {}, " +
"MaxVolumeFailuresTolerated: {}, ActiveVolumes: {}, FailedVolumes: {}",
datanodeUuid, volumeType, maxVolumeFailuresTolerated,
getVolumesList().size(), getFailedVolumesList().size());
}
return hasEnoughVolumes;
}

public StorageLocationReport[] getStorageReport() {
@@ -124,15 +124,13 @@ private static String getContainerSubDirectory(long containerId) {
*/
public static File getContainerDBFile(KeyValueContainerData containerData) {
if (containerData.hasSchema(OzoneConsts.SCHEMA_V3)) {
Preconditions.checkNotNull(containerData.getVolume().getDbParentDir(), "Base Directory cannot be null");
return new File(containerData.getVolume().getDbParentDir(),
OzoneConsts.CONTAINER_DB_NAME);
}
return getContainerDBFile(containerData.getMetadataPath(), containerData);
}

public static File getContainerDBFile(String baseDir,
KeyValueContainerData containerData) {
return new File(baseDir, containerData.getContainerID() +
Preconditions.checkNotNull(containerData.getMetadataPath(), "Metadata Directory cannot be null");
return new File(containerData.getMetadataPath(), containerData.getContainerID() +
OzoneConsts.DN_CONTAINER_DB);
}

}
@@ -19,6 +19,7 @@

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.container.common.ContainerTestUtils;
import org.apache.hadoop.ozone.container.common.volume.DbVolume;
@@ -43,7 +44,13 @@
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrowsExactly;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mockStatic;

import org.mockito.MockedStatic;
import org.mockito.Mockito;


/**
* Test for {@link HddsVolumeUtil}.
@@ -95,6 +102,34 @@ public void teardown() {
dbVolumeSet.shutdown();
}

@Test
public void testLoadHDDVolumeWithInitDBException()
throws Exception {
// Create db instances for all HddsVolumes.
for (HddsVolume hddsVolume : StorageVolumeUtil.getHddsVolumesList(
hddsVolumeSet.getVolumesList())) {
hddsVolume.format(clusterId);
hddsVolume.createWorkingDir(clusterId, null);
}

try (MockedStatic<HddsVolumeUtil> mocked = mockStatic(HddsVolumeUtil.class, Mockito.CALLS_REAL_METHODS)) {
// Simulate an exception during DB init.
mocked.when(() -> HddsVolumeUtil.initPerDiskDBStore(Mockito.anyString(), Mockito.any(), Mockito.anyBoolean()))
.thenThrow(new IOException("Mocked Exception"));

reinitVolumes();
for (HddsVolume hddsVolume : StorageVolumeUtil.getHddsVolumesList(
hddsVolumeSet.getVolumesList())) {
assertThrowsExactly(IOException.class, () -> hddsVolume.loadDbStore(true));
// If the volume's DB init fails, the volume should be recognized as a failed volume.
assertEquals(VolumeCheckResult.FAILED, hddsVolume.check(false));
assertTrue(hddsVolume.isDbLoadFailure());
assertFalse(hddsVolume.isDbLoaded());
}
}

}

@Test
public void testLoadAllHddsVolumeDbStoreWithoutDbVolumes()
throws IOException {