Merged

Commits (48)
34f6e5a
Refactor volume level tmp dir so it can be used for disk test files
errose28 Jun 1, 2023
1b3375d
Consolidate volume shutdown
errose28 Jun 2, 2023
0d15e8a
More refactoring of volume tmp dir
errose28 Jun 5, 2023
8572b34
Change tmpdir to File
errose28 Jun 5, 2023
7dced52
Another compilation fix after cherry-pick
errose28 Jun 5, 2023
442dde9
Update TestContainerPersistence and fix directory create bug
errose28 Jun 5, 2023
04384c1
Refactor working dir and subdirs on startup
errose28 Jun 5, 2023
72c0d31
Rename dir delete_container_service -> deleted-containers
errose28 Jun 5, 2023
b12bcd2
Move tmp dir cleanup inside HddsVolume
errose28 Jun 6, 2023
44dbd47
Update existing tests
errose28 Jun 6, 2023
15725ae
Initial implementation of improved volume checks
errose28 May 30, 2023
57bddac
Add counter for consecutive volume IO failures
errose28 Jun 1, 2023
23ab0a6
Add config for size of file used to assess disk health
errose28 Jun 1, 2023
1d45ece
Update disk check log messages
errose28 Jun 1, 2023
9951e04
Use tmp dir for disk check files
errose28 Jun 1, 2023
0e770c4
Create tmp disk check directory on startup and clear it
errose28 Jun 8, 2023
908ee85
Add startup/shutdown tests to TestHddsVolume
errose28 Jun 9, 2023
e38ec9c
Checkstyle
errose28 Jun 9, 2023
c27ed0e
Merge branch 'tmp-dir-refactor' into improve-volume-scanner
errose28 Jun 9, 2023
4ceff35
Fix assignment to tmp dir name fields
errose28 Jun 9, 2023
87847d1
Merge branch 'tmp-dir-refactor' into improve-volume-scanner
errose28 Jun 9, 2023
bfa064b
Add test for clearing tmp disk check files on startup/shutdown
errose28 Jun 9, 2023
ab9cd26
Add tests for DiskCheckUtil and support injecting
errose28 Jun 9, 2023
0013ba9
Add unit tests for StorageVolume#check
errose28 Jun 9, 2023
fa6b5b5
Checkstyle
errose28 Jun 9, 2023
ccded0a
Merge branch 'master' into tmp-dir-refactor
errose28 Jun 10, 2023
b581bc9
Merge branch 'tmp-dir-refactor' into improve-volume-scanner
errose28 Jun 10, 2023
d516b19
Checkstyle
errose28 Jun 10, 2023
5c63737
Merge branch 'tmp-dir-refactor' into improve-volume-scanner
errose28 Jun 10, 2023
6ddb500
Add check that per-volume RocksDB is present on volume scan
errose28 Jun 13, 2023
fada782
Synchronize volume checks
errose28 Jun 13, 2023
0369054
Restore endpoint test
errose28 Jun 13, 2023
1b20307
Separate DB store init from tmp dir creation
errose28 Jun 14, 2023
d579586
Fix SCM HA finalization compat test
errose28 Jun 24, 2023
e502a07
Merge branch 'master' into tmp-dir-refactor
errose28 Jun 25, 2023
d023a89
Initial improvement of volume check configurations
errose28 Jun 26, 2023
fa4207c
Merge branch 'tmp-dir-refactor' into improve-volume-scanner
errose28 Jun 26, 2023
d27093b
Merge branch 'master' into improve-volume-scanner
errose28 Jun 26, 2023
71e277b
Fix test cleanup regression in TestHddsVolume
errose28 Jun 27, 2023
1f5ff5a
Fix new failures in TestStorageVolume
errose28 Jun 27, 2023
ca67685
Ignore interrupt during volume scan
errose28 Jun 27, 2023
397a7b6
Checkstyle
errose28 Jun 27, 2023
7ad00f8
Reduce disk check gap default
errose28 Jun 27, 2023
a84e1cc
Rat and findbugs
errose28 Jun 27, 2023
6ad84d3
Bypass IO check config validation if disabled
errose28 Jun 27, 2023
259c269
Update config and variable names
errose28 Jun 27, 2023
d5d92f8
Switch to sliding window based IO checks
errose28 Jun 27, 2023
b9bb49e
Increase disk check min gap to 10 minutes
errose28 Jun 27, 2023
@@ -62,7 +62,6 @@
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
import org.apache.hadoop.ozone.container.common.volume.StorageVolume;
import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil;
import org.apache.hadoop.ozone.util.OzoneNetUtils;
import org.apache.hadoop.ozone.util.ShutdownHookManager;
import org.apache.hadoop.security.SecurityUtil;
@@ -130,25 +129,6 @@ public HddsDatanodeService(boolean printBanner, String[] args) {
this.args = args != null ? Arrays.copyOf(args, args.length) : null;
}

private void cleanTmpDir() {
if (datanodeStateMachine != null) {
MutableVolumeSet volumeSet =
datanodeStateMachine.getContainer().getVolumeSet();
for (StorageVolume volume : volumeSet.getVolumesList()) {
if (volume instanceof HddsVolume) {
HddsVolume hddsVolume = (HddsVolume) volume;
try {
KeyValueContainerUtil.ContainerDeleteDirectory
.cleanTmpDir(hddsVolume);
} catch (IOException ex) {
LOG.error("Error while cleaning tmp delete directory " +
"under {}", hddsVolume.getWorkingDir(), ex);
}
}
}
}
}

/**
* Create a Datanode instance based on the supplied command-line arguments.
* <p>
@@ -574,8 +554,6 @@ public void terminateDatanode() {
@Override
public void stop() {
if (!isStopped.getAndSet(true)) {
// Clean <HddsVolume>/tmp/container_delete_service dir.
cleanTmpDir();
if (plugins != null) {
for (ServicePlugin plugin : plugins) {
try {
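The cleanTmpDir logic removed above is not lost: per the commits "Consolidate volume shutdown" and "Move tmp dir cleanup inside HddsVolume", each volume now clears its own tmp directory as part of its lifecycle rather than HddsDatanodeService doing it in stop(). A rough sketch of the new shape, with method names assumed for illustration rather than taken from this diff:

// Hypothetical sketch inside HddsVolume; the PR's actual method names
// and call sites may differ.
public void shutdown() {
  // ... existing per-volume shutdown steps ...
  cleanTmpDir(); // the volume now owns cleanup of its tmp directory
}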
@@ -44,6 +44,10 @@ public class DatanodeConfiguration {
"hdds.datanode.periodic.disk.check.interval.minutes";
public static final String FAILED_DATA_VOLUMES_TOLERATED_KEY =
"hdds.datanode.failed.data.volumes.tolerated";
public static final String CONSECUTIVE_VOLUME_IO_FAILURES_TOLERATED_KEY =
"hdds.datanode.consecutive.volume.io.failures.tolerated";
public static final String VOLUME_HEALTH_CHECK_FILE_SIZE_KEY =
"hdds.datanode.volume.health.check.file.size";
public static final String FAILED_METADATA_VOLUMES_TOLERATED_KEY =
"hdds.datanode.failed.metadata.volumes.tolerated";
public static final String FAILED_DB_VOLUMES_TOLERATED_KEY =
@@ -64,6 +68,10 @@ public class DatanodeConfiguration {

static final int FAILED_VOLUMES_TOLERATED_DEFAULT = -1;

public static final int CONSECUTIVE_VOLUME_IO_FAILURES_TOLERATED_DEFAULT = 3;

public static final int VOLUME_HEALTH_CHECK_FILE_SIZE_DEFAULT = 100;

static final boolean WAIT_ON_ALL_FOLLOWERS_DEFAULT = false;

static final long DISK_CHECK_MIN_GAP_DEFAULT =
@@ -269,6 +277,29 @@ public void setBlockDeletionLimit(int limit) {
)
private int failedDbVolumesTolerated = FAILED_VOLUMES_TOLERATED_DEFAULT;

@Config(key = "consecutive.volume.io.failures.tolerated",
defaultValue = "3",
type = ConfigType.INT,
tags = { DATANODE },
description = "The number of consecutive disk health checks that can " +
"fail due to a read/write error before the volume is considered " +
"failed."
)
private int consecutiveVolumeIOFailuresTolerated =
CONSECUTIVE_VOLUME_IO_FAILURES_TOLERATED_DEFAULT;

@Config(key = "volume.health.check.file.size",
defaultValue = "100B",
type = ConfigType.SIZE,
tags = { DATANODE },
description = "The size of the temporary file that will be synced to " +
"the disk and " +
"read back to assess its health. The contents of the " +
"file will be stored in memory during the duration of the check."
)
private int volumeHealthCheckFileSize =
VOLUME_HEALTH_CHECK_FILE_SIZE_DEFAULT;

@Config(key = "disk.check.min.gap",
defaultValue = "15m",
type = ConfigType.TIME,
@@ -451,6 +482,24 @@ public void validate() {
failedDbVolumesTolerated = FAILED_VOLUMES_TOLERATED_DEFAULT;
}

if (consecutiveVolumeIOFailuresTolerated < 1) {
LOG.warn(CONSECUTIVE_VOLUME_IO_FAILURES_TOLERATED_KEY +
"must be at least 1 and was set to {}. Defaulting to {}",
consecutiveVolumeIOFailuresTolerated,
CONSECUTIVE_VOLUME_IO_FAILURES_TOLERATED_DEFAULT);
consecutiveVolumeIOFailuresTolerated =
CONSECUTIVE_VOLUME_IO_FAILURES_TOLERATED_DEFAULT;
}

if (volumeHealthCheckFileSize < 1) {
LOG.warn(VOLUME_HEALTH_CHECK_FILE_SIZE_KEY +
"must be at least 1 byte and was set to {}. Defaulting to {}",
volumeHealthCheckFileSize,
VOLUME_HEALTH_CHECK_FILE_SIZE_DEFAULT);
volumeHealthCheckFileSize =
VOLUME_HEALTH_CHECK_FILE_SIZE_DEFAULT;
}

if (diskCheckMinGap < 0) {
LOG.warn(DISK_CHECK_MIN_GAP_KEY +
" must be greater than zero and was set to {}. Defaulting to {}",
@@ -487,7 +536,6 @@ public void validate() {
rocksdbDeleteObsoleteFilesPeriod =
ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_DEFAULT;
}

}

public void setContainerDeleteThreads(int containerDeleteThreads) {
@@ -531,6 +579,22 @@ public void setFailedDbVolumesTolerated(int failedVolumesTolerated) {
this.failedDbVolumesTolerated = failedVolumesTolerated;
}

public int getConsecutiveVolumeIOFailuresTolerated() {
return consecutiveVolumeIOFailuresTolerated;
}

public void setConsecutiveVolumeIOFailuresTolerated(int failuresToTolerate) {
this.consecutiveVolumeIOFailuresTolerated = failuresToTolerate;
}

public int getVolumeHealthCheckFileSize() {
return volumeHealthCheckFileSize;
}

public void setVolumeHealthCheckFileSize(int fileSizeBytes) {
this.volumeHealthCheckFileSize = fileSizeBytes;
}

public Duration getDiskCheckMinGap() {
return Duration.ofMillis(diskCheckMinGap);
}
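For reference, a minimal sketch of tuning the new settings programmatically, assuming the OzoneConfiguration#getObject pattern used for DatanodeConfiguration elsewhere in Ozone; the values shown are examples only:

OzoneConfiguration conf = new OzoneConfiguration();
// Tolerate 5 consecutive failed IO checks before failing the volume.
conf.setInt(
    DatanodeConfiguration.CONSECUTIVE_VOLUME_IO_FAILURES_TOLERATED_KEY, 5);
// Use a 1KB test file for the read/write health check.
conf.set(DatanodeConfiguration.VOLUME_HEALTH_CHECK_FILE_SIZE_KEY, "1KB");
DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class);
assert dnConf.getConsecutiveVolumeIOFailuresTolerated() == 5;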
@@ -24,10 +24,8 @@
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine;
import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
import org.apache.hadoop.ozone.container.common.volume.StorageVolume;
import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil;
import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
import org.apache.hadoop.ozone.protocol.VersionResponse;
import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;
@@ -124,20 +122,7 @@ private void checkVolumeSet(MutableVolumeSet volumeSet,
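// checkVolume verifies the volume is formatted and usable for this
// cluster; a false result marks the volume failed below.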
boolean result = StorageVolumeUtil.checkVolume(volume,
scmId, clusterId, configuration, LOG,
ozoneContainer.getDbVolumeSet());

if (result) {
// Clean <HddsVolume>/tmp/container_delete_service dir.
if (volume instanceof HddsVolume) {
HddsVolume hddsVolume = (HddsVolume) volume;
try {
KeyValueContainerUtil.ContainerDeleteDirectory
.cleanTmpDir(hddsVolume);
} catch (IOException ex) {
LOG.error("Error while cleaning tmp delete directory " +
"under {}", hddsVolume.getWorkingDir(), ex);
}
}
} else {
if (!result) {
volumeSet.failVolume(volume.getStorageDir().getPath());
}
}
@@ -0,0 +1,199 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.ozone.container.common.utils;

import com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.SyncFailedException;
import java.util.Arrays;
import java.util.Random;
import java.util.UUID;

/**
* Utility class that supports checking disk health when provided a directory
* where the disk is mounted.
*/
public final class DiskCheckUtil {
private DiskCheckUtil() { }

// For testing purposes, an alternate check implementation can be provided
// to inject failures.
private static DiskChecks impl = new DiskChecksImpl();

@VisibleForTesting
public static void setTestImpl(DiskChecks diskChecks) {
impl = diskChecks;
}

@VisibleForTesting
public static void clearTestImpl() {
impl = new DiskChecksImpl();
}

public static boolean checkExistence(File storageDir) {
return impl.checkExistence(storageDir);
}

public static boolean checkPermissions(File storageDir) {
return impl.checkPermissions(storageDir);
}

public static boolean checkReadWrite(File storageDir, File testFileDir,
int numBytesToWrite) {
return impl.checkReadWrite(storageDir, testFileDir, numBytesToWrite);
}

/**
* Defines the disk check operations that a test implementation can
* override to inject failures. Default implementations return true so
* that tests only need to override the checks they want to fail.
*/
public interface DiskChecks {
default boolean checkExistence(File storageDir) {
return true;
}
default boolean checkPermissions(File storageDir) {
return true;
}
default boolean checkReadWrite(File storageDir, File testFileDir,
int numBytesToWrite) {
return true;
}
}

/**
* The default implementation of DiskChecks that production code uses
* for disk checking.
*/
private static class DiskChecksImpl implements DiskChecks {

private static final Logger LOG =
LoggerFactory.getLogger(DiskCheckUtil.class);

@Override
public boolean checkExistence(File diskDir) {
if (!diskDir.exists()) {
logError(diskDir, "Directory does not exist.");
return false;
}
return true;
}

@Override
public boolean checkPermissions(File storageDir) {
// Check all permissions on the volume. If there are multiple permission
// errors, count it as one failure so the admin can fix them all at once.
boolean permissionsCorrect = true;
if (!storageDir.canRead()) {
logError(storageDir,
"Datanode does not have read permission on volume.");
permissionsCorrect = false;
}
if (!storageDir.canWrite()) {
logError(storageDir,
"Datanode does not have write permission on volume.");
permissionsCorrect = false;
}
if (!storageDir.canExecute()) {
logError(storageDir, "Datanode does not have execute" +
"permission on volume.");
permissionsCorrect = false;
}

return permissionsCorrect;
}

@Override
public boolean checkReadWrite(File storageDir,
File testFileDir, int numBytesToWrite) {
File testFile = new File(testFileDir, "disk-check-" + UUID.randomUUID());
byte[] writtenBytes = new byte[numBytesToWrite];
new Random().nextBytes(writtenBytes);
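// Write random bytes and fsync so the data is pushed through the OS
// cache to the physical disk before it is read back.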
try (FileOutputStream fos = new FileOutputStream(testFile)) {
fos.write(writtenBytes);
fos.getFD().sync();
} catch (FileNotFoundException notFoundEx) {
logError(storageDir, String.format("Could not find file %s for " +
"volume check.", testFile), notFoundEx);
return false;
} catch (SyncFailedException syncEx) {
logError(storageDir, String.format("Could sync file %s to disk.",
testFile), syncEx);
return false;
} catch (IOException ioEx) {
logError(storageDir, String.format("Could not write file %s " +
"for volume check.", testFile), ioEx);
return false;
}

// Read data back from the test file.
byte[] readBytes = new byte[numBytesToWrite];
try (FileInputStream fis = new FileInputStream(testFile)) {
int numBytesRead = fis.read(readBytes);
if (numBytesRead != numBytesToWrite) {
logError(storageDir, String.format("%d bytes written to file %s " +
"but %d bytes were read back.", numBytesToWrite, testFile,
numBytesRead));
return false;
}
} catch (FileNotFoundException notFoundEx) {
logError(storageDir, String.format("Could not find file %s " +
"for volume check.", testFile), notFoundEx);
return false;
} catch (IOException ioEx) {
logError(storageDir, String.format("Could not read file %s " +
"for volume check.", testFile), ioEx);
return false;
}

// Check that test file has the expected content.
if (!Arrays.equals(writtenBytes, readBytes)) {
logError(storageDir, String.format("%d Bytes read from file " +
"%s do not match the %d bytes that were written.",
writtenBytes.length, testFile, readBytes.length));
return false;
}

// Delete the file.
if (!testFile.delete()) {
logError(storageDir, String.format("Could not delete file %s " +
"for volume check.", testFile));
return false;
}

// If all checks passed, the volume is healthy.
return true;
}

private void logError(File storageDir, String message) {
LOG.error("Volume {} failed health check. {}", storageDir, message);
}

private void logError(File storageDir, String message, Exception ex) {
LOG.error("Volume {} failed health check. {}", storageDir, message, ex);
}
}
}
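Because DiskCheckUtil exposes setTestImpl/clearTestImpl, tests can inject targeted failures without touching a real disk. A minimal sketch, using only the DiskCheckUtil API shown in this diff:

// Simulate a volume whose read/write check always fails, e.g. to
// exercise the consecutive-IO-failure counter added by this PR.
DiskCheckUtil.setTestImpl(new DiskCheckUtil.DiskChecks() {
  @Override
  public boolean checkReadWrite(File storageDir, File testFileDir,
      int numBytesToWrite) {
    return false; // report an IO failure for every check
  }
});
try {
  // ... run the volume check under test ...
} finally {
  // Restore the default implementation so later tests see real disks.
  DiskCheckUtil.clearTestImpl();
}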