diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java index 2c6b3ee62a0..2ace9ad49ff 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java @@ -62,6 +62,14 @@ public final class HddsConfigKeys { public static final String HDDS_DATANODE_VOLUME_CHOOSING_POLICY = "hdds.datanode.volume.choosing.policy"; + public static final String HDDS_DATANODE_VOLUME_MIN_FREE_SPACE = + "hdds.datanode.volume.min.free.space"; + public static final String HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT = + "5GB"; + + public static final String HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT = + "hdds.datanode.volume.min.free.space.percent"; + public static final String HDDS_DB_PROFILE = "hdds.db.profile"; // Once a container usage crosses this threshold, it is eligible for diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index c5186258e3e..3687708b869 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -204,6 +204,17 @@ This volume choosing policy selects volumes in a round-robin order. + + hdds.datanode.volume.min.free.space + 5GB + OZONE, CONTAINER, STORAGE, MANAGEMENT + + This determines the free space to be used for closing containers. + When the difference between volume capacity and used reaches this number, + containers that reside on this volume will be closed and no new containers + would be allocated on this volume. 
+ + dfs.container.ratis.enabled false diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java index fc711b5a371..ef5bae7999f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java @@ -55,7 +55,9 @@ import org.apache.hadoop.ozone.container.common.interfaces.Handler; import org.apache.hadoop.ozone.container.common.statemachine.StateContext; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.common.volume.VolumeUsage; import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerScanner; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Time; @@ -538,7 +540,7 @@ public void validateContainerCommand( */ private void sendCloseContainerActionIfNeeded(Container container) { // We have to find a more efficient way to close a container. 
- boolean isSpaceFull = isContainerFull(container); + boolean isSpaceFull = isContainerFull(container) || isVolumeFull(container); boolean shouldClose = isSpaceFull || isContainerUnhealthy(container); if (shouldClose) { ContainerData containerData = container.getContainerData(); @@ -566,6 +568,23 @@ private boolean isContainerFull(Container container) { } } + private boolean isVolumeFull(Container container) { + boolean isOpen = Optional.ofNullable(container) + .map(cont -> cont.getContainerState() == ContainerDataProto.State.OPEN) + .orElse(Boolean.FALSE); + if (isOpen) { + HddsVolume volume = container.getContainerData().getVolume(); + long volumeCapacity = volume.getCapacity(); + long volumeFreeSpaceToSpare = + VolumeUsage.getMinVolumeFreeSpace(conf, volumeCapacity); + long volumeFree = volume.getAvailable(); + long volumeCommitted = volume.getCommittedBytes(); + long volumeAvailable = volumeFree - volumeCommitted; + return (volumeAvailable <= volumeFreeSpaceToSpare); + } + return false; + } + private boolean isContainerUnhealthy(Container container) { return Optional.ofNullable(container).map( cont -> (cont.getContainerState() == diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/AvailableSpaceFilter.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/AvailableSpaceFilter.java index ab6e0e1ed18..13041eb4d66 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/AvailableSpaceFilter.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/AvailableSpaceFilter.java @@ -38,10 +38,14 @@ class AvailableSpaceFilter implements Predicate { @Override public boolean test(HddsVolume vol) { + long volumeCapacity = vol.getCapacity(); long free = vol.getAvailable(); long committed = vol.getCommittedBytes(); long available = free - committed; - boolean hasEnoughSpace = available > 
requiredSpace; + long volumeFreeSpace = + VolumeUsage.getMinVolumeFreeSpace(vol.getConf(), volumeCapacity); + boolean hasEnoughSpace = + available > Math.max(requiredSpace, volumeFreeSpace); mostAvailableSpace = Math.max(available, mostAvailableSpace); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfo.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfo.java index 6773b6ff648..310f3ca1aee 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfo.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfo.java @@ -49,11 +49,17 @@ * - fsAvail: reported remaining space from local fs. * - fsUsed: reported total used space from local fs. * - fsCapacity: reported total capacity from local fs. + * - minVolumeFreeSpace (mvfs) : determines the free space for closing + containers. This is like adding a few reserved bytes to reserved space. + DNs will send a close container action to SCM at this limit, and it is + configurable. 
+ + * * - * |----used----| (avail) |++++++++reserved++++++++| - * |<- capacity ->| - * | fsAvail |-------other-------| - * |<- fsCapacity ->| + * |----used----| (avail) |++mvfs++|++++reserved+++++++| + * |<- capacity ->| + * | fsAvail |-------other-----------| + * |<- fsCapacity ->| * * What we could directly get from local fs: * fsCapacity, fsAvail, (fsUsed = fsCapacity - fsAvail) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java index 2d21c8f8ad0..87e50d680a4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java @@ -18,9 +18,17 @@ package org.apache.hadoop.ozone.container.common.volume; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.fs.CachingSpaceUsageSource; import org.apache.hadoop.hdds.fs.SpaceUsageCheckParams; import org.apache.hadoop.hdds.fs.SpaceUsageSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT; /** * Class that wraps the space df of the Datanode Volumes used by SCM @@ -32,6 +40,8 @@ public class VolumeUsage implements SpaceUsageSource { private boolean shutdownComplete; private long reservedInBytes; + private static final Logger LOG = LoggerFactory.getLogger(VolumeUsage.class); + VolumeUsage(SpaceUsageCheckParams checkParams) { source = new CachingSpaceUsageSource(checkParams); start(); // TODO should 
start only on demand @@ -101,4 +111,38 @@ public void refreshNow() { public void setReserved(long reserved) { this.reservedInBytes = reserved; } + + /** + * If 'hdds.datanode.volume.min.free.space' is defined, + * it will be honored first. If it is not defined and + * 'hdds.datanode.volume.min.free.space.percent' is defined, it will be + * honored; else it will fall back to the built-in default value (5GB). + */ + public static long getMinVolumeFreeSpace(ConfigurationSource conf, + long capacity) { + if (conf.isConfigured( + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE) && conf.isConfigured( + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT)) { + LOG.error( + "Both {} and {} are set. Set either one, not both. If both are set," + + "it will use default value which is {} as min free space", + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE, + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT, + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT); + } + + if (conf.isConfigured(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE)) { + return (long) conf.getStorageSize(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE, + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT, StorageUnit.BYTES); + } else if (conf.isConfigured(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT)) { + float volumeMinFreeSpacePercent = Float.parseFloat( + conf.get(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT)); + return (long) (capacity * volumeMinFreeSpacePercent); + } + // neither property is configured, so return + // HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT + return (long) conf.getStorageSize(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE, + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT, StorageUnit.BYTES); + + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestHddsDispatcher.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestHddsDispatcher.java index 20b27fbc93e..34c041eb6f5 100644 --- 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestHddsDispatcher.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestHddsDispatcher.java @@ -22,8 +22,12 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.fs.MockSpaceUsageCheckFactory; +import org.apache.hadoop.hdds.fs.SpaceUsageCheckFactory; +import org.apache.hadoop.hdds.fs.SpaceUsageSource; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto @@ -36,6 +40,7 @@ .WriteChunkRequestProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerAction; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.common.Checksum; import org.apache.hadoop.ozone.common.utils.BufferUtils; @@ -50,12 +55,14 @@ import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.volume.StorageVolume; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; import org.apache.hadoop.ozone.container.keyvalue.ContainerLayoutTestInfo; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.apache.ozone.test.GenericTestUtils; +import org.apache.ozone.test.LambdaTestUtils; import 
org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.junit.Assert; import org.junit.Test; @@ -68,8 +75,13 @@ import java.util.Collections; import java.util.Map; import java.util.UUID; +import java.util.concurrent.atomic.AtomicLong; + +import java.time.Duration; import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.hdds.fs.MockSpaceUsagePersistence.inMemory; +import static org.apache.hadoop.hdds.fs.MockSpaceUsageSource.fixed; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.getContainerCommandResponse; import static org.junit.Assert.assertTrue; @@ -158,6 +170,86 @@ public void testContainerCloseActionWhenFull() throws IOException { } + @Test + public void testContainerCloseActionWhenVolumeFull() throws Exception { + String testDir = GenericTestUtils.getTempPath( + TestHddsDispatcher.class.getSimpleName()); + OzoneConfiguration conf = new OzoneConfiguration(); + conf.setStorageSize(HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE, + 100.0, StorageUnit.BYTES); + DatanodeDetails dd = randomDatanodeDetails(); + + HddsVolume.Builder volumeBuilder = + new HddsVolume.Builder(testDir).datanodeUuid(dd.getUuidString()) + .conf(conf).usageCheckFactory(MockSpaceUsageCheckFactory.NONE); + // state of cluster : available (140) > 100 ,datanode volume + // utilisation threshold not yet reached. container creates are successful. 
+ SpaceUsageSource spaceUsage = fixed(500, 140, 360); + + SpaceUsageCheckFactory factory = MockSpaceUsageCheckFactory.of( + spaceUsage, Duration.ZERO, inMemory(new AtomicLong(0))); + volumeBuilder.usageCheckFactory(factory); + MutableVolumeSet volumeSet = Mockito.mock(MutableVolumeSet.class); + Mockito.when(volumeSet.getVolumesList()) + .thenReturn(Collections.singletonList(volumeBuilder.build())); + try { + UUID scmId = UUID.randomUUID(); + ContainerSet containerSet = new ContainerSet(1000); + + DatanodeStateMachine stateMachine = Mockito.mock( + DatanodeStateMachine.class); + StateContext context = Mockito.mock(StateContext.class); + Mockito.when(stateMachine.getDatanodeDetails()).thenReturn(dd); + Mockito.when(context.getParent()).thenReturn(stateMachine); + // create a 50 byte container + KeyValueContainerData containerData = new KeyValueContainerData(1L, + layout, + 50, UUID.randomUUID().toString(), + dd.getUuidString()); + Container container = new KeyValueContainer(containerData, conf); + container.create(volumeSet, new RoundRobinVolumeChoosingPolicy(), + scmId.toString()); + containerSet.addContainer(container); + ContainerMetrics metrics = ContainerMetrics.create(conf); + Map handlers = Maps.newHashMap(); + for (ContainerType containerType : ContainerType.values()) { + handlers.put(containerType, + Handler.getHandlerForContainerType(containerType, conf, + context.getParent().getDatanodeDetails().getUuidString(), + containerSet, volumeSet, metrics, NO_OP_ICR_SENDER)); + } + HddsDispatcher hddsDispatcher = new HddsDispatcher( + conf, containerSet, volumeSet, handlers, context, metrics, null); + hddsDispatcher.setClusterId(scmId.toString()); + containerData.getVolume().getVolumeInfo() + .ifPresent(volumeInfo -> volumeInfo.incrementUsedSpace(50)); + ContainerCommandResponseProto response = hddsDispatcher + .dispatch(getWriteChunkRequest(dd.getUuidString(), 1L, 1L), null); + Assert.assertEquals(ContainerProtos.Result.SUCCESS, + response.getResult()); + 
verify(context, times(1)) + .addContainerActionIfAbsent(Mockito.any(ContainerAction.class)); + + // try creating another container now as the volume used has crossed + // threshold + + KeyValueContainerData containerData2 = new KeyValueContainerData(1L, + layout, + 50, UUID.randomUUID().toString(), + dd.getUuidString()); + Container container2 = new KeyValueContainer(containerData2, conf); + LambdaTestUtils.intercept(StorageContainerException.class, + "Container creation failed, due to disk out of space", + () -> container2.create(volumeSet, + new RoundRobinVolumeChoosingPolicy(), scmId.toString())); + + } finally { + volumeSet.shutdown(); + ContainerMetrics.remove(); + FileUtils.deleteDirectory(new File(testDir)); + } + } + @Test public void testCreateContainerWithWriteChunk() throws IOException { String testDir = diff --git a/hadoop-hdds/container-service/src/test/resources/ozone-site.xml b/hadoop-hdds/container-service/src/test/resources/ozone-site.xml index ea24fbb790f..4b833d3f4fb 100644 --- a/hadoop-hdds/container-service/src/test/resources/ozone-site.xml +++ b/hadoop-hdds/container-service/src/test/resources/ozone-site.xml @@ -26,5 +26,10 @@ org.apache.hadoop.hdds.fs.MockSpaceUsageCheckFactory$None + + hdds.datanode.volume.min.free.space + 0MB + + diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java index 5c957680dec..6311f5775b2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java @@ -133,7 +133,8 @@ private void addPropertiesNotInXml() { OMConfigKeys.OZONE_OM_RANGER_HTTPS_ADMIN_API_USER, OMConfigKeys.OZONE_OM_RANGER_HTTPS_ADMIN_API_PASSWD, ScmConfigKeys.OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY, - 
S3GatewayConfigKeys.OZONE_S3G_FSO_DIRECTORY_CREATION_ENABLED + S3GatewayConfigKeys.OZONE_S3G_FSO_DIRECTORY_CREATION_ENABLED, + HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT )); } }