Merged
@@ -62,6 +62,14 @@ public final class HddsConfigKeys {
public static final String HDDS_DATANODE_VOLUME_CHOOSING_POLICY =
"hdds.datanode.volume.choosing.policy";

public static final String HDDS_DATANODE_VOLUME_MIN_FREE_SPACE =
"hdds.datanode.volume.min.free.space";
public static final String HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT =
"5GB";

public static final String HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT =
"hdds.datanode.volume.min.free.space.percent";

public static final String HDDS_DB_PROFILE = "hdds.db.profile";

// Once a container usage crosses this threshold, it is eligible for
11 changes: 11 additions & 0 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -204,6 +204,17 @@
This volume choosing policy selects volumes in a round-robin order.
</description>
</property>
<property>
<name>hdds.datanode.volume.min.free.space</name>
<value>5GB</value>
<tag>OZONE, CONTAINER, STORAGE, MANAGEMENT</tag>
<description>
This determines the minimum free space to keep on a volume for closing
containers. When the available space on a volume (capacity minus used)
drops to this value, containers that reside on the volume will be closed
and no new containers will be allocated on it.
</description>
</property>
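
Note: the companion key hdds.datanode.volume.min.free.space.percent introduced in this patch is deliberately not documented in ozone-default.xml (it is whitelisted in addPropertiesNotInXml at the end of this diff). As a sketch, a site that prefers a relative threshold could set it in ozone-site.xml like this; the 0.05 value (5% of volume capacity, parsed as a float by the code below) is an assumed example, not part of this patch:

<property>
  <name>hdds.datanode.volume.min.free.space.percent</name>
  <value>0.05</value>
  <description>Assumed example: keep 5% of each volume's capacity free.
    Set either this or hdds.datanode.volume.min.free.space, not both.
  </description>
</property>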
<property>
<name>dfs.container.ratis.enabled</name>
<value>false</value>
@@ -55,7 +55,9 @@
import org.apache.hadoop.ozone.container.common.interfaces.Handler;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.common.volume.VolumeSet;
import org.apache.hadoop.ozone.container.common.volume.VolumeUsage;
import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerScanner;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Time;
@@ -538,7 +540,7 @@ public void validateContainerCommand(
*/
private void sendCloseContainerActionIfNeeded(Container container) {
// We have to find a more efficient way to close a container.
boolean isSpaceFull = isContainerFull(container);
boolean isSpaceFull = isContainerFull(container) || isVolumeFull(container);
boolean shouldClose = isSpaceFull || isContainerUnhealthy(container);
if (shouldClose) {
ContainerData containerData = container.getContainerData();
Expand Down Expand Up @@ -566,6 +568,23 @@ private boolean isContainerFull(Container container) {
}
}

private boolean isVolumeFull(Container container) {
boolean isOpen = Optional.ofNullable(container)
.map(cont -> cont.getContainerState() == ContainerDataProto.State.OPEN)
.orElse(Boolean.FALSE);
if (isOpen) {
HddsVolume volume = container.getContainerData().getVolume();
long volumeCapacity = volume.getCapacity();
long volumeFreeSpaceToSpare =
VolumeUsage.getMinVolumeFreeSpace(conf, volumeCapacity);
long volumeFree = volume.getAvailable();
long volumeCommitted = volume.getCommittedBytes();
long volumeAvailable = volumeFree - volumeCommitted;
return (volumeAvailable <= volumeFreeSpaceToSpare);
Review comment (Contributor):
@sadanand48 , "- vol.getCommittedBytes()" is missing here.

Reply (Contributor Author):
Done.

Review comment (Contributor):
Thanks @sadanand48 . The last patch LGTM, +1.

}
return false;
}
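
For reviewers, a minimal standalone sketch of the arithmetic isVolumeFull performs, using assumed numbers (a 500-byte volume with a 100-byte min free space, loosely mirroring the test below); none of these values come from this patch:

// Sketch only; the numbers are assumed examples.
public class VolumeFullSketch {
  public static void main(String[] args) {
    long volumeFree = 140;      // volume.getAvailable()
    long volumeCommitted = 60;  // volume.getCommittedBytes()
    // VolumeUsage.getMinVolumeFreeSpace(conf, volumeCapacity), assumed 100
    long volumeFreeSpaceToSpare = 100;
    long volumeAvailable = volumeFree - volumeCommitted;       // 80
    boolean full = volumeAvailable <= volumeFreeSpaceToSpare;  // 80 <= 100
    System.out.println("volume full: " + full);                // true
  }
}

Committed bytes are subtracted because space already promised to open containers cannot absorb new writes; this is what the review thread above called out against an earlier revision.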

private boolean isContainerUnhealthy(Container container) {
return Optional.ofNullable(container).map(
cont -> (cont.getContainerState() ==
@@ -38,10 +38,14 @@ class AvailableSpaceFilter implements Predicate<HddsVolume> {

@Override
public boolean test(HddsVolume vol) {
long volumeCapacity = vol.getCapacity();
long free = vol.getAvailable();
long committed = vol.getCommittedBytes();
long available = free - committed;
boolean hasEnoughSpace = available > requiredSpace;
long volumeFreeSpace =
VolumeUsage.getMinVolumeFreeSpace(vol.getConf(), volumeCapacity);
boolean hasEnoughSpace =
available > Math.max(requiredSpace, volumeFreeSpace);

mostAvailableSpace = Math.max(available, mostAvailableSpace);

@@ -49,11 +49,17 @@
* - fsAvail: reported remaining space from local fs.
* - fsUsed: reported total used space from local fs.
* - fsCapacity: reported total capacity from local fs.
* - minVolumeFreeSpace (mvfs): determines the free space to keep for
*   closing containers. This is like adding a few reserved bytes to the
*   reserved space. Datanodes will send a close-container action to SCM
*   at this limit, and it is configurable.
*
* Before this change:
*
* |----used----|   (avail)   |++++++++reserved++++++++|
* |<-                 capacity                       ->|
* |      fsAvail        |-------other-------|
* |<-            fsCapacity              ->|
*
* After this change:
*
* |----used----| (avail) |++mvfs++|++++reserved+++++++|
* |<-                 capacity                      ->|
* |      fsAvail      |-------other-----------|
* |<-            fsCapacity               ->|
*
* What we could directly get from local fs:
* fsCapacity, fsAvail, (fsUsed = fsCapacity - fsAvail)
@@ -18,9 +18,17 @@

package org.apache.hadoop.ozone.container.common.volume;

import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.conf.StorageUnit;
import org.apache.hadoop.hdds.fs.CachingSpaceUsageSource;
import org.apache.hadoop.hdds.fs.SpaceUsageCheckParams;
import org.apache.hadoop.hdds.fs.SpaceUsageSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT;

/**
* Class that wraps the space df of the Datanode Volumes used by SCM
@@ -32,6 +40,8 @@ public class VolumeUsage implements SpaceUsageSource {
private boolean shutdownComplete;
private long reservedInBytes;

private static final Logger LOG = LoggerFactory.getLogger(VolumeUsage.class);

VolumeUsage(SpaceUsageCheckParams checkParams) {
source = new CachingSpaceUsageSource(checkParams);
start(); // TODO should start only on demand
@@ -101,4 +111,38 @@ public void refreshNow() {
public void setReserved(long reserved) {
this.reservedInBytes = reserved;
}

/**
 * If 'hdds.datanode.volume.min.free.space' is defined, it is honored first.
 * Otherwise, if 'hdds.datanode.volume.min.free.space.percent' is defined,
 * that is honored. If neither is defined, the default value of
 * 'hdds.datanode.volume.min.free.space' (5GB) is used.
 */
public static long getMinVolumeFreeSpace(ConfigurationSource conf,
long capacity) {
if (conf.isConfigured(
HDDS_DATANODE_VOLUME_MIN_FREE_SPACE) && conf.isConfigured(
HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT)) {
LOG.error(
    "Both {} and {} are set. Set either one, not both. When both are"
        + " set, {} takes precedence as min free space",
    HDDS_DATANODE_VOLUME_MIN_FREE_SPACE,
    HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT,
    HDDS_DATANODE_VOLUME_MIN_FREE_SPACE);
}

if (conf.isConfigured(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE)) {
return (long) conf.getStorageSize(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE,
HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT, StorageUnit.BYTES);
} else if (conf.isConfigured(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT)) {
float volumeMinFreeSpacePercent = Float.parseFloat(
conf.get(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT));
return (long) (capacity * volumeMinFreeSpacePercent);
}
// Neither property is configured, so fall back to
// HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT.
return (long) conf.getStorageSize(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE,
HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT, StorageUnit.BYTES);

}
}
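
A small usage sketch of the precedence rules implemented above (the key names, OzoneConfiguration, and getMinVolumeFreeSpace are from this patch; the standalone class and the concrete values are assumed examples):

import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT;

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.container.common.volume.VolumeUsage;

public class MinFreeSpaceSketch {
  public static void main(String[] args) {
    OzoneConfiguration conf = new OzoneConfiguration();
    long capacity = 1_000_000_000L; // assumed 1 GB volume

    // Neither key configured -> the "5GB" default applies.
    System.out.println(VolumeUsage.getMinVolumeFreeSpace(conf, capacity));

    // Percent only -> a fraction of capacity: 0.05 * 1 GB = 50 MB.
    conf.set(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT, "0.05");
    System.out.println(VolumeUsage.getMinVolumeFreeSpace(conf, capacity));

    // Both keys set -> the absolute key wins and an error is logged.
    conf.set(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE, "100MB");
    System.out.println(VolumeUsage.getMinVolumeFreeSpace(conf, capacity));
  }
}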
@@ -22,8 +22,12 @@
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.StorageUnit;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.client.BlockID;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.fs.MockSpaceUsageCheckFactory;
import org.apache.hadoop.hdds.fs.SpaceUsageCheckFactory;
import org.apache.hadoop.hdds.fs.SpaceUsageSource;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.datanode.proto
@@ -36,6 +40,7 @@
.WriteChunkRequestProto;
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.ContainerAction;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.common.Checksum;
import org.apache.hadoop.ozone.common.utils.BufferUtils;
@@ -50,12 +55,14 @@
import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy;
import org.apache.hadoop.ozone.container.common.volume.StorageVolume;
import org.apache.hadoop.ozone.container.common.volume.VolumeSet;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
import org.apache.hadoop.ozone.container.keyvalue.ContainerLayoutTestInfo;
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer;
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
import org.apache.ozone.test.GenericTestUtils;

import org.apache.ozone.test.LambdaTestUtils;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import org.junit.Assert;
import org.junit.Test;
@@ -68,8 +75,13 @@
import java.util.Collections;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicLong;

import java.time.Duration;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.hadoop.hdds.fs.MockSpaceUsagePersistence.inMemory;
import static org.apache.hadoop.hdds.fs.MockSpaceUsageSource.fixed;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY;
import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.getContainerCommandResponse;
import static org.junit.Assert.assertTrue;
@@ -158,6 +170,86 @@ public void testContainerCloseActionWhenFull() throws IOException {

}

@Test
public void testContainerCloseActionWhenVolumeFull() throws Exception {
String testDir = GenericTestUtils.getTempPath(
TestHddsDispatcher.class.getSimpleName());
OzoneConfiguration conf = new OzoneConfiguration();
conf.setStorageSize(HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE,
100.0, StorageUnit.BYTES);
DatanodeDetails dd = randomDatanodeDetails();

HddsVolume.Builder volumeBuilder =
new HddsVolume.Builder(testDir).datanodeUuid(dd.getUuidString())
.conf(conf).usageCheckFactory(MockSpaceUsageCheckFactory.NONE);
// State of cluster: available (140) > min free space (100), so the
// volume utilization threshold is not yet reached and container creates
// succeed.
SpaceUsageSource spaceUsage = fixed(500, 140, 360);

SpaceUsageCheckFactory factory = MockSpaceUsageCheckFactory.of(
spaceUsage, Duration.ZERO, inMemory(new AtomicLong(0)));
volumeBuilder.usageCheckFactory(factory);
MutableVolumeSet volumeSet = Mockito.mock(MutableVolumeSet.class);
Mockito.when(volumeSet.getVolumesList())
.thenReturn(Collections.singletonList(volumeBuilder.build()));
try {
UUID scmId = UUID.randomUUID();
ContainerSet containerSet = new ContainerSet(1000);

DatanodeStateMachine stateMachine = Mockito.mock(
DatanodeStateMachine.class);
StateContext context = Mockito.mock(StateContext.class);
Mockito.when(stateMachine.getDatanodeDetails()).thenReturn(dd);
Mockito.when(context.getParent()).thenReturn(stateMachine);
// create a 50 byte container
KeyValueContainerData containerData = new KeyValueContainerData(1L,
layout,
50, UUID.randomUUID().toString(),
dd.getUuidString());
Container container = new KeyValueContainer(containerData, conf);
container.create(volumeSet, new RoundRobinVolumeChoosingPolicy(),
scmId.toString());
containerSet.addContainer(container);
ContainerMetrics metrics = ContainerMetrics.create(conf);
Map<ContainerType, Handler> handlers = Maps.newHashMap();
for (ContainerType containerType : ContainerType.values()) {
handlers.put(containerType,
Handler.getHandlerForContainerType(containerType, conf,
context.getParent().getDatanodeDetails().getUuidString(),
containerSet, volumeSet, metrics, NO_OP_ICR_SENDER));
}
HddsDispatcher hddsDispatcher = new HddsDispatcher(
conf, containerSet, volumeSet, handlers, context, metrics, null);
hddsDispatcher.setClusterId(scmId.toString());
containerData.getVolume().getVolumeInfo()
.ifPresent(volumeInfo -> volumeInfo.incrementUsedSpace(50));
ContainerCommandResponseProto response = hddsDispatcher
.dispatch(getWriteChunkRequest(dd.getUuidString(), 1L, 1L), null);
Assert.assertEquals(ContainerProtos.Result.SUCCESS,
response.getResult());
verify(context, times(1))
.addContainerActionIfAbsent(Mockito.any(ContainerAction.class));

// Try creating another container now that volume usage has crossed
// the threshold.

KeyValueContainerData containerData2 = new KeyValueContainerData(1L,
layout,
50, UUID.randomUUID().toString(),
dd.getUuidString());
Container container2 = new KeyValueContainer(containerData2, conf);
LambdaTestUtils.intercept(StorageContainerException.class,
"Container creation failed, due to disk out of space",
() -> container2.create(volumeSet,
new RoundRobinVolumeChoosingPolicy(), scmId.toString()));

} finally {
volumeSet.shutdown();
ContainerMetrics.remove();
FileUtils.deleteDirectory(new File(testDir));
}
}

@Test
public void testCreateContainerWithWriteChunk() throws IOException {
String testDir =
@@ -26,5 +26,10 @@
<value>org.apache.hadoop.hdds.fs.MockSpaceUsageCheckFactory$None</value>
</property>

<property>
<name>hdds.datanode.volume.min.free.space</name>
<value>0MB</value>
</property>


</configuration>
@@ -133,7 +133,8 @@ private void addPropertiesNotInXml() {
OMConfigKeys.OZONE_OM_RANGER_HTTPS_ADMIN_API_USER,
OMConfigKeys.OZONE_OM_RANGER_HTTPS_ADMIN_API_PASSWD,
ScmConfigKeys.OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY,
S3GatewayConfigKeys.OZONE_S3G_FSO_DIRECTORY_CREATION_ENABLED
S3GatewayConfigKeys.OZONE_S3G_FSO_DIRECTORY_CREATION_ENABLED,
HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT
));
}
}