diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 23400b1a06b4..0d99f1116ec4 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -359,6 +359,10 @@ public final class ScmConfigKeys { "ozone.scm.container.size"; public static final String OZONE_SCM_CONTAINER_SIZE_DEFAULT = "5GB"; + public static final String OZONE_SCM_CONTAINER_SPACE_REQUIREMENT_MULTIPLIER = + "ozone.scm.container.space.requirement.multiplier"; + public static final double OZONE_SCM_CONTAINER_SPACE_REQUIREMENT_MULTIPLIER_DEFAULT = 5.0; + public static final String OZONE_SCM_CONTAINER_LOCK_STRIPE_SIZE = "ozone.scm.container.lock.stripes"; public static final int OZONE_SCM_CONTAINER_LOCK_STRIPE_SIZE_DEFAULT = 512; diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 5d36eb3b8f29..59150ff320ac 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -1035,6 +1035,22 @@ balances the amount of metadata. </description> </property> + <property> + <name>ozone.scm.container.space.requirement.multiplier</name> + <value>5.0</value> + <tag>OZONE, SCM, MANAGEMENT</tag> + <description> + Multiplier for container space requirement when checking if a datanode + has enough space for container allocation. The required space is calculated + as container size multiplied by this value. This prevents concurrent clients + from all passing the space check when there's only enough space for one + container. For example, with default container size of 5GB and multiplier + of 5.0, the system will require 25GB of available space before allocating + a new container. 
This ensures that if only 6GB is remaining, the check will + fail, preventing multiple clients from attempting to create containers + concurrently when there's only space for one. + </description> + </property> <property> <name>ozone.scm.container.lock.stripes</name> <value>512</value> diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java index 2da151faed6a..01624aac76cd 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java @@ -432,6 +432,13 @@ public static long requiredReplicationSpace(long defaultContainerSize) { return 2 * defaultContainerSize; } + public static long requiredReplicationSpace(long defaultContainerSize, ConfigurationSource conf) { + double multiplier = conf.getDouble( + ScmConfigKeys.OZONE_SCM_CONTAINER_SPACE_REQUIREMENT_MULTIPLIER, + ScmConfigKeys.OZONE_SCM_CONTAINER_SPACE_REQUIREMENT_MULTIPLIER_DEFAULT); + return (long) (multiplier * defaultContainerSize); + } + public static Collection getDatanodeStorageDirs(ConfigurationSource conf) { Collection rawLocations = conf.getTrimmedStringCollection(HDDS_DATANODE_DIR_KEY); if (rawLocations.isEmpty()) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java index d255bc9a672d..921fedc77371 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java @@ -34,6 +34,7 @@ import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import 
org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ContainerInfoProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent; @@ -46,6 +47,7 @@ import org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; +import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException; import org.apache.hadoop.util.Time; @@ -71,6 +73,7 @@ public class ContainerManagerImpl implements ContainerManager { private final SCMHAManager haManager; private final SequenceIdGenerator sequenceIdGen; + private final OzoneConfiguration conf; // TODO: Revisit this. // Metrics related to operations should be moved to ProtocolServer @@ -99,6 +102,11 @@ public ContainerManagerImpl( this.pipelineManager = pipelineManager; this.haManager = scmHaManager; this.sequenceIdGen = sequenceIdGen; + if (conf instanceof OzoneConfiguration) { + this.conf = (OzoneConfiguration) conf; + } else { + this.conf = new OzoneConfiguration(conf); + } this.containerStateManager = ContainerStateManagerImpl.newBuilder() .setConfiguration(conf) .setPipelineManager(pipelineManager) @@ -352,23 +360,25 @@ public ContainerInfo getMatchingContainer(final long size, final String owner, synchronized (pipeline.getId()) { containerIDs = getContainersForOwner(pipeline, owner); if (containerIDs.size() < getOpenContainerCountPerPipeline(pipeline)) { - if (pipelineManager.hasEnoughSpace(pipeline, maxContainerSize)) { + long requiredSpace = HddsServerUtil.requiredReplicationSpace(maxContainerSize, conf); + if (pipelineManager.hasEnoughSpace(pipeline, requiredSpace)) { allocateContainer(pipeline, owner); containerIDs = getContainersForOwner(pipeline, owner); } else { LOG.debug("Cannot allocate a new container because pipeline {} does not have the 
required space {}.", - pipeline, maxContainerSize); + pipeline, requiredSpace); } } containerIDs.removeAll(excludedContainerIDs); containerInfo = containerStateManager.getMatchingContainer( size, owner, pipeline.getId(), containerIDs); if (containerInfo == null) { - if (pipelineManager.hasEnoughSpace(pipeline, maxContainerSize)) { + long requiredSpace = HddsServerUtil.requiredReplicationSpace(maxContainerSize, conf); + if (pipelineManager.hasEnoughSpace(pipeline, requiredSpace)) { containerInfo = allocateContainer(pipeline, owner); } else { LOG.debug("Cannot allocate a new container because pipeline {} does not have the required space {}.", - pipeline, maxContainerSize); + pipeline, requiredSpace); } } return containerInfo; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/ECPipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/ECPipelineProvider.java index c1f14d8cc655..4da90e8a3f15 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/ECPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/ECPipelineProvider.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; +import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -86,9 +87,10 @@ public synchronized Pipeline create(ECReplicationConfig replicationConfig) protected Pipeline create(ECReplicationConfig replicationConfig, List excludedNodes, List favoredNodes) throws IOException { + long requiredSpace = HddsServerUtil.requiredReplicationSpace(containerSizeBytes, conf); List dns = placementPolicy .chooseDatanodes(excludedNodes, favoredNodes, - replicationConfig.getRequiredNodes(), 0, this.containerSizeBytes); + replicationConfig.getRequiredNodes(), 0, requiredSpace); 
return create(replicationConfig, dns); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java index 491e4d15adf2..7ec47f9d1b1b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hdds.scm.pipeline.leader.choose.algorithms.LeaderChoosePolicy; import org.apache.hadoop.hdds.scm.pipeline.leader.choose.algorithms.LeaderChoosePolicyFactory; import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.ozone.protocol.commands.ClosePipelineCommand; import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.apache.hadoop.ozone.protocol.commands.CreatePipelineCommand; @@ -162,10 +163,11 @@ public synchronized Pipeline create(RatisReplicationConfig replicationConfig, final ReplicationFactor factor = replicationConfig.getReplicationFactor(); + long requiredSpace = HddsServerUtil.requiredReplicationSpace(containerSizeBytes, conf); switch (factor) { case ONE: dns = pickNodesNotUsed(replicationConfig, minRatisVolumeSizeBytes, - containerSizeBytes, conf); + requiredSpace, conf); break; case THREE: List excludeDueToEngagement = filterPipelineEngagement(); @@ -178,7 +180,7 @@ public synchronized Pipeline create(RatisReplicationConfig replicationConfig, } dns = placementPolicy.chooseDatanodes(excludedNodes, favoredNodes, factor.getNumber(), minRatisVolumeSizeBytes, - containerSizeBytes); + requiredSpace); break; default: throw new IllegalStateException("Unknown factor: " + factor.name()); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java index dd5edf381930..4daebb72cd57 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java @@ -27,8 +27,11 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -57,6 +60,7 @@ import org.apache.hadoop.hdds.scm.pipeline.MockPipelineManager; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; +import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.DBStoreBuilder; import org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException; @@ -179,6 +183,40 @@ public void testGetMatchingContainerReturnsContainerWhenEnoughSpaceInDatanodes() assertNotNull(container); } + @Test + public void testContainerSpaceRequirement() throws IOException { + long sizeRequired = 256 * 1024 * 1024; + long containerSize = 5L * 1024 * 1024 * 1024; + + PipelineManager spyPipelineManager = spy(pipelineManager); + File tempDir = new File(testDir, "tempDir"); + OzoneConfiguration conf = SCMTestUtils.getConf(tempDir); + long expectedSpaceRequirement = HddsServerUtil.requiredReplicationSpace(containerSize, conf); + + ContainerManager manager = new ContainerManagerImpl(conf, + scmhaManager, sequenceIdGen, spyPipelineManager, + 
SCMDBDefinition.CONTAINERS.getTable(dbStore), pendingOpsMock); + + Pipeline pipeline = spyPipelineManager.getPipelines().iterator().next(); + + doReturn(false).when(spyPipelineManager) + .hasEnoughSpace(any(Pipeline.class), anyLong()); + ContainerInfo container = manager + .getMatchingContainer(sizeRequired, "test", pipeline, Collections.emptySet()); + assertNull(container); + verify(spyPipelineManager, atLeast(1)) + .hasEnoughSpace(eq(pipeline), eq(expectedSpaceRequirement)); + + reset(spyPipelineManager); + doReturn(true).when(spyPipelineManager) + .hasEnoughSpace(any(Pipeline.class), anyLong()); + container = manager + .getMatchingContainer(sizeRequired, "test", pipeline, Collections.emptySet()); + assertNotNull(container); + verify(spyPipelineManager, atLeast(1)) + .hasEnoughSpace(eq(pipeline), eq(expectedSpaceRequirement)); + } + @Test void testUpdateContainerState() throws Exception { final ContainerInfo container = containerManager.allocateContainer( diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestECPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestECPipelineProvider.java index f2511e624f77..b39beebdfa9c 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestECPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestECPipelineProvider.java @@ -55,6 +55,7 @@ import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; +import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -68,17 +69,18 @@ public class TestECPipelineProvider { private PipelineStateManager stateManager = mock(PipelineStateManager.class); private PlacementPolicy placementPolicy = mock(PlacementPolicy.class); - private long 
containerSizeBytes; + private long containerSpaceRequirement; @BeforeEach public void setup() throws IOException, NodeNotFoundException { OzoneConfiguration conf = new OzoneConfiguration(); provider = new ECPipelineProvider( nodeManager, stateManager, conf, placementPolicy); - this.containerSizeBytes = (long) conf.getStorageSize( + long containerSizeBytes = (long) conf.getStorageSize( ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE, ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES); + this.containerSpaceRequirement = HddsServerUtil.requiredReplicationSpace(containerSizeBytes, conf); // Placement policy will always return EC number of random nodes. when(placementPolicy.chooseDatanodes(anyList(), anyList(), anyInt(), anyLong(), @@ -200,7 +202,7 @@ public void testExcludedAndFavoredNodesPassedToPlacementPolicy() assertEquals(ecConf.getData() + ecConf.getParity(), pipeline.getNodes().size()); verify(placementPolicy).chooseDatanodes(excludedNodes, favoredNodes, - ecConf.getRequiredNodes(), 0, containerSizeBytes); + ecConf.getRequiredNodes(), 0, containerSpaceRequirement); } private Set createContainerReplicas(int number) { diff --git a/hadoop-ozone/dist/src/main/k8s/definitions/ozone/config.yaml b/hadoop-ozone/dist/src/main/k8s/definitions/ozone/config.yaml index e646b1c0af88..e7914e55e87f 100644 --- a/hadoop-ozone/dist/src/main/k8s/definitions/ozone/config.yaml +++ b/hadoop-ozone/dist/src/main/k8s/definitions/ozone/config.yaml @@ -31,6 +31,7 @@ data: OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "3" OZONE-SITE.XML_ozone.datanode.pipeline.limit: "1" OZONE-SITE.XML_hdds.datanode.volume.min.free.space: "1GB" + OZONE-SITE.XML_ozone.scm.container.size: "1GB" OZONE-SITE.XML_ozone.metadata.dirs: "/data/metadata" OZONE-SITE.XML_ozone.om.address: "om-0.om" OZONE-SITE.XML_ozone.recon.address: "recon-0.recon" diff --git a/hadoop-ozone/dist/src/main/k8s/examples/getting-started/config-configmap.yaml 
b/hadoop-ozone/dist/src/main/k8s/examples/getting-started/config-configmap.yaml index 666be83378cc..cef7ba08057b 100644 --- a/hadoop-ozone/dist/src/main/k8s/examples/getting-started/config-configmap.yaml +++ b/hadoop-ozone/dist/src/main/k8s/examples/getting-started/config-configmap.yaml @@ -31,6 +31,7 @@ data: OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "3" OZONE-SITE.XML_ozone.datanode.pipeline.limit: "1" OZONE-SITE.XML_hdds.datanode.volume.min.free.space: "1GB" + OZONE-SITE.XML_ozone.scm.container.size: "1GB" OZONE-SITE.XML_ozone.metadata.dirs: /data/metadata OZONE-SITE.XML_ozone.om.address: om-0.om OZONE-SITE.XML_ozone.recon.address: recon-0.recon diff --git a/hadoop-ozone/dist/src/main/k8s/examples/minikube/config-configmap.yaml b/hadoop-ozone/dist/src/main/k8s/examples/minikube/config-configmap.yaml index 666be83378cc..cef7ba08057b 100644 --- a/hadoop-ozone/dist/src/main/k8s/examples/minikube/config-configmap.yaml +++ b/hadoop-ozone/dist/src/main/k8s/examples/minikube/config-configmap.yaml @@ -31,6 +31,7 @@ data: OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "3" OZONE-SITE.XML_ozone.datanode.pipeline.limit: "1" OZONE-SITE.XML_hdds.datanode.volume.min.free.space: "1GB" + OZONE-SITE.XML_ozone.scm.container.size: "1GB" OZONE-SITE.XML_ozone.metadata.dirs: /data/metadata OZONE-SITE.XML_ozone.om.address: om-0.om OZONE-SITE.XML_ozone.recon.address: recon-0.recon diff --git a/hadoop-ozone/dist/src/main/k8s/examples/ozone-dev/config-configmap.yaml b/hadoop-ozone/dist/src/main/k8s/examples/ozone-dev/config-configmap.yaml index 55c865fe224b..ccd8d2c96130 100644 --- a/hadoop-ozone/dist/src/main/k8s/examples/ozone-dev/config-configmap.yaml +++ b/hadoop-ozone/dist/src/main/k8s/examples/ozone-dev/config-configmap.yaml @@ -31,6 +31,7 @@ data: OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "3" OZONE-SITE.XML_ozone.datanode.pipeline.limit: "1" OZONE-SITE.XML_hdds.datanode.volume.min.free.space: "1GB" + OZONE-SITE.XML_ozone.scm.container.size: "1GB" 
OZONE-SITE.XML_ozone.metadata.dirs: /data/metadata OZONE-SITE.XML_ozone.om.address: om-0.om OZONE-SITE.XML_ozone.recon.address: recon-0.recon diff --git a/hadoop-ozone/dist/src/main/k8s/examples/ozone-ha/config-configmap.yaml b/hadoop-ozone/dist/src/main/k8s/examples/ozone-ha/config-configmap.yaml index 596fddfd9c39..f433cc04187c 100644 --- a/hadoop-ozone/dist/src/main/k8s/examples/ozone-ha/config-configmap.yaml +++ b/hadoop-ozone/dist/src/main/k8s/examples/ozone-ha/config-configmap.yaml @@ -31,6 +31,7 @@ data: OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "3" OZONE-SITE.XML_ozone.datanode.pipeline.limit: "1" OZONE-SITE.XML_hdds.datanode.volume.min.free.space: "1GB" + OZONE-SITE.XML_ozone.scm.container.size: "1GB" OZONE-SITE.XML_ozone.metadata.dirs: /data/metadata OZONE-SITE.XML_ozone.om.address: om-0.om OZONE-SITE.XML_ozone.recon.address: recon-0.recon diff --git a/hadoop-ozone/dist/src/main/k8s/examples/ozone/config-configmap.yaml b/hadoop-ozone/dist/src/main/k8s/examples/ozone/config-configmap.yaml index 68a5697992c0..4faf6888bde5 100644 --- a/hadoop-ozone/dist/src/main/k8s/examples/ozone/config-configmap.yaml +++ b/hadoop-ozone/dist/src/main/k8s/examples/ozone/config-configmap.yaml @@ -31,6 +31,7 @@ data: OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "3" OZONE-SITE.XML_ozone.datanode.pipeline.limit: "1" OZONE-SITE.XML_hdds.datanode.volume.min.free.space: "1GB" + OZONE-SITE.XML_ozone.scm.container.size: "1GB" OZONE-SITE.XML_ozone.metadata.dirs: /data/metadata OZONE-SITE.XML_ozone.om.address: om-0.om OZONE-SITE.XML_ozone.recon.address: recon-0.recon