From e5d4d8671deda8a2a416724da160f5c4a4118955 Mon Sep 17 00:00:00 2001
From: ashishk
Date: Fri, 14 Mar 2025 11:24:55 +0530
Subject: [PATCH 1/3] HDDS-12426. SCM replication should check for double the container size.

---
 .../replication/ReplicationManagerUtil.java         |  2 +-
 .../container/replication/ReplicationTestUtil.java  | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
index f0be5b231d99..a423a28679bc 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
@@ -78,7 +78,7 @@ public static List<DatanodeDetails> getTargetDatanodes(PlacementPolicy policy,
     // Ensure that target datanodes have enough space to hold a complete
     // container.
     final long dataSizeRequired =
-        Math.max(container.getUsedBytes(), defaultContainerSize);
+        Math.max(container.getUsedBytes(), defaultContainerSize * 2);
 
     int mutableRequiredNodes = requiredNodes;
     while (mutableRequiredNodes > 0) {
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
index 93c670e3ab4f..436ef6f76b66 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
@@ -20,6 +20,7 @@
 import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
 import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED;
 import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.mockito.Mockito.any;
 import static org.mockito.Mockito.anyBoolean;
 import static org.mockito.Mockito.anyInt;
@@ -36,6 +37,7 @@
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -43,6 +45,7 @@
 import org.apache.hadoop.hdds.scm.ContainerPlacementStatus;
 import org.apache.hadoop.hdds.scm.PlacementPolicy;
 import org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
@@ -328,6 +331,9 @@ protected List<DatanodeDetails> chooseDatanodesInternal(
           List<DatanodeDetails> favoredNodes, int nodesRequiredToChoose,
           long metadataSizeRequired, long dataSizeRequired)
           throws SCMException {
+        long containerSize = (long) conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
+            ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
+        assertEquals(containerSize * 2, dataSizeRequired);
         if (nodesRequiredToChoose > 1) {
           throw new IllegalArgumentException("Only one node is allowed");
         }
@@ -356,6 +362,9 @@ protected List<DatanodeDetails> chooseDatanodesInternal(
           List<DatanodeDetails> favoredNodes, int nodesRequiredToChoose,
           long metadataSizeRequired, long dataSizeRequired)
           throws SCMException {
+        long containerSize = (long) conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
+            ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
+        assertEquals(containerSize * 2, dataSizeRequired);
         throw new SCMException("No nodes available",
             FAILED_TO_FIND_SUITABLE_NODE);
       }
@@ -383,6 +392,9 @@ protected List<DatanodeDetails> chooseDatanodesInternal(
           List<DatanodeDetails> favoredNodes, int nodesRequiredToChoose,
           long metadataSizeRequired, long dataSizeRequired)
           throws SCMException {
+        long containerSize = (long) conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
+            ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
+        assertEquals(containerSize * 2, dataSizeRequired);
         if (nodesRequiredToChoose >= throwWhenThisOrMoreNodesRequested) {
           throw new SCMException("No nodes available",
               FAILED_TO_FIND_SUITABLE_NODE);
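
Why double the size: when SCM schedules a re-replication, the target datanode first receives the container archive under the volume's tmp directory and then unpacks it into its final location, so two copies briefly exist side by side. A standalone sketch of the check this patch introduces (hypothetical class and constant names, not part of the patch; 5 GB is assumed as the ozone.scm.container.size default):

    // Sketch only -- mirrors the space check patched into getTargetDatanodes().
    public final class ReplicationSpaceCheckSketch {

      // Assumed default for ozone.scm.container.size (5 GB).
      private static final long DEFAULT_CONTAINER_SIZE = 5L * 1024 * 1024 * 1024;

      // Free space a candidate target datanode must offer before it is chosen.
      static long dataSizeRequired(long usedBytes) {
        // Two copies: one in tmp while untarring, one in the final location.
        return Math.max(usedBytes, DEFAULT_CONTAINER_SIZE * 2);
      }

      public static void main(String[] args) {
        // A nearly empty container still reserves two full default containers.
        System.out.println(dataSizeRequired(1024));                      // 10737418240
        // Used bytes win only once they exceed the doubled default.
        System.out.println(dataSizeRequired(12L * 1024 * 1024 * 1024));  // 12884901888
      }
    }
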
From 6f340c106f0a84e376b45029ef4a8a72d177247d Mon Sep 17 00:00:00 2001
From: ashishk
Date: Tue, 18 Mar 2025 11:11:12 +0530
Subject: [PATCH 2/3] Extract space requirement to a common place

---
 .../src/main/java/org/apache/hadoop/hdds/HddsUtils.java   | 5 +++++
 .../ozone/container/replication/ContainerImporter.java    | 3 ++-
 .../scm/container/replication/ReplicationManagerUtil.java | 3 ++-
 .../scm/container/replication/ReplicationTestUtil.java    | 7 ++++---
 4 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java
index f35141d68aae..008009f4098c 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java
@@ -724,6 +724,11 @@ public static boolean shouldNotFailoverOnRpcException(Throwable exception) {
     return exception instanceof InvalidProtocolBufferException;
  }
 
+  public static long requiredReplicationSpace(long defaultContainerSize) {
+    // Container import requires double the container size: one copy is held in the tmp directory and one in the destination directory.
+    return 2 * defaultContainerSize;
+  }
+
   /**
    * Remove binary data from request {@code msg}. (May be incomplete, feel
    * free to add any missing cleanups.)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerImporter.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerImporter.java
index 46bbb6662015..ac52f23354c9 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerImporter.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerImporter.java
@@ -26,6 +26,7 @@
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
+import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
@@ -149,7 +150,7 @@ HddsVolume chooseNextVolume() throws IOException {
     // Choose volume that can hold both container in tmp and dest directory
     return volumeChoosingPolicy.chooseVolume(
         StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()),
-        containerSize * 2);
+        HddsUtils.requiredReplicationSpace(containerSize));
   }
 
   public static Path getUntarDirectory(HddsVolume hddsVolume)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
index a423a28679bc..03727606f55d 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
@@ -28,6 +28,7 @@
 import java.util.UUID;
 import java.util.function.Function;
 import java.util.stream.Collectors;
+import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
@@ -78,7 +79,7 @@ public static List<DatanodeDetails> getTargetDatanodes(PlacementPolicy policy,
     // Ensure that target datanodes have enough space to hold a complete
     // container.
     final long dataSizeRequired =
-        Math.max(container.getUsedBytes(), defaultContainerSize * 2);
+        Math.max(container.getUsedBytes(), HddsUtils.requiredReplicationSpace(defaultContainerSize));
 
     int mutableRequiredNodes = requiredNodes;
     while (mutableRequiredNodes > 0) {
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
index 436ef6f76b66..c811d719c51e 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
@@ -35,6 +35,7 @@
 import java.util.UUID;
 import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.conf.StorageUnit;
@@ -333,7 +334,7 @@ protected List<DatanodeDetails> chooseDatanodesInternal(
           throws SCMException {
         long containerSize = (long) conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
             ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
-        assertEquals(containerSize * 2, dataSizeRequired);
+        assertEquals(HddsUtils.requiredReplicationSpace(containerSize), dataSizeRequired);
         if (nodesRequiredToChoose > 1) {
           throw new IllegalArgumentException("Only one node is allowed");
         }
@@ -364,7 +365,7 @@ protected List<DatanodeDetails> chooseDatanodesInternal(
           throws SCMException {
         long containerSize = (long) conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
             ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
-        assertEquals(containerSize * 2, dataSizeRequired);
+        assertEquals(HddsUtils.requiredReplicationSpace(containerSize), dataSizeRequired);
         throw new SCMException("No nodes available",
             FAILED_TO_FIND_SUITABLE_NODE);
       }
@@ -394,7 +395,7 @@ protected List<DatanodeDetails> chooseDatanodesInternal(
           throws SCMException {
         long containerSize = (long) conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
             ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
-        assertEquals(containerSize * 2, dataSizeRequired);
+        assertEquals(HddsUtils.requiredReplicationSpace(containerSize), dataSizeRequired);
         if (nodesRequiredToChoose >= throwWhenThisOrMoreNodesRequested) {
           throw new SCMException("No nodes available",
               FAILED_TO_FIND_SUITABLE_NODE);
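
With the rule extracted, the datanode-side volume choice (ContainerImporter) and the SCM-side target choice (ReplicationManagerUtil) share one definition of the space requirement instead of two hard-coded `* 2` expressions. A simplified, self-contained sketch of how the two call sites line up after this patch (the surrounding method shapes are illustrative stand-ins, not the real signatures):

    // Sketch only; volumeFits() and dataSizeRequired() are stand-ins.
    final class RequiredSpaceSketch {

      // The helper this patch adds to HddsUtils.
      static long requiredReplicationSpace(long defaultContainerSize) {
        return 2 * defaultContainerSize; // tmp copy + destination copy
      }

      // Datanode side (ContainerImporter.chooseNextVolume): the chosen volume
      // must fit both copies of the container being imported.
      static boolean volumeFits(long volumeFreeBytes, long containerSize) {
        return volumeFreeBytes >= requiredReplicationSpace(containerSize);
      }

      // SCM side (ReplicationManagerUtil.getTargetDatanodes): the target must
      // fit the larger of the used bytes and the doubled default size.
      static long dataSizeRequired(long usedBytes, long defaultContainerSize) {
        return Math.max(usedBytes, requiredReplicationSpace(defaultContainerSize));
      }
    }
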
From af274fb966a9e1c39def7bf93282f50da5dc89bf Mon Sep 17 00:00:00 2001
From: ashishk
Date: Tue, 18 Mar 2025 11:38:10 +0530
Subject: [PATCH 3/3] Fix review comment

---
 .../src/main/java/org/apache/hadoop/hdds/HddsUtils.java   | 5 -----
 .../ozone/container/replication/ContainerImporter.java    | 4 ++--
 .../java/org/apache/hadoop/hdds/utils/HddsServerUtil.java | 5 +++++
 .../scm/container/replication/ReplicationManagerUtil.java | 4 ++--
 .../scm/container/replication/ReplicationTestUtil.java    | 8 ++++----
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java
index 008009f4098c..f35141d68aae 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java
@@ -724,11 +724,6 @@ public static boolean shouldNotFailoverOnRpcException(Throwable exception) {
     return exception instanceof InvalidProtocolBufferException;
   }
 
-  public static long requiredReplicationSpace(long defaultContainerSize) {
-    // Container import requires double the container size: one copy is held in the tmp directory and one in the destination directory.
-    return 2 * defaultContainerSize;
-  }
-
   /**
    * Remove binary data from request {@code msg}. (May be incomplete, feel
    * free to add any missing cleanups.)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerImporter.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerImporter.java
index ac52f23354c9..90fc50f84ab3 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerImporter.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerImporter.java
@@ -26,12 +26,12 @@
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
-import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
+import org.apache.hadoop.hdds.utils.HddsServerUtil;
 import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
 import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
 import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
@@ -150,7 +150,7 @@ HddsVolume chooseNextVolume() throws IOException {
     // Choose volume that can hold both container in tmp and dest directory
     return volumeChoosingPolicy.chooseVolume(
         StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()),
-        HddsUtils.requiredReplicationSpace(containerSize));
+        HddsServerUtil.requiredReplicationSpace(containerSize));
   }
 
   public static Path getUntarDirectory(HddsVolume hddsVolume)
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java
index 5d04f2060f59..f554990fc941 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java
@@ -403,6 +403,11 @@ public static Collection<String> getOzoneDatanodeRatisDirectory(
     return rawLocations;
   }
 
+  public static long requiredReplicationSpace(long defaultContainerSize) {
+    // Container import requires double the container size: one copy is held in the tmp directory and one in the destination directory.
+    return 2 * defaultContainerSize;
+  }
+
   public static Collection<String> getDatanodeStorageDirs(ConfigurationSource conf) {
     Collection<String> rawLocations = conf.getTrimmedStringCollection(HDDS_DATANODE_DIR_KEY);
     if (rawLocations.isEmpty()) {
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
index 03727606f55d..503126198a0d 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
@@ -28,7 +28,6 @@
 import java.util.UUID;
 import java.util.function.Function;
 import java.util.stream.Collectors;
-import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
@@ -38,6 +37,7 @@
 import org.apache.hadoop.hdds.scm.exceptions.SCMException;
 import org.apache.hadoop.hdds.scm.node.NodeStatus;
 import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
+import org.apache.hadoop.hdds.utils.HddsServerUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -79,7 +79,7 @@ public static List<DatanodeDetails> getTargetDatanodes(PlacementPolicy policy,
     // Ensure that target datanodes have enough space to hold a complete
     // container.
     final long dataSizeRequired =
-        Math.max(container.getUsedBytes(), HddsUtils.requiredReplicationSpace(defaultContainerSize));
+        HddsServerUtil.requiredReplicationSpace(Math.max(container.getUsedBytes(), defaultContainerSize));
 
     int mutableRequiredNodes = requiredNodes;
     while (mutableRequiredNodes > 0) {
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
index c811d719c51e..9cc264b3c70d 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
@@ -35,7 +35,6 @@
 import java.util.UUID;
 import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.commons.lang3.tuple.Pair;
-import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.conf.StorageUnit;
@@ -55,6 +54,7 @@
 import org.apache.hadoop.hdds.scm.exceptions.SCMException;
 import org.apache.hadoop.hdds.scm.net.Node;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.hdds.utils.HddsServerUtil;
 import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand;
 import org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand;
 import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand;
@@ -334,7 +334,7 @@ protected List<DatanodeDetails> chooseDatanodesInternal(
           throws SCMException {
         long containerSize = (long) conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
             ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
-        assertEquals(HddsUtils.requiredReplicationSpace(containerSize), dataSizeRequired);
+        assertEquals(HddsServerUtil.requiredReplicationSpace(containerSize), dataSizeRequired);
         if (nodesRequiredToChoose > 1) {
           throw new IllegalArgumentException("Only one node is allowed");
         }
@@ -365,7 +365,7 @@ protected List<DatanodeDetails> chooseDatanodesInternal(
           throws SCMException {
         long containerSize = (long) conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
             ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
-        assertEquals(HddsUtils.requiredReplicationSpace(containerSize), dataSizeRequired);
+        assertEquals(HddsServerUtil.requiredReplicationSpace(containerSize), dataSizeRequired);
         throw new SCMException("No nodes available",
             FAILED_TO_FIND_SUITABLE_NODE);
       }
@@ -395,7 +395,7 @@ protected List<DatanodeDetails> chooseDatanodesInternal(
           throws SCMException {
         long containerSize = (long) conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
             ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
-        assertEquals(HddsUtils.requiredReplicationSpace(containerSize), dataSizeRequired);
+        assertEquals(HddsServerUtil.requiredReplicationSpace(containerSize), dataSizeRequired);
         if (nodesRequiredToChoose >= throwWhenThisOrMoreNodesRequested) {
           throw new SCMException("No nodes available",
               FAILED_TO_FIND_SUITABLE_NODE);
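
Note that patch 3 is more than a move from HddsUtils to HddsServerUtil: it also reorders the SCM-side formula from max(used, 2 * default) to 2 * max(used, default), so a container that has outgrown the default size now also reserves room for both copies. A small self-contained comparison with made-up sizes (sketch only, not part of the patch):

    // Sketch contrasting the patch 2 and patch 3 formulas.
    public final class MaxThenDoubleSketch {

      static long requiredReplicationSpace(long size) {
        return 2 * size;
      }

      public static void main(String[] args) {
        long defaultSize = 5L << 30; // assumed 5 GB default container size
        long usedBytes = 7L << 30;   // a container that outgrew the default

        // Patch 2: doubling only the default under-reserves for large containers.
        long before = Math.max(usedBytes, requiredReplicationSpace(defaultSize));
        // Patch 3: max first, then double, so both copies of a large container fit.
        long after = requiredReplicationSpace(Math.max(usedBytes, defaultSize));

        System.out.println(before >> 30); // 10 (GB)
        System.out.println(after >> 30);  // 14 (GB)
      }
    }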