From 163dc8b58ee2a88cc505671bb2f1799bb745049f Mon Sep 17 00:00:00 2001 From: Duong Nguyen Date: Fri, 3 May 2024 09:09:05 -0700 Subject: [PATCH 01/17] HDDS-10780. NullPointerException in watchForCommit (#6627) (cherry picked from commit e1d2d9c832e8688787faeff7e58d2a9d24ec0391) --- .../java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java index 58a2153352a4..5c7d7480070a 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java @@ -149,7 +149,8 @@ public long updateCommitInfosMap( } else { stream = commitInfoProtos.stream().map(proto -> commitInfoMap .computeIfPresent(RatisHelper.toDatanodeId(proto.getServer()), - (address, index) -> proto.getCommitIndex())); + (address, index) -> proto.getCommitIndex())) + .filter(Objects::nonNull); } return stream.mapToLong(Long::longValue).min().orElse(0); } From 9d1ad1aeba1c51f9341e9207a714e69b594a78fa Mon Sep 17 00:00:00 2001 From: hao guo Date: Thu, 9 May 2024 00:38:54 +0800 Subject: [PATCH 02/17] HDDS-10777. S3 Gateway error when parsing XML concurrently (#6609) (cherry picked from commit ff78dc83a7640bf1dbed409bc6e79b3a0f231483) --- ...eteMultipartUploadRequestUnmarshaller.java | 6 +-- .../MultiDeleteRequestUnmarshaller.java | 6 +-- .../PutBucketAclRequestUnmarshaller.java | 6 +-- ...eteMultipartUploadRequestUnmarshaller.java | 44 +++++++++++++++++++ 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/CompleteMultipartUploadRequestUnmarshaller.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/CompleteMultipartUploadRequestUnmarshaller.java index 17f7f575a6ca..cdaaa228ecd7 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/CompleteMultipartUploadRequestUnmarshaller.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/CompleteMultipartUploadRequestUnmarshaller.java @@ -44,14 +44,13 @@ public class CompleteMultipartUploadRequestUnmarshaller implements MessageBodyReader { private final JAXBContext context; - private final XMLReader xmlReader; + private final SAXParserFactory saxParserFactory; public CompleteMultipartUploadRequestUnmarshaller() { try { context = JAXBContext.newInstance(CompleteMultipartUploadRequest.class); - SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); + saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); - xmlReader = saxParserFactory.newSAXParser().getXMLReader(); } catch (Exception ex) { throw new AssertionError("Can not instantiate " + "CompleteMultipartUploadRequest parser", ex); @@ -70,6 +69,7 @@ public CompleteMultipartUploadRequest readFrom( MultivaluedMap multivaluedMap, InputStream inputStream) throws IOException, WebApplicationException { try { + XMLReader xmlReader = saxParserFactory.newSAXParser().getXMLReader(); UnmarshallerHandler unmarshallerHandler = context.createUnmarshaller().getUnmarshallerHandler(); XmlNamespaceFilter filter = diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/MultiDeleteRequestUnmarshaller.java 
b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/MultiDeleteRequestUnmarshaller.java index f5745a8fc102..0c34c08091aa 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/MultiDeleteRequestUnmarshaller.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/MultiDeleteRequestUnmarshaller.java @@ -43,14 +43,13 @@ public class MultiDeleteRequestUnmarshaller implements MessageBodyReader { private final JAXBContext context; - private final XMLReader xmlReader; + private final SAXParserFactory saxParserFactory; public MultiDeleteRequestUnmarshaller() { try { context = JAXBContext.newInstance(MultiDeleteRequest.class); - SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); + saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); - xmlReader = saxParserFactory.newSAXParser().getXMLReader(); } catch (Exception ex) { throw new AssertionError("Can't instantiate MultiDeleteRequest parser", ex); @@ -68,6 +67,7 @@ public MultiDeleteRequest readFrom(Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, InputStream entityStream) { try { + XMLReader xmlReader = saxParserFactory.newSAXParser().getXMLReader(); UnmarshallerHandler unmarshallerHandler = context.createUnmarshaller().getUnmarshallerHandler(); diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/PutBucketAclRequestUnmarshaller.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/PutBucketAclRequestUnmarshaller.java index 3ca2e47c469e..3fa6149815ea 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/PutBucketAclRequestUnmarshaller.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/PutBucketAclRequestUnmarshaller.java @@ -44,14 +44,13 @@ public class PutBucketAclRequestUnmarshaller implements MessageBodyReader { private final JAXBContext context; - private final XMLReader xmlReader; + private final SAXParserFactory saxParserFactory; public PutBucketAclRequestUnmarshaller() { try { context = JAXBContext.newInstance(S3BucketAcl.class); - SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); + saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); - xmlReader = saxParserFactory.newSAXParser().getXMLReader(); } catch (Exception ex) { throw new AssertionError("Can not instantiate " + "PutBucketAclRequest parser", ex); @@ -70,6 +69,7 @@ public S3BucketAcl readFrom( MultivaluedMap multivaluedMap, InputStream inputStream) throws IOException, WebApplicationException { try { + XMLReader xmlReader = saxParserFactory.newSAXParser().getXMLReader(); UnmarshallerHandler unmarshallerHandler = context.createUnmarshaller().getUnmarshallerHandler(); XmlNamespaceFilter filter = diff --git a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/endpoint/TestCompleteMultipartUploadRequestUnmarshaller.java b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/endpoint/TestCompleteMultipartUploadRequestUnmarshaller.java index cd0fbfed4e65..1872c440da31 100644 --- a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/endpoint/TestCompleteMultipartUploadRequestUnmarshaller.java +++ b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/s3/endpoint/TestCompleteMultipartUploadRequestUnmarshaller.java @@ 
-20,12 +20,16 @@ import java.io.ByteArrayInputStream; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.UUID; +import java.util.concurrent.CompletableFuture; + import org.junit.jupiter.api.Test; import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; /** * Class tests Unmarshall logic of {@link CompleteMultipartUploadRequest}. @@ -75,6 +79,7 @@ public void fromStreamWithoutNamespace() throws IOException { } private void checkContent(CompleteMultipartUploadRequest request) { + assertNotNull(request); assertEquals(2, request.getPartList().size()); List parts = @@ -89,4 +94,43 @@ private CompleteMultipartUploadRequest unmarshall( return new CompleteMultipartUploadRequestUnmarshaller() .readFrom(null, null, null, null, null, inputBody); } + + @Test + public void concurrentParse() { + CompleteMultipartUploadRequestUnmarshaller unmarshaller = + new CompleteMultipartUploadRequestUnmarshaller(); + byte[] bytes = ("" + "" + part1 + + "1" + + part2 + "2" + + "").getBytes( + UTF_8); + + List> futures = + new ArrayList<>(); + for (int i = 0; i < 40; i++) { + futures.add(CompletableFuture.supplyAsync(() -> { + try { + //GIVEN + ByteArrayInputStream inputBody = new ByteArrayInputStream(bytes); + //WHEN + return unmarshall(unmarshaller, inputBody); + } catch (IOException e) { + return null; + } + })); + } + + for (CompletableFuture future : futures) { + CompleteMultipartUploadRequest request = future.join(); + //THEN + checkContent(request); + } + } + + private CompleteMultipartUploadRequest unmarshall( + CompleteMultipartUploadRequestUnmarshaller unmarshaller, + ByteArrayInputStream inputBody) throws IOException { + return unmarshaller + .readFrom(null, null, null, null, null, inputBody); + } } From 995aacd9b7346b7b27466141e668be4e05b1ba62 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Sun, 2 Jun 2024 01:19:49 +0800 Subject: [PATCH 03/17] HDDS-10918. 
NPE in OM when leader transfers (#6735) (cherry picked from commit 2ade7faa942e40dbb44c557d742e1f30ba6b0cb2) --- .../hadoop/ozone/om/exceptions/OMNotLeaderException.java | 7 +++++++ .../hadoop/ozone/om/ratis/OzoneManagerRatisServer.java | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMNotLeaderException.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMNotLeaderException.java index 1abd92b55f8b..91d541dcf7ad 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMNotLeaderException.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMNotLeaderException.java @@ -56,6 +56,13 @@ public OMNotLeaderException(RaftPeerId currentPeerId, this.leaderAddress = suggestedLeaderAddress; } + public OMNotLeaderException(String msg) { + super(msg); + this.currentPeerId = null; + this.leaderPeerId = null; + this.leaderAddress = null; + } + public String getSuggestedLeaderNodeId() { return leaderPeerId; } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java index 48e41925a133..691c7ec56e7b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java @@ -72,6 +72,7 @@ import org.apache.ratis.protocol.ClientId; import org.apache.ratis.protocol.SetConfigurationRequest; import org.apache.ratis.protocol.exceptions.LeaderNotReadyException; +import org.apache.ratis.protocol.exceptions.LeaderSteppingDownException; import org.apache.ratis.protocol.exceptions.NotLeaderException; import org.apache.ratis.protocol.exceptions.StateMachineException; import org.apache.ratis.protocol.Message; @@ -493,6 +494,11 @@ private OMResponse createOmResponseImpl(OMRequest omRequest, leaderNotReadyException.getMessage())); } + LeaderSteppingDownException leaderSteppingDownException = reply.getLeaderSteppingDownException(); + if (leaderSteppingDownException != null) { + throw new ServiceException(new OMNotLeaderException(leaderSteppingDownException.getMessage())); + } + StateMachineException stateMachineException = reply.getStateMachineException(); if (stateMachineException != null) { From dd7d1ff461d67d13dd7f67fea7a39025c12ae247 Mon Sep 17 00:00:00 2001 From: Siddhant Sangwan Date: Sat, 27 Jul 2024 04:08:58 +0800 Subject: [PATCH 04/17] HDDS-11136. 
Some containers affected by HDDS-8129 may still be in the DELETING state incorrectly (#6967) (cherry picked from commit 69ba680c515a519a2e2fef611efe151aa033d7cd) --- .../AbstractContainerReportHandler.java | 12 +- .../hdds/scm/container/ContainerManager.java | 10 ++ .../scm/container/ContainerManagerImpl.java | 15 ++ .../scm/container/ContainerStateManager.java | 12 ++ .../container/ContainerStateManagerImpl.java | 23 +++ .../container/TestContainerManagerImpl.java | 59 ++++++ .../container/TestContainerReportHandler.java | 107 ++++++++++- .../container/TestContainerStateManager.java | 44 +++++ .../TestContainerReportHandling.java | 156 ++++++++++++++++ .../TestContainerReportHandlingWithHA.java | 168 ++++++++++++++++++ .../hadoop/ozone/container/TestHelper.java | 21 ++- 11 files changed, 616 insertions(+), 11 deletions(-) create mode 100644 hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java create mode 100644 hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java index 7e163ac306f8..34682b85bccd 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java @@ -349,8 +349,18 @@ private boolean updateContainerState(final DatanodeDetails datanode, break; case DELETING: /* - * The container is under deleting. do nothing. + HDDS-11136: If a DELETING container has a non-empty CLOSED replica, the container should be moved back to CLOSED + state. */ + boolean replicaStateAllowed = replica.getState() == State.CLOSED; + boolean replicaNotEmpty = replica.hasIsEmpty() && !replica.getIsEmpty(); + if (replicaStateAllowed && replicaNotEmpty) { + logger.info("Moving DELETING container {} to CLOSED state, datanode {} reported replica with state={}, " + + "isEmpty={} and keyCount={}.", containerId, datanode.getHostName(), replica.getState(), false, + replica.getKeyCount()); + containerManager.transitionDeletingToClosedState(containerId); + } + break; case DELETED: /* diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java index 2a60e268ff4a..3eba240533e3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java @@ -131,6 +131,16 @@ void updateContainerState(ContainerID containerID, LifeCycleEvent event) throws IOException, InvalidStateTransitionException; + /** + * Bypasses the container state machine to change a container's state from DELETING to CLOSED. This API was + * introduced to fix a bug (HDDS-11136), and should be used with care otherwise. + * + * @see HDDS-11136 + * @param containerID id of the container to transition + * @throws IOException + */ + void transitionDeletingToClosedState(ContainerID containerID) throws IOException; + /** * Returns the latest list of replicas for given containerId. 
* diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java index 34604edb3b03..53fc615cca2f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java @@ -282,6 +282,21 @@ public void updateContainerState(final ContainerID cid, } } + @Override + public void transitionDeletingToClosedState(ContainerID containerID) throws IOException { + HddsProtos.ContainerID proto = containerID.getProtobuf(); + lock.lock(); + try { + if (containerExist(containerID)) { + containerStateManager.transitionDeletingToClosedState(proto); + } else { + throwContainerNotFoundException(containerID); + } + } finally { + lock.unlock(); + } + } + @Override public Set getContainerReplicas(final ContainerID id) throws ContainerNotFoundException { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java index 9f741068a86e..4f478b201cd4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java @@ -151,6 +151,18 @@ void updateContainerState(HddsProtos.ContainerID id, HddsProtos.LifeCycleEvent event) throws IOException, InvalidStateTransitionException; + + /** + * Bypasses the container state machine to change a container's state from DELETING to CLOSED. This API was + * introduced to fix a bug (HDDS-11136), and should be used with care otherwise. 
+ * + * @see HDDS-11136 + * @param id id of the container to transition + * @throws IOException + */ + @Replicate + void transitionDeletingToClosedState(HddsProtos.ContainerID id) throws IOException; + /** * */ diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 62e1f0193561..c140e97bbc5d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.RequestType; import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.container.common.helpers.InvalidContainerStateException; import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaPendingOps; import org.apache.hadoop.hdds.scm.container.states.ContainerState; import org.apache.hadoop.hdds.scm.container.states.ContainerStateMap; @@ -367,6 +368,28 @@ public void updateContainerState(final HddsProtos.ContainerID containerID, } } + @Override + public void transitionDeletingToClosedState(HddsProtos.ContainerID containerID) throws IOException { + final ContainerID id = ContainerID.getFromProtobuf(containerID); + + try (AutoCloseableLock ignored = writeLock(id)) { + if (containers.contains(id)) { + final ContainerInfo oldInfo = containers.getContainerInfo(id); + final LifeCycleState oldState = oldInfo.getState(); + if (oldState != DELETING) { + throw new InvalidContainerStateException("Cannot transition container " + id + " from " + oldState + + " back to CLOSED. 
The container must be in the DELETING state."); + } + ExecutionUtil.create(() -> { + containers.updateState(id, oldState, CLOSED); + transactionBuffer.addToBuffer(containerStore, id, containers.getContainerInfo(id)); + }).onException(() -> { + transactionBuffer.addToBuffer(containerStore, id, oldInfo); + containers.updateState(id, CLOSED, oldState); + }).execute(); + } + } + } @Override public Set getContainerReplicas(final ContainerID id) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java index dca8498e38ff..567c5fa61ded 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerManagerImpl.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.DBStoreBuilder; +import org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException; import org.apache.hadoop.ozone.container.common.SCMTestUtils; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterEach; @@ -52,6 +53,8 @@ import org.junit.jupiter.api.Test; import org.mockito.Mockito; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertEquals; import static java.util.Collections.emptyList; import static java.util.stream.Collectors.toList; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.OPEN; @@ -139,6 +142,62 @@ void testUpdateContainerState() throws Exception { containerManager.getContainer(cid).getState()); } + @Test + void testTransitionDeletingToClosedState() throws IOException, InvalidStateTransitionException { + // allocate OPEN Ratis and Ec containers, and do a series of state changes to transition them to DELETING + final ContainerInfo container = containerManager.allocateContainer( + RatisReplicationConfig.getInstance( + ReplicationFactor.THREE), "admin"); + ContainerInfo ecContainer = containerManager.allocateContainer(new ECReplicationConfig(3, 2), "admin"); + final ContainerID cid = container.containerID(); + final ContainerID ecCid = ecContainer.containerID(); + assertEquals(LifeCycleState.OPEN, containerManager.getContainer(cid).getState()); + assertEquals(LifeCycleState.OPEN, containerManager.getContainer(ecCid).getState()); + + // OPEN -> CLOSING + containerManager.updateContainerState(cid, + HddsProtos.LifeCycleEvent.FINALIZE); + containerManager.updateContainerState(ecCid, HddsProtos.LifeCycleEvent.FINALIZE); + assertEquals(LifeCycleState.CLOSING, containerManager.getContainer(cid).getState()); + assertEquals(LifeCycleState.CLOSING, containerManager.getContainer(ecCid).getState()); + + // CLOSING -> CLOSED + containerManager.updateContainerState(cid, HddsProtos.LifeCycleEvent.CLOSE); + containerManager.updateContainerState(ecCid, HddsProtos.LifeCycleEvent.CLOSE); + assertEquals(LifeCycleState.CLOSED, containerManager.getContainer(cid).getState()); + assertEquals(LifeCycleState.CLOSED, containerManager.getContainer(ecCid).getState()); + + // CLOSED -> DELETING + containerManager.updateContainerState(cid, HddsProtos.LifeCycleEvent.DELETE); + containerManager.updateContainerState(ecCid, HddsProtos.LifeCycleEvent.DELETE); + 
assertEquals(LifeCycleState.DELETING, containerManager.getContainer(cid).getState()); + assertEquals(LifeCycleState.DELETING, containerManager.getContainer(ecCid).getState()); + + // DELETING -> CLOSED + containerManager.transitionDeletingToClosedState(cid); + containerManager.transitionDeletingToClosedState(ecCid); + // the containers should be back in CLOSED state now + assertEquals(LifeCycleState.CLOSED, containerManager.getContainer(cid).getState()); + assertEquals(LifeCycleState.CLOSED, containerManager.getContainer(ecCid).getState()); + } + + @Test + void testTransitionDeletingToClosedStateAllowsOnlyDeletingContainers() throws IOException { + // test for RATIS container + final ContainerInfo container = containerManager.allocateContainer( + RatisReplicationConfig.getInstance( + ReplicationFactor.THREE), "admin"); + final ContainerID cid = container.containerID(); + assertEquals(LifeCycleState.OPEN, containerManager.getContainer(cid).getState()); + assertThrows(IOException.class, () -> containerManager.transitionDeletingToClosedState(cid)); + + // test for EC container + final ContainerInfo ecContainer = containerManager.allocateContainer(new ECReplicationConfig(3, 2), "admin"); + final ContainerID ecCid = ecContainer.containerID(); + assertEquals(LifeCycleState.OPEN, containerManager.getContainer(ecCid).getState()); + assertThrows(IOException.class, () -> containerManager.transitionDeletingToClosedState(ecCid)); + } + @Test void testGetContainers() throws Exception { Assertions.assertEquals(emptyList(), containerManager.getContainers()); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java index 30a5c13ea21e..5c7516ec5fbd 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java @@ -71,6 +71,7 @@ import java.util.stream.Stream; import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails; +import static org.apache.hadoop.hdds.scm.HddsTestUtils.getContainerReports; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.apache.hadoop.hdds.scm.HddsTestUtils.getECContainer; import static org.apache.hadoop.hdds.scm.HddsTestUtils.getReplicas; @@ -102,7 +103,6 @@ public void setup() throws IOException, InvalidStateTransitionException, dbStore = DBStoreBuilder.createDBStore( conf, new SCMDBDefinition()); scmhaManager = SCMHAManagerStub.getInstance(true); - nodeManager = new MockNodeManager(true, 10); pipelineManager = new MockPipelineManager(dbStore, scmhaManager, nodeManager); containerStateManager = ContainerStateManagerImpl.newBuilder() @@ -153,6 +153,10 @@ public void setup() throws IOException, InvalidStateTransitionException, }).when(containerManager).removeContainerReplica( Mockito.any(ContainerID.class), Mockito.any(ContainerReplica.class)); + Mockito.doAnswer(invocation -> { + containerStateManager.transitionDeletingToClosedState(((ContainerID) invocation.getArgument(0)).getProtobuf()); + return null; + }).when(containerManager).transitionDeletingToClosedState(Mockito.any(ContainerID.class)); } @AfterEach @@ -451,6 +455,107 @@ public void testClosingToClosedForECContainer() containerManager.getContainer(container2.containerID()).getState()); } + /** + * Tests that a DELETING RATIS container 
transitions to CLOSED if a non-empty CLOSED replica is reported. It does not + * transition if a non-empty CLOSING replica is reported. + */ + @Test + public void ratisContainerShouldTransitionFromDeletingToClosedWhenNonEmptyClosedReplica() throws IOException { + ContainerInfo container = getContainer(LifeCycleState.DELETING); + containerStateManager.addContainer(container.getProtobuf()); + + // set up a non-empty CLOSED replica + DatanodeDetails dnWithClosedReplica = nodeManager.getNodes(NodeStatus.inServiceHealthy()).get(0); + ContainerReplicaProto.Builder builder = ContainerReplicaProto.newBuilder(); + ContainerReplicaProto closedReplica = builder.setContainerID(container.getContainerID()) + .setIsEmpty(false) + .setState(ContainerReplicaProto.State.CLOSED) + .setKeyCount(0) + .setBlockCommitSequenceId(123) + .setOriginNodeId(dnWithClosedReplica.getUuidString()).build(); + + // set up a non-empty CLOSING replica + DatanodeDetails dnWithClosingReplica = nodeManager.getNodes(NodeStatus.inServiceHealthy()).get(1); + ContainerReplicaProto closingReplica = builder.setState(ContainerReplicaProto.State.CLOSING) + .setOriginNodeId(dnWithClosingReplica.getUuidString()).build(); + + // should not transition on processing the CLOSING replica's report + ContainerReportHandler containerReportHandler = new ContainerReportHandler(nodeManager, containerManager); + ContainerReportsProto closingContainerReport = getContainerReports(closingReplica); + containerReportHandler + .onMessage(new ContainerReportFromDatanode(dnWithClosingReplica, closingContainerReport), publisher); + + assertEquals(LifeCycleState.DELETING, containerStateManager.getContainer(container.containerID()).getState()); + + // should transition on processing the CLOSED replica's report + ContainerReportsProto closedContainerReport = getContainerReports(closedReplica); + containerReportHandler + .onMessage(new ContainerReportFromDatanode(dnWithClosedReplica, closedContainerReport), publisher); + assertEquals(LifeCycleState.CLOSED, containerStateManager.getContainer(container.containerID()).getState()); + } + + @Test + public void ratisContainerShouldNotTransitionFromDeletingToClosedWhenEmptyClosedReplica() throws IOException { + ContainerInfo container = getContainer(LifeCycleState.DELETING); + containerStateManager.addContainer(container.getProtobuf()); + + // set up an empty CLOSED replica + DatanodeDetails dnWithClosedReplica = nodeManager.getNodes(NodeStatus.inServiceHealthy()).get(0); + ContainerReplicaProto.Builder builder = ContainerReplicaProto.newBuilder(); + ContainerReplicaProto closedReplica = builder.setContainerID(container.getContainerID()) + .setIsEmpty(true) + .setState(ContainerReplicaProto.State.CLOSED) + .setKeyCount(0) + .setBlockCommitSequenceId(123) + .setOriginNodeId(dnWithClosedReplica.getUuidString()).build(); + + ContainerReportHandler containerReportHandler = new ContainerReportHandler(nodeManager, containerManager); + ContainerReportsProto closedContainerReport = getContainerReports(closedReplica); + containerReportHandler + .onMessage(new ContainerReportFromDatanode(dnWithClosedReplica, closedContainerReport), publisher); + assertEquals(LifeCycleState.DELETING, containerStateManager.getContainer(container.containerID()).getState()); + } + + /** + * Tests that a DELETING EC container transitions to CLOSED if a non-empty CLOSED replica is reported. It does not + * transition if a non-empty CLOSING (or any other state) replica is reported. 
+ */ + @Test + public void ecContainerShouldTransitionFromDeletingToClosedWhenNonEmptyClosedReplica() throws IOException { + ContainerInfo container = getECContainer(LifeCycleState.DELETING, PipelineID.randomId(), + new ECReplicationConfig(6, 3)); + containerStateManager.addContainer(container.getProtobuf()); + + // set up a non-empty CLOSED replica + DatanodeDetails dnWithClosedReplica = nodeManager.getNodes(NodeStatus.inServiceHealthy()).get(0); + ContainerReplicaProto.Builder builder = ContainerReplicaProto.newBuilder(); + ContainerReplicaProto closedReplica = builder.setContainerID(container.getContainerID()) + .setIsEmpty(false) + .setState(ContainerReplicaProto.State.CLOSED) + .setKeyCount(0) + .setBlockCommitSequenceId(0) + .setReplicaIndex(1) + .setOriginNodeId(dnWithClosedReplica.getUuidString()).build(); + + // set up a non-empty CLOSING replica + DatanodeDetails dnWithClosingReplica = nodeManager.getNodes(NodeStatus.inServiceHealthy()).get(1); + ContainerReplicaProto closingReplica = builder.setState(ContainerReplicaProto.State.CLOSING).setReplicaIndex(2) + .setOriginNodeId(dnWithClosingReplica.getUuidString()).build(); + + // should not transition on processing the CLOSING replica's report + ContainerReportHandler containerReportHandler = new ContainerReportHandler(nodeManager, containerManager); + ContainerReportsProto closingContainerReport = getContainerReports(closingReplica); + containerReportHandler + .onMessage(new ContainerReportFromDatanode(dnWithClosingReplica, closingContainerReport), publisher); + assertEquals(LifeCycleState.DELETING, containerStateManager.getContainer(container.containerID()).getState()); + + // should transition on processing the CLOSED replica's report + ContainerReportsProto closedContainerReport = getContainerReports(closedReplica); + containerReportHandler + .onMessage(new ContainerReportFromDatanode(dnWithClosedReplica, closedContainerReport), publisher); + assertEquals(LifeCycleState.CLOSED, containerStateManager.getContainer(container.containerID()).getState()); + } + /** * Creates the required number of DNs that will hold a replica each for the * specified EC container. 
Registers these DNs with the NodeManager, adds this diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java index 233cd5c4d9ef..349871c26eed 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; +import org.apache.hadoop.hdds.scm.container.common.helpers.InvalidContainerStateException; import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaPendingOps; import org.apache.hadoop.hdds.scm.ha.SCMHAManagerStub; import org.apache.hadoop.hdds.scm.ha.SCMHAManager; @@ -53,6 +54,8 @@ import org.junit.jupiter.api.Test; import org.mockito.Mockito; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.Mockito.when; /** @@ -153,6 +156,47 @@ public void checkReplicationStateMissingReplica() Assertions.assertEquals(3, c1.getReplicationConfig().getRequiredNodes()); } + @Test + public void testTransitionDeletingToClosedState() throws IOException { + HddsProtos.ContainerInfoProto.Builder builder = HddsProtos.ContainerInfoProto.newBuilder(); + builder.setContainerID(1) + .setState(HddsProtos.LifeCycleState.DELETING) + .setUsedBytes(0) + .setNumberOfKeys(0) + .setOwner("root") + .setReplicationType(HddsProtos.ReplicationType.RATIS) + .setReplicationFactor(ReplicationFactor.THREE); + + HddsProtos.ContainerInfoProto container = builder.build(); + HddsProtos.ContainerID cid = HddsProtos.ContainerID.newBuilder().setId(container.getContainerID()).build(); + containerStateManager.addContainer(container); + containerStateManager.transitionDeletingToClosedState(cid); + Assertions.assertEquals(HddsProtos.LifeCycleState.CLOSED, + containerStateManager.getContainer(ContainerID.getFromProtobuf(cid)).getState()); + } + + @Test + public void testTransitionDeletingToClosedStateAllowsOnlyDeletingContainer() throws IOException { + HddsProtos.ContainerInfoProto.Builder builder = HddsProtos.ContainerInfoProto.newBuilder(); + builder.setContainerID(1) + .setState(HddsProtos.LifeCycleState.QUASI_CLOSED) + .setUsedBytes(0) + .setNumberOfKeys(0) + .setOwner("root") + .setReplicationType(HddsProtos.ReplicationType.RATIS) + .setReplicationFactor(ReplicationFactor.THREE); + + HddsProtos.ContainerInfoProto container = builder.build(); + HddsProtos.ContainerID cid = HddsProtos.ContainerID.newBuilder().setId(container.getContainerID()).build(); + containerStateManager.addContainer(container); + try { + containerStateManager.transitionDeletingToClosedState(cid); + fail("Was expecting an Exception, but did not catch any."); + } catch (IOException e) { + assertInstanceOf(InvalidContainerStateException.class, e.getCause().getCause()); + } + } + private void addReplica(ContainerInfo cont, DatanodeDetails node) { ContainerReplica replica = ContainerReplica.newBuilder() .setContainerID(cont.containerID()) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java new file mode 100644 index 
000000000000..c7d142ac2f41 --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container; + +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerManager; +import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; +import org.apache.hadoop.ozone.HddsDatanodeService; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Collections.emptyMap; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; +import static org.apache.hadoop.ozone.container.TestHelper.waitForContainerClose; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * Tests for container report handling. + */ +public class TestContainerReportHandling { + private static final String VOLUME = "vol1"; + private static final String BUCKET = "bucket1"; + private static final String KEY = "key1"; + + /** + * Tests that a DELETING container moves to the CLOSED state if a non-empty CLOSED replica is reported. To do this, + * the test first creates a key and closes its corresponding container. Then it moves that container to the + * DELETING state using ContainerManager. Then it restarts a Datanode hosting that container, making it send a full + * container report. Then the test waits for the container to move from DELETING to CLOSED. 
+ */ + @Test + void testDeletingContainerTransitionsToClosedWhenNonEmptyReplicaIsReported() throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, TimeUnit.SECONDS); + conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, TimeUnit.SECONDS); + + Path clusterPath = null; + try (MiniOzoneCluster cluster = newCluster(conf)) { + cluster.waitForClusterToBeReady(); + clusterPath = Paths.get(cluster.getBaseDir()); + + try (OzoneClient client = cluster.newClient()) { + // create a container and close it + createTestData(client); + List keyLocations = lookupKey(cluster); + assertThat(keyLocations).isNotEmpty(); + OmKeyLocationInfo keyLocation = keyLocations.get(0); + ContainerID containerID = ContainerID.valueOf(keyLocation.getContainerID()); + waitForContainerClose(cluster, containerID.getId()); + + // move the container to DELETING + ContainerManager containerManager = cluster.getStorageContainerManager().getContainerManager(); + containerManager.updateContainerState(containerID, HddsProtos.LifeCycleEvent.DELETE); + assertEquals(HddsProtos.LifeCycleState.DELETING, containerManager.getContainer(containerID).getState()); + + // restart a DN and wait for the container to get CLOSED. + HddsDatanodeService dn = cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode()); + cluster.restartHddsDatanode(dn.getDatanodeDetails(), false); + GenericTestUtils.waitFor(() -> { + try { + return containerManager.getContainer(containerID).getState() == HddsProtos.LifeCycleState.CLOSED; + } catch (ContainerNotFoundException e) { + fail(e); + } + return false; + }, 2000, 20000); + + assertEquals(HddsProtos.LifeCycleState.CLOSED, containerManager.getContainer(containerID).getState()); + } + } finally { + if (clusterPath != null) { + System.out.println("Deleting path " + clusterPath); + boolean deleted = FileUtil.fullyDelete(clusterPath.toFile()); + assertTrue(deleted); + } + } + } + + private static MiniOzoneCluster newCluster(OzoneConfiguration conf) + throws IOException { + return MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(3) + .build(); + } + + private static List lookupKey(MiniOzoneCluster cluster) + throws IOException { + OmKeyArgs keyArgs = new OmKeyArgs.Builder() + .setVolumeName(VOLUME) + .setBucketName(BUCKET) + .setKeyName(KEY) + .build(); + OmKeyInfo keyInfo = cluster.getOzoneManager().lookupKey(keyArgs); + OmKeyLocationInfoGroup locations = keyInfo.getLatestVersionLocations(); + assertNotNull(locations); + return locations.getLocationList(); + } + + private void createTestData(OzoneClient client) throws IOException { + ObjectStore objectStore = client.getObjectStore(); + objectStore.createVolume(VOLUME); + OzoneVolume volume = objectStore.getVolume(VOLUME); + volume.createBucket(BUCKET); + + OzoneBucket bucket = volume.getBucket(BUCKET); + + try (OutputStream out = bucket.createKey(KEY, 0, + RatisReplicationConfig.getInstance(THREE), emptyMap())) { + out.write("Hello".getBytes(UTF_8)); + } + } + +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java new file mode 100644 index 000000000000..d34265641a20 --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java @@ -0,0 +1,168 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * 
or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container; + +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerManager; +import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; +import org.apache.hadoop.ozone.HddsDatanodeService; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Collections.emptyMap; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; +import static org.apache.hadoop.ozone.container.TestHelper.waitForContainerClose; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * Tests for container report handling with SCM High Availability. + */ +public class TestContainerReportHandlingWithHA { + private static final String VOLUME = "vol1"; + private static final String BUCKET = "bucket1"; + private static final String KEY = "key1"; + + /** + * Tests that a DELETING container moves to the CLOSED state if a non-empty CLOSED replica is reported. To do this, + * the test first creates a key and closes its corresponding container. Then it moves that container to the + * DELETING state using ContainerManager. Then it restarts a Datanode hosting that container, making it send a full + * container report. Then the test waits for the container to move from DELETING to CLOSED in all SCMs. 
+ */ + @Test + void testDeletingContainerTransitionsToClosedWhenNonEmptyReplicaIsReportedWithScmHA() throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, TimeUnit.SECONDS); + conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, TimeUnit.SECONDS); + + int numSCM = 3; + Path clusterPath = null; + try (MiniOzoneHAClusterImpl cluster = newHACluster(conf, numSCM)) { + cluster.waitForClusterToBeReady(); + clusterPath = Paths.get(cluster.getBaseDir()); + + try (OzoneClient client = cluster.newClient()) { + // create a container and close it + createTestData(client); + List keyLocations = lookupKey(cluster); + assertThat(keyLocations).isNotEmpty(); + OmKeyLocationInfo keyLocation = keyLocations.get(0); + ContainerID containerID = ContainerID.valueOf(keyLocation.getContainerID()); + waitForContainerClose(cluster, containerID.getId()); + + // move the container to DELETING + ContainerManager containerManager = cluster.getScmLeader().getContainerManager(); + containerManager.updateContainerState(containerID, HddsProtos.LifeCycleEvent.DELETE); + assertEquals(HddsProtos.LifeCycleState.DELETING, containerManager.getContainer(containerID).getState()); + + // restart a DN and wait for the container to get CLOSED in all SCMs + HddsDatanodeService dn = cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode()); + cluster.restartHddsDatanode(dn.getDatanodeDetails(), false); + ContainerManager[] array = new ContainerManager[numSCM]; + for (int i = 0; i < numSCM; i++) { + array[i] = cluster.getStorageContainerManager(i).getContainerManager(); + } + GenericTestUtils.waitFor(() -> { + try { + for (ContainerManager manager : array) { + if (manager.getContainer(containerID).getState() != HddsProtos.LifeCycleState.CLOSED) { + return false; + } + } + return true; + } catch (ContainerNotFoundException e) { + fail(e); + } + return false; + }, 2000, 20000); + + assertEquals(HddsProtos.LifeCycleState.CLOSED, containerManager.getContainer(containerID).getState()); + } + } finally { + if (clusterPath != null) { + boolean deleted = FileUtil.fullyDelete(clusterPath.toFile()); + assertTrue(deleted); + } + } + } + + private static MiniOzoneHAClusterImpl newHACluster(OzoneConfiguration conf, int numSCM) throws IOException { + return (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf) + .setOMServiceId("om-service") + .setSCMServiceId("scm-service") + .setNumOfOzoneManagers(1) + .setNumOfStorageContainerManagers(numSCM) + .build(); + } + + private static List lookupKey(MiniOzoneCluster cluster) + throws IOException { + OmKeyArgs keyArgs = new OmKeyArgs.Builder() + .setVolumeName(VOLUME) + .setBucketName(BUCKET) + .setKeyName(KEY) + .build(); + OmKeyInfo keyInfo = cluster.getOzoneManager().lookupKey(keyArgs); + OmKeyLocationInfoGroup locations = keyInfo.getLatestVersionLocations(); + assertNotNull(locations); + return locations.getLocationList(); + } + + private void createTestData(OzoneClient client) throws IOException { + ObjectStore objectStore = client.getObjectStore(); + objectStore.createVolume(VOLUME); + OzoneVolume volume = objectStore.getVolume(VOLUME); + volume.createBucket(BUCKET); + + OzoneBucket bucket = volume.getBucket(BUCKET); + + try (OutputStream out = bucket.createKey(KEY, 0, + RatisReplicationConfig.getInstance(THREE), emptyMap())) { + out.write("Hello".getBytes(UTF_8)); + } + } + +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java index 2a7423b15d37..6cce2dad272c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java @@ -42,8 +42,10 @@ import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.io.BlockDataStreamOutputEntry; @@ -325,14 +327,17 @@ public static void waitForContainerClose(MiniOzoneCluster cluster, Long... containerIdList) throws ContainerNotFoundException, PipelineNotFoundException, TimeoutException, InterruptedException { + StorageContainerManager scm; + if (cluster instanceof MiniOzoneHAClusterImpl) { + MiniOzoneHAClusterImpl haCluster = (MiniOzoneHAClusterImpl) cluster; + scm = haCluster.getScmLeader(); + } else { + scm = cluster.getStorageContainerManager(); + } List pipelineList = new ArrayList<>(); for (long containerID : containerIdList) { - ContainerInfo container = - cluster.getStorageContainerManager().getContainerManager() - .getContainer(ContainerID.valueOf(containerID)); - Pipeline pipeline = - cluster.getStorageContainerManager().getPipelineManager() - .getPipeline(container.getPipelineID()); + ContainerInfo container = scm.getContainerManager().getContainer(ContainerID.valueOf(containerID)); + Pipeline pipeline = scm.getPipelineManager().getPipeline(container.getPipelineID()); pipelineList.add(pipeline); List datanodes = pipeline.getNodes(); @@ -348,9 +353,7 @@ public static void waitForContainerClose(MiniOzoneCluster cluster, // make sure the container gets created first Assert.assertFalse(isContainerClosed(cluster, containerID, details)); // send the order to close the container - cluster.getStorageContainerManager().getEventQueue() - .fireEvent(SCMEvents.CLOSE_CONTAINER, - ContainerID.valueOf(containerID)); + scm.getEventQueue().fireEvent(SCMEvents.CLOSE_CONTAINER, ContainerID.valueOf(containerID)); } } int index = 0; From e04bcdfa15777644e9a2b39c7a20872c68417794 Mon Sep 17 00:00:00 2001 From: Cyrill Date: Mon, 29 Jul 2024 02:43:26 +0300 Subject: [PATCH 05/17] HDDS-11223. 
Fix iteration over ChunkBufferImplWithByteBufferList (#6999) (cherry picked from commit b07bb21333781ae8160528c4aa14cd99fff448f2) --- .../ChunkBufferImplWithByteBufferList.java | 41 +++++++++-- ...TestChunkBufferImplWithByteBufferList.java | 70 +++++++++++++++++++ 2 files changed, 107 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBufferList.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBufferList.java index 7c3a0c7d2d56..a3b5f9d2eef8 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBufferList.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBufferList.java @@ -61,6 +61,7 @@ public class ChunkBufferImplWithByteBufferList implements ChunkBuffer { private void findCurrent() { boolean found = false; + limitPrecedingCurrent = 0; for (int i = 0; i < buffers.size(); i++) { final ByteBuffer buf = buffers.get(i); final int pos = buf.position(); @@ -185,6 +186,8 @@ public ChunkBuffer duplicate(int newPosition, int newLimit) { */ @Override public Iterable iterate(int bufferSize) { + Preconditions.checkArgument(bufferSize > 0); + return () -> new Iterator() { @Override public boolean hasNext() { @@ -198,10 +201,40 @@ public ByteBuffer next() { } findCurrent(); ByteBuffer current = buffers.get(currentIndex); - final ByteBuffer duplicated = current.duplicate(); - duplicated.limit(current.limit()); - current.position(current.limit()); - return duplicated; + + // If current buffer has enough space or it's the last one, return it. + if (current.remaining() >= bufferSize || currentIndex == buffers.size() - 1) { + final ByteBuffer duplicated = current.duplicate(); + int duplicatedLimit = Math.min(current.position() + bufferSize, current.limit()); + duplicated.limit(duplicatedLimit); + duplicated.position(current.position()); + + current.position(duplicatedLimit); + return duplicated; + } + + // Otherwise, create a new buffer. + int newBufferSize = Math.min(bufferSize, remaining()); + ByteBuffer allocated = ByteBuffer.allocate(newBufferSize); + int remainingToFill = allocated.remaining(); + + while (remainingToFill > 0) { + final ByteBuffer b = current(); + int bytes = Math.min(b.remaining(), remainingToFill); + b.limit(b.position() + bytes); + allocated.put(b); + remainingToFill -= bytes; + advanceCurrent(); + } + + allocated.flip(); + + // Up-to-date current. 
+ current = buffers.get(currentIndex); + // Reset + current.limit(current.capacity()); + + return allocated; } }; } diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChunkBufferImplWithByteBufferList.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChunkBufferImplWithByteBufferList.java index 072c07be64f1..b06b4b563324 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChunkBufferImplWithByteBufferList.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChunkBufferImplWithByteBufferList.java @@ -67,6 +67,76 @@ public void rejectsMultipleCurrentBuffers() { assertThrows(IllegalArgumentException.class, () -> ChunkBuffer.wrap(list)); } + @Test + public void testIterateSmallerOverSingleChunk() { + ChunkBuffer subject = ChunkBuffer.wrap(ImmutableList.of(ByteBuffer.allocate(100))); + + assertEquals(0, subject.position()); + assertEquals(100, subject.remaining()); + assertEquals(100, subject.limit()); + + subject.iterate(25).forEach(buffer -> assertEquals(25, buffer.remaining())); + + assertEquals(100, subject.position()); + assertEquals(0, subject.remaining()); + assertEquals(100, subject.limit()); + } + + @Test + public void testIterateOverMultipleChunksFitChunkSize() { + ByteBuffer b1 = ByteBuffer.allocate(100); + ByteBuffer b2 = ByteBuffer.allocate(100); + ByteBuffer b3 = ByteBuffer.allocate(100); + ChunkBuffer subject = ChunkBuffer.wrap(ImmutableList.of(b1, b2, b3)); + + assertEquals(0, subject.position()); + assertEquals(300, subject.remaining()); + assertEquals(300, subject.limit()); + + subject.iterate(100).forEach(buffer -> assertEquals(100, buffer.remaining())); + + assertEquals(300, subject.position()); + assertEquals(0, subject.remaining()); + assertEquals(300, subject.limit()); + } + + @Test + public void testIterateOverMultipleChunksSmallerChunks() { + ByteBuffer b1 = ByteBuffer.allocate(100); + ByteBuffer b2 = ByteBuffer.allocate(100); + ByteBuffer b3 = ByteBuffer.allocate(100); + ChunkBuffer subject = ChunkBuffer.wrap(ImmutableList.of(b1, b2, b3)); + + assertEquals(0, subject.position()); + assertEquals(300, subject.remaining()); + assertEquals(300, subject.limit()); + + subject.iterate(50).forEach(buffer -> assertEquals(50, buffer.remaining())); + + assertEquals(300, subject.position()); + assertEquals(0, subject.remaining()); + assertEquals(300, subject.limit()); + } + + @Test + public void testIterateOverMultipleChunksBiggerChunks() { + ByteBuffer b1 = ByteBuffer.allocate(100); + ByteBuffer b2 = ByteBuffer.allocate(100); + ByteBuffer b3 = ByteBuffer.allocate(100); + ByteBuffer b4 = ByteBuffer.allocate(100); + ChunkBuffer subject = ChunkBuffer.wrap(ImmutableList.of(b1, b2, b3, b4)); + + assertEquals(0, subject.position()); + assertEquals(400, subject.remaining()); + assertEquals(400, subject.limit()); + + subject.iterate(200).forEach(buffer -> assertEquals(200, buffer.remaining())); + + assertEquals(400, subject.position()); + assertEquals(0, subject.remaining()); + assertEquals(400, subject.limit()); + } + private static void assertEmpty(ChunkBuffer subject) { assertEquals(0, subject.position()); assertEquals(0, subject.remaining()); From d2bce4d461ec91b9ef2cdbf48a986ba2f53871af Mon Sep 17 00:00:00 2001 From: Arafat2198 Date: Tue, 10 Sep 2024 10:51:08 +0530 Subject: [PATCH 06/17] HDDS-11436. Minor update in Recon API handling. 
(#7178) (cherry picked from commit 9477aa63fd07c6a939b2886d0fb500f07e6f0ea4) --- .../ozone/recon/api/TriggerDBSyncEndpoint.java | 1 + .../ozone/recon/api/filters/TestAdminFilter.java | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java index 070b7e1ccd4f..3ce4fc7f8370 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java @@ -32,6 +32,7 @@ */ @Path("/triggerdbsync") @Produces(MediaType.APPLICATION_JSON) +@AdminOnly public class TriggerDBSyncEndpoint { private OzoneManagerServiceProvider ozoneManagerServiceProvider; diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/filters/TestAdminFilter.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/filters/TestAdminFilter.java index 9e1aa7478ef0..d0ae7d498068 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/filters/TestAdminFilter.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/filters/TestAdminFilter.java @@ -27,7 +27,6 @@ import org.apache.hadoop.ozone.recon.api.NodeEndpoint; import org.apache.hadoop.ozone.recon.api.PipelineEndpoint; import org.apache.hadoop.ozone.recon.api.TaskStatusService; -import org.apache.hadoop.ozone.recon.api.TriggerDBSyncEndpoint; import org.apache.hadoop.ozone.recon.api.UtilizationEndpoint; import org.apache.hadoop.security.UserGroupInformation; import org.junit.jupiter.api.Assertions; @@ -66,8 +65,14 @@ public void testAdminOnlyEndpoints() { Assertions.assertFalse(allEndpoints.isEmpty()); - // If an endpoint is added, it must be explicitly added to this set or be - // marked with @AdminOnly for this test to pass. + // If an endpoint is added, it must either require admin privileges by being + // marked with the `@AdminOnly` annotation, or be added to this set to exclude it. + // - Any endpoint that displays information related to the filesystem namespace + // (including aggregate counts), user information, or allows modification to the + // cluster's state should be marked as `@AdminOnly`. + // - Read-only endpoints that only return information about node status or + // cluster state do not require the `@AdminOnly` annotation and can be excluded + // from admin requirements by adding them to this set. Set> nonAdminEndpoints = new HashSet<>(); nonAdminEndpoints.add(UtilizationEndpoint.class); nonAdminEndpoints.add(ClusterStateEndpoint.class); @@ -75,7 +80,6 @@ public void testAdminOnlyEndpoints() { nonAdminEndpoints.add(NodeEndpoint.class); nonAdminEndpoints.add(PipelineEndpoint.class); nonAdminEndpoints.add(TaskStatusService.class); - nonAdminEndpoints.add(TriggerDBSyncEndpoint.class); Assertions.assertTrue(allEndpoints.containsAll(nonAdminEndpoints)); From 39c175bbb99a87a673460ae498ff82fec1a573a3 Mon Sep 17 00:00:00 2001 From: Abhishek Pal <43001336+devabhishekpal@users.noreply.github.com> Date: Mon, 30 Sep 2024 21:23:16 +0530 Subject: [PATCH 07/17] HDDS-11472. 
Avoid recreating external access authorizer on OM state reload (#7238) (cherry picked from commit d0ad8362f690e0e13b39fbe94d9445e2f0549281) --- .../hadoop/ozone/security/acl/OzoneAccessAuthorizer.java | 9 +++++++-- .../java/org/apache/hadoop/ozone/om/OzoneManager.java | 8 +++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/security/acl/OzoneAccessAuthorizer.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/security/acl/OzoneAccessAuthorizer.java index 1f105a03ad46..abd4cd6f6d25 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/security/acl/OzoneAccessAuthorizer.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/security/acl/OzoneAccessAuthorizer.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF @@ -19,7 +19,7 @@ import org.apache.hadoop.ozone.om.exceptions.OMException; /** - * Default implementation for {@link IAccessAuthorizer}. + * No-op implementation for {@link IAccessAuthorizer}, allows everything. * */ public class OzoneAccessAuthorizer implements IAccessAuthorizer { @@ -35,4 +35,9 @@ public boolean checkAccess(IOzoneObj ozoneObject, RequestContext context) throws OMException { return true; } + + @Override + public boolean isNative() { + return true; + } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 0037fbb1b5d0..228ede131b42 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -845,7 +845,13 @@ private void instantiateServices(boolean withNewSnapshot) throws IOException { prefixManager = new PrefixManagerImpl(metadataManager, isRatisEnabled); keyManager = new KeyManagerImpl(this, scmClient, configuration, perfMetrics); - accessAuthorizer = OzoneAuthorizerFactory.forOM(this); + // If authorizer is not initialized or the authorizer is Native + // re-initialize the authorizer, else for non-native authorizer + // like ranger we can reuse previous value if it is initialized + if (null == accessAuthorizer || accessAuthorizer.isNative()) { + accessAuthorizer = OzoneAuthorizerFactory.forOM(this); + } + omMetadataReader = new OmMetadataReader(keyManager, prefixManager, this, LOG, AUDIT, metrics, accessAuthorizer); // Active DB's OmMetadataReader instance does not need to be reference From 6bb14054bc2651d993abda6da5f92b99266b250b Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Tue, 1 Oct 2024 00:42:55 -0700 Subject: [PATCH 08/17] HDDS-11504. Update Ratis to 3.1.1. (#7257) (cherry picked from commit 10d3b213e8f0c9156b885f989e7d69c9287c6be4) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 58a86907628d..a3ac4d692d96 100644 --- a/pom.xml +++ b/pom.xml @@ -76,7 +76,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs ${hdds.version} - 3.1.0 + 3.1.1 1.0.6 From a65d1da9cb210a8d7424fa3c11c0614259c93a70 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Sun, 6 Oct 2024 14:11:50 +0200 Subject: [PATCH 09/17] HDDS-11536. 
Bump macOS runner version to macos-13 (#7279) (cherry picked from commit ed2a073499e0245e535d36e770478768c3f1eb84) --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e6964804f0db..f82b66b1b373 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -143,7 +143,7 @@ jobs: include: - os: ubuntu-20.04 - java: 8 - os: macos-12 + os: macos-13 fail-fast: false runs-on: ${{ matrix.os }} steps: From 5c250a647a2b82d4909b75df4e35694c7210cf39 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Thu, 7 Mar 2024 16:43:51 -0800 Subject: [PATCH 10/17] HDDS-10480. Avoid proto2 ByteString.toByteArray() calls. (#6342) (cherry picked from commit d94aadc4b8447bd092a581b116d3b19949334774) --- .../keyvalue/KeyValueContainerCheck.java | 6 +-- .../hadoop/hdds/scm/ha/SCMRatisResponse.java | 20 ++++---- .../checksum/ECBlockChecksumComputer.java | 50 ++++++++----------- .../ReplicatedBlockChecksumComputer.java | 23 ++++++--- .../ozone/freon/DatanodeChunkValidator.java | 2 +- 5 files changed, 50 insertions(+), 51 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java index 61a506da2c28..ff230a607936 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java @@ -19,6 +19,7 @@ package org.apache.hadoop.ozone.container.keyvalue; import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.StringUtils; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdfs.util.Canceler; @@ -43,7 +44,6 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; -import java.util.Arrays; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.slf4j.Logger; @@ -404,8 +404,8 @@ private static ScanResult verifyChecksum(BlockData block, " for block %s", ChunkInfo.getFromProtoBuf(chunk), i, - Arrays.toString(expected.toByteArray()), - Arrays.toString(actual.toByteArray()), + StringUtils.bytes2Hex(expected.asReadOnlyByteBuffer()), + StringUtils.bytes2Hex(actual.asReadOnlyByteBuffer()), block.getBlockID()); return ScanResult.unhealthy( ScanResult.FailureType.CORRUPT_CHUNK, chunkFile, diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisResponse.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisResponse.java index 15163bf3e6a6..9d65eae06b15 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisResponse.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisResponse.java @@ -17,12 +17,13 @@ package org.apache.hadoop.hdds.scm.ha; -import com.google.protobuf.ByteString; import com.google.protobuf.InvalidProtocolBufferException; import org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.SCMRatisResponseProto; import org.apache.hadoop.hdds.scm.ha.io.CodecFactory; import org.apache.ratis.protocol.Message; import org.apache.ratis.protocol.RaftClientReply; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import 
org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations; /** * Represents the response from RatisServer. @@ -72,13 +73,11 @@ public static Message encode(final Object result) } final Class type = result.getClass(); - final ByteString value = CodecFactory.getCodec(type).serialize(result); - final SCMRatisResponseProto response = SCMRatisResponseProto.newBuilder() - .setType(type.getName()).setValue(value).build(); - return Message.valueOf( - org.apache.ratis.thirdparty.com.google.protobuf.ByteString.copyFrom( - response.toByteArray())); + .setType(type.getName()) + .setValue(CodecFactory.getCodec(type).serialize(result)) + .build(); + return Message.valueOf(UnsafeByteOperations.unsafeWrap(response.toByteString().asReadOnlyByteBuffer())); } public static SCMRatisResponse decode(RaftClientReply reply) @@ -87,14 +86,13 @@ public static SCMRatisResponse decode(RaftClientReply reply) return new SCMRatisResponse(reply.getException()); } - final byte[] response = reply.getMessage().getContent().toByteArray(); + final ByteString response = reply.getMessage().getContent(); - if (response.length == 0) { + if (response.isEmpty()) { return new SCMRatisResponse(); } - final SCMRatisResponseProto responseProto = SCMRatisResponseProto - .parseFrom(response); + final SCMRatisResponseProto responseProto = SCMRatisResponseProto.parseFrom(response.toByteArray()); try { final Class type = ReflectionUtil.getClass(responseProto.getType()); diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java index e0b82bebc3a8..220bef71491a 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java @@ -25,12 +25,13 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.util.DataChecksum; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.security.MessageDigest; import java.util.List; @@ -42,8 +43,8 @@ public class ECBlockChecksumComputer extends AbstractBlockChecksumComputer { private static final Logger LOG = LoggerFactory.getLogger(ECBlockChecksumComputer.class); - private List chunkInfoList; - private OmKeyInfo keyInfo; + private final List chunkInfoList; + private final OmKeyInfo keyInfo; public ECBlockChecksumComputer( @@ -68,7 +69,7 @@ public void compute(OzoneClientConfig.ChecksumCombineMode combineMode) } - private void computeMd5Crc() throws IOException { + private void computeMd5Crc() { Preconditions.checkArgument(chunkInfoList.size() > 0); final ContainerProtos.ChunkInfo firstChunkInfo = chunkInfoList.get(0); @@ -77,32 +78,28 @@ private void computeMd5Crc() throws IOException { // Total parity checksum bytes per stripe to remove int parityBytes = getParityBytes(chunkSize, bytesPerCrc); - ByteArrayOutputStream out = new ByteArrayOutputStream(); + final MessageDigest digester = MD5Hash.getDigester(); for (ContainerProtos.ChunkInfo chunkInfo : chunkInfoList) { ByteString stripeChecksum = chunkInfo.getStripeChecksum(); Preconditions.checkNotNull(stripeChecksum); - byte[] checksumBytes = stripeChecksum.toByteArray(); - - 
Preconditions.checkArgument(checksumBytes.length % 4 == 0, + final int checksumSize = stripeChecksum.size(); + Preconditions.checkArgument(checksumSize % 4 == 0, "Checksum Bytes size does not match"); - ByteBuffer byteWrap = ByteBuffer - .wrap(checksumBytes, 0, checksumBytes.length - parityBytes); - byte[] currentChecksum = new byte[4]; - - while (byteWrap.hasRemaining()) { - byteWrap.get(currentChecksum); - out.write(currentChecksum); - } + final ByteBuffer byteWrap = stripeChecksum.asReadOnlyByteBuffer(); + byteWrap.limit(checksumSize - parityBytes); + digester.update(byteWrap); } - MD5Hash fileMD5 = MD5Hash.digest(out.toByteArray()); - setOutBytes(fileMD5.getDigest()); + final byte[] fileMD5 = digester.digest(); + setOutBytes(digester.digest()); - LOG.debug("Number of chunks={}, md5hash={}", - chunkInfoList.size(), fileMD5); + if (LOG.isDebugEnabled()) { + LOG.debug("Number of chunks={}, md5hash={}", + chunkInfoList.size(), StringUtils.bytes2HexString(fileMD5)); + } } private void computeCompositeCrc() throws IOException { @@ -149,17 +146,15 @@ private void computeCompositeCrc() throws IOException { ByteString stripeChecksum = chunkInfo.getStripeChecksum(); Preconditions.checkNotNull(stripeChecksum); - byte[] checksumBytes = stripeChecksum.toByteArray(); - - Preconditions.checkArgument(checksumBytes.length % 4 == 0, + final int checksumSize = stripeChecksum.size(); + Preconditions.checkArgument(checksumSize % 4 == 0, "Checksum Bytes size does not match"); CrcComposer chunkCrcComposer = CrcComposer.newCrcComposer(dataChecksumType, bytesPerCrc); // Limit parity bytes as they do not contribute to fileChecksum - ByteBuffer byteWrap = ByteBuffer - .wrap(checksumBytes, 0, checksumBytes.length - parityBytes); - byte[] currentChecksum = new byte[4]; + final ByteBuffer byteWrap = stripeChecksum.asReadOnlyByteBuffer(); + byteWrap.limit(checksumSize - parityBytes); long chunkOffsetIndex = 1; while (byteWrap.hasRemaining()) { @@ -177,8 +172,7 @@ private void computeCompositeCrc() throws IOException { currentChunkOffset = bytesPerCrcOffset; } - byteWrap.get(currentChecksum); - int checksumDataCrc = CrcUtil.readInt(currentChecksum, 0); + final int checksumDataCrc = byteWrap.getInt(); //To handle last chunk when it size is lower than 1524K in the case // of rs-3-2-1524k. 
long chunkSizePerChecksum = Math.min(Math.min(keySize, bytesPerCrc), diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ReplicatedBlockChecksumComputer.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ReplicatedBlockChecksumComputer.java index cf976e3bd39c..2c0fc0c0d36a 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ReplicatedBlockChecksumComputer.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ReplicatedBlockChecksumComputer.java @@ -26,8 +26,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.ByteBuffer; +import java.security.MessageDigest; import java.util.List; /** @@ -39,7 +40,13 @@ public class ReplicatedBlockChecksumComputer extends private static final Logger LOG = LoggerFactory.getLogger(ReplicatedBlockChecksumComputer.class); - private List chunkInfoList; + static MD5Hash digest(ByteBuffer data) { + final MessageDigest digester = MD5Hash.getDigester(); + digester.update(data); + return new MD5Hash(digester.digest()); + } + + private final List chunkInfoList; public ReplicatedBlockChecksumComputer( List chunkInfoList) { @@ -62,20 +69,20 @@ public void compute(OzoneClientConfig.ChecksumCombineMode combineMode) } // compute the block checksum, which is the md5 of chunk checksums - private void computeMd5Crc() throws IOException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - + private void computeMd5Crc() { + ByteString bytes = ByteString.EMPTY; for (ContainerProtos.ChunkInfo chunkInfo : chunkInfoList) { ContainerProtos.ChecksumData checksumData = chunkInfo.getChecksumData(); List checksums = checksumData.getChecksumsList(); for (ByteString checksum : checksums) { - baos.write(checksum.toByteArray()); + bytes = bytes.concat(checksum); } } - MD5Hash fileMD5 = MD5Hash.digest(baos.toByteArray()); + final MD5Hash fileMD5 = digest(bytes.asReadOnlyByteBuffer()); + setOutBytes(fileMD5.getDigest()); LOG.debug("number of chunks={}, md5out={}", @@ -121,7 +128,7 @@ private void computeCompositeCrc() throws IOException { Preconditions.checkArgument(remainingChunkSize <= checksums.size() * chunkSize); for (ByteString checksum : checksums) { - int checksumDataCrc = CrcUtil.readInt(checksum.toByteArray(), 0); + final int checksumDataCrc = checksum.asReadOnlyByteBuffer().getInt(); chunkCrcComposer.update(checksumDataCrc, Math.min(bytesPerCrc, remainingChunkSize)); remainingChunkSize -= bytesPerCrc; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkValidator.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkValidator.java index b290da2da1f5..2bbf8b6d5b24 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkValidator.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/DatanodeChunkValidator.java @@ -193,7 +193,7 @@ private ChecksumData computeChecksum(ContainerCommandResponseProto response) throws OzoneChecksumException { ContainerProtos.ReadChunkResponseProto readChunk = response.getReadChunk(); if (readChunk.hasData()) { - return checksum.computeChecksum(readChunk.getData().toByteArray()); + return checksum.computeChecksum(readChunk.getData().asReadOnlyByteBuffer()); } else { return checksum.computeChecksum( readChunk.getDataBuffers().getBuffersList()); From 06a3d1fbde6f87f8e6239bcf306406107eae600c Mon Sep 17 00:00:00 
2001 From: Sammi Chen Date: Tue, 7 May 2024 00:19:49 +0800 Subject: [PATCH 11/17] HDDS-10465. Change ozone.client.bytes.per.checksum default to 16KB (#6331) (cherry picked from commit d49a2b6b194610d3a7909e2f41e4f0c1262f8ea4) --- .../java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java | 6 +++--- .../main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java | 2 +- .../ozone/container/keyvalue/interfaces/ChunkManager.java | 2 +- .../ozone/client/checksum/ECBlockChecksumComputer.java | 6 ++---- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java index 549735438a02..31dcde001021 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java @@ -170,13 +170,13 @@ public enum ChecksumCombineMode { private String checksumType = ChecksumType.CRC32.name(); @Config(key = "bytes.per.checksum", - defaultValue = "1MB", + defaultValue = "16KB", type = ConfigType.SIZE, description = "Checksum will be computed for every bytes per checksum " + "number of bytes and stored sequentially. The minimum value for " - + "this config is 16KB.", + + "this config is 8KB.", tags = ConfigTag.CLIENT) - private int bytesPerChecksum = 1024 * 1024; + private int bytesPerChecksum = 16 * 1024; @Config(key = "verify.checksum", defaultValue = "true", diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 9508d9125311..9c4e6afca872 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -458,7 +458,7 @@ public final class OzoneConfigKeys { "hdds.datanode.replication.work.dir"; - public static final int OZONE_CLIENT_BYTES_PER_CHECKSUM_MIN_SIZE = 16 * 1024; + public static final int OZONE_CLIENT_BYTES_PER_CHECKSUM_MIN_SIZE = 8 * 1024; public static final String OZONE_CLIENT_READ_TIMEOUT = "ozone.client.read.timeout"; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/ChunkManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/ChunkManager.java index 151c15f35676..aa2cdcb05f5b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/ChunkManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/ChunkManager.java @@ -126,7 +126,7 @@ static int getBufferCapacityForChunkRead(ChunkInfo chunkInfo, } else { // Set buffer capacity to checksum boundary size so that each buffer // corresponds to one checksum. If checksum is NONE, then set buffer - // capacity to default (OZONE_CHUNK_READ_BUFFER_DEFAULT_SIZE_KEY = 64KB). + // capacity to default (OZONE_CHUNK_READ_BUFFER_DEFAULT_SIZE_KEY = 1MB). 
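// Illustrative sketch, not part of the patch: the same "bytes.per.checksum"
// setting (full key assumed to be "ozone.client.bytes.per.checksum", matching
// its usage in TestOzoneFileChecksum further below) drives both the write-side
// checksum granularity and, through the checksum boundary, the read buffer
// capacity chosen here. For example, a chunk written with the new 16 KB default
// is read back through 16 KB buffers, one per stored checksum. A client that
// prefers the previous behaviour can raise the value again, e.g.:
//   OzoneConfiguration conf = new OzoneConfiguration();
//   conf.setInt("ozone.client.bytes.per.checksum", 1024 * 1024); // back to 1 MB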
ChecksumData checksumData = chunkInfo.getChecksumData(); if (checksumData != null) { diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java index 220bef71491a..b2c30ed9e08f 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java @@ -121,12 +121,10 @@ private void computeCompositeCrc() throws IOException { // Bytes required to create a CRC long bytesPerCrc = firstChunkInfo.getChecksumData().getBytesPerChecksum(); - ECReplicationConfig replicationConfig = - (ECReplicationConfig) keyInfo.getReplicationConfig(); - long chunkSize = replicationConfig.getEcChunkSize(); + long chunkSize = firstChunkInfo.getLen(); //When EC chunk size is not a multiple of ozone.client.bytes.per.checksum - // (default = 1MB) the last checksum in an EC chunk is only generated for + // (default = 16KB) the last checksum in an EC chunk is only generated for // offset. long bytesPerCrcOffset = chunkSize % bytesPerCrc; From 9655a7b3be04d9429cc7a930fae5b3c2d05a74d2 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Thu, 10 Oct 2024 06:49:50 -0700 Subject: [PATCH 12/17] HDDS-11482. EC Checksum throws IllegalArgumentException because the buffer limit is negative (#7230) (cherry picked from commit 7ef7de200bd56b95e647e65dbb3f2540300b0790) --- .../checksum/ECBlockChecksumComputer.java | 72 ++++++------------- .../client/checksum/ECFileChecksumHelper.java | 7 +- .../fs/ozone/TestOzoneFileChecksum.java | 49 ++++++++----- 3 files changed, 59 insertions(+), 69 deletions(-) diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java index b2c30ed9e08f..a4c24768cddc 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECBlockChecksumComputer.java @@ -45,12 +45,14 @@ public class ECBlockChecksumComputer extends AbstractBlockChecksumComputer { private final List chunkInfoList; private final OmKeyInfo keyInfo; + private final long blockLength; public ECBlockChecksumComputer( - List chunkInfoList, OmKeyInfo keyInfo) { + List chunkInfoList, OmKeyInfo keyInfo, long blockLength) { this.chunkInfoList = chunkInfoList; this.keyInfo = keyInfo; + this.blockLength = blockLength; } @Override @@ -72,15 +74,13 @@ public void compute(OzoneClientConfig.ChecksumCombineMode combineMode) private void computeMd5Crc() { Preconditions.checkArgument(chunkInfoList.size() > 0); - final ContainerProtos.ChunkInfo firstChunkInfo = chunkInfoList.get(0); - long chunkSize = firstChunkInfo.getLen(); - long bytesPerCrc = firstChunkInfo.getChecksumData().getBytesPerChecksum(); - // Total parity checksum bytes per stripe to remove - int parityBytes = getParityBytes(chunkSize, bytesPerCrc); - final MessageDigest digester = MD5Hash.getDigester(); for (ContainerProtos.ChunkInfo chunkInfo : chunkInfoList) { + long chunkSize = chunkInfo.getLen(); + long bytesPerCrc = chunkInfo.getChecksumData().getBytesPerChecksum(); + // Total parity checksum bytes per stripe to remove + int parityBytes = getParityBytes(chunkSize, bytesPerCrc); 
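// Illustrative worked example, not part of the patch (numbers assume an
// rs-3-2 layout, i.e. 2 parity chunks, purely for the arithmetic): with
// chunkSize = 1 MB and bytesPerCrc = 16 KB, each chunk carries
// ceil(1 MB / 16 KB) = 64 stored CRCs of 4 bytes = 256 bytes, so a full
// stripe checksum holds (3 + 2) * 256 = 1280 bytes, and getParityBytes(...)
// is expected to return the trailing 2 * 256 = 512 parity bytes, which the
// limit(checksumSize - parityBytes) call below excludes from the MD5 digest.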
ByteString stripeChecksum = chunkInfo.getStripeChecksum(); Preconditions.checkNotNull(stripeChecksum); @@ -121,66 +121,40 @@ private void computeCompositeCrc() throws IOException { // Bytes required to create a CRC long bytesPerCrc = firstChunkInfo.getChecksumData().getBytesPerChecksum(); - long chunkSize = firstChunkInfo.getLen(); - - //When EC chunk size is not a multiple of ozone.client.bytes.per.checksum - // (default = 16KB) the last checksum in an EC chunk is only generated for - // offset. - long bytesPerCrcOffset = chunkSize % bytesPerCrc; - - long keySize = keyInfo.getDataSize(); - // Total parity checksum bytes per stripe to remove - int parityBytes = getParityBytes(chunkSize, bytesPerCrc); - - // Number of checksum per chunk, Eg: 2MB EC chunk will - // have 2 checksum per chunk. - int numChecksumPerChunk = (int) - (Math.ceil((double) chunkSize / bytesPerCrc)); + long blockSize = blockLength; CrcComposer blockCrcComposer = CrcComposer.newCrcComposer(dataChecksumType, bytesPerCrc); for (ContainerProtos.ChunkInfo chunkInfo : chunkInfoList) { ByteString stripeChecksum = chunkInfo.getStripeChecksum(); + long chunkSize = chunkInfo.getLen(); + + // Total parity checksum bytes per stripe to remove + int parityBytes = getParityBytes(chunkSize, bytesPerCrc); Preconditions.checkNotNull(stripeChecksum); final int checksumSize = stripeChecksum.size(); Preconditions.checkArgument(checksumSize % 4 == 0, "Checksum Bytes size does not match"); - CrcComposer chunkCrcComposer = - CrcComposer.newCrcComposer(dataChecksumType, bytesPerCrc); // Limit parity bytes as they do not contribute to fileChecksum final ByteBuffer byteWrap = stripeChecksum.asReadOnlyByteBuffer(); byteWrap.limit(checksumSize - parityBytes); - long chunkOffsetIndex = 1; while (byteWrap.hasRemaining()) { - - /* - When chunk size is not a multiple of bytes.per.crc we get an offset. - For eg, RS-3-2-1524k is not a multiple of 1MB. So two checksums are - generated 1st checksum for 1024k bytes and 2nd checksum for 500k bytes. - When we reach the 2nd Checksum we need to modify the bytesPerCrc as in - this case 500k is the bytes for which the checksum is generated. - */ - long currentChunkOffset = Long.MAX_VALUE; - if ((chunkOffsetIndex % numChecksumPerChunk == 0) - && (bytesPerCrcOffset > 0)) { - currentChunkOffset = bytesPerCrcOffset; + // Here Math.min in mainly required for last stripe's last chunk. The last chunk of the last stripe can be + // less than the chunkSize, chunkSize is only calculated from each stripe's first chunk. This would be fine + // for rest of the stripe because all the chunks are of the same size. But for the last stripe we don't know + // the exact size of the last chunk. So we calculate it with the of blockSize. If the block size is smaller + // than the chunk size, then we know it is the last stripe' last chunk. + long remainingChunkSize = Math.min(blockSize, chunkSize); + while (byteWrap.hasRemaining() && remainingChunkSize > 0) { + final int checksumData = byteWrap.getInt(); + blockCrcComposer.update(checksumData, Math.min(bytesPerCrc, remainingChunkSize)); + remainingChunkSize -= bytesPerCrc; } - - final int checksumDataCrc = byteWrap.getInt(); - //To handle last chunk when it size is lower than 1524K in the case - // of rs-3-2-1524k. 
- long chunkSizePerChecksum = Math.min(Math.min(keySize, bytesPerCrc), - currentChunkOffset); - chunkCrcComposer.update(checksumDataCrc, chunkSizePerChecksum); - - int chunkChecksumCrc = CrcUtil.readInt(chunkCrcComposer.digest(), 0); - blockCrcComposer.update(chunkChecksumCrc, chunkSizePerChecksum); - keySize -= Math.min(bytesPerCrc, currentChunkOffset); - ++chunkOffsetIndex; + blockSize -= chunkSize; } } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECFileChecksumHelper.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECFileChecksumHelper.java index 13ba57169878..58a97b2a90d4 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECFileChecksumHelper.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/checksum/ECFileChecksumHelper.java @@ -102,7 +102,7 @@ private boolean checksumBlock(OmKeyLocationInfo keyLocationInfo) setBytesPerCRC(bytesPerChecksum); ByteBuffer blockChecksumByteBuffer = - getBlockChecksumFromChunkChecksums(chunkInfos); + getBlockChecksumFromChunkChecksums(chunkInfos, keyLocationInfo.getLength()); String blockChecksumForDebug = populateBlockChecksumBuf(blockChecksumByteBuffer); @@ -140,10 +140,11 @@ private String populateBlockChecksumBuf( } private ByteBuffer getBlockChecksumFromChunkChecksums( - List chunkInfos) throws IOException { + List chunkInfos, + long blockLength) throws IOException { AbstractBlockChecksumComputer blockChecksumComputer = - new ECBlockChecksumComputer(chunkInfos, getKeyInfo()); + new ECBlockChecksumComputer(chunkInfos, getKeyInfo(), blockLength); blockChecksumComputer.compute(getCombineMode()); return blockChecksumComputer.getOutByteBuffer(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFileChecksum.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFileChecksum.java index 649ed50a1020..7b5a95808050 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFileChecksum.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFileChecksum.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.ozone; import com.google.common.collect.ImmutableList; +import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileChecksum; import org.apache.hadoop.fs.FileSystem; @@ -39,6 +40,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Timeout; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; @@ -53,10 +55,13 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.commons.lang3.RandomStringUtils.randomAlphabetic; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_KEY; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE; import static org.apache.hadoop.ozone.TestDataUtil.createBucket; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.params.provider.Arguments.arguments; /** * Test FileChecksum API. 
@@ -68,10 +73,16 @@ public class TestOzoneFileChecksum { true, false }; - private static final int[] DATA_SIZES = DoubleStream.of(0.5, 1, 1.5, 2, 7, 8) - .mapToInt(mb -> (int) (1024 * 1024 * mb)) + private static final int[] DATA_SIZES_1 = DoubleStream.of(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 5, 6, 7, 8, 9, 10) + .mapToInt(mb -> (int) (1024 * 1024 * mb) + 510000) .toArray(); + private static final int[] DATA_SIZES_2 = DoubleStream.of(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 5, 6, 7, 8, 9, 10) + .mapToInt(mb -> (int) (1024 * 1024 * mb) + 820000) + .toArray(); + + private int[] dataSizes = new int[DATA_SIZES_1.length + DATA_SIZES_2.length]; + private OzoneConfiguration conf; private MiniOzoneCluster cluster = null; private FileSystem fs; @@ -84,6 +95,8 @@ public class TestOzoneFileChecksum { void setup() throws IOException, InterruptedException, TimeoutException { conf = new OzoneConfiguration(); + conf.setStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, 1024 * 1024, StorageUnit.BYTES); + conf.setStorageSize(OZONE_SCM_BLOCK_SIZE, 2 * 1024 * 1024, StorageUnit.BYTES); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(5) .build(); @@ -95,9 +108,8 @@ void setup() throws IOException, OzoneConsts.OZONE_OFS_URI_SCHEME); conf.setBoolean(disableCache, true); conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); - fs = FileSystem.get(conf); - ofs = (RootedOzoneFileSystem) fs; - adapter = (BasicRootedOzoneClientAdapterImpl) ofs.getAdapter(); + System.arraycopy(DATA_SIZES_1, 0, dataSizes, 0, DATA_SIZES_1.length); + System.arraycopy(DATA_SIZES_2, 0, dataSizes, DATA_SIZES_1.length, DATA_SIZES_2.length); } @AfterEach @@ -112,9 +124,13 @@ void teardown() { * Test EC checksum with Replicated checksum. */ @ParameterizedTest - @MethodSource("missingIndexes") - void testEcFileChecksum(List missingIndexes) throws IOException { + @MethodSource("missingIndexesAndChecksumSize") + void testEcFileChecksum(List missingIndexes, double checksumSizeInMB) throws IOException { + conf.setInt("ozone.client.bytes.per.checksum", (int) (checksumSizeInMB * 1024 * 1024)); + fs = FileSystem.get(conf); + ofs = (RootedOzoneFileSystem) fs; + adapter = (BasicRootedOzoneClientAdapterImpl) ofs.getAdapter(); String volumeName = UUID.randomUUID().toString(); String legacyBucket = UUID.randomUUID().toString(); String ecBucketName = UUID.randomUUID().toString(); @@ -139,7 +155,7 @@ void testEcFileChecksum(List missingIndexes) throws IOException { Map replicatedChecksums = new HashMap<>(); - for (int dataLen : DATA_SIZES) { + for (int dataLen : dataSizes) { byte[] data = randomAlphabetic(dataLen).getBytes(UTF_8); try (OutputStream file = adapter.createFile(volumeName + "/" @@ -170,7 +186,7 @@ void testEcFileChecksum(List missingIndexes) throws IOException { clientConf.setBoolean(OZONE_NETWORK_TOPOLOGY_AWARE_READ_KEY, topologyAware); try (FileSystem fsForRead = FileSystem.get(clientConf)) { - for (int dataLen : DATA_SIZES) { + for (int dataLen : dataSizes) { // Compute checksum after failed DNs Path parent = new Path("/" + volumeName + "/" + ecBucketName + "/"); Path ecKey = new Path(parent, "test" + dataLen); @@ -187,14 +203,13 @@ void testEcFileChecksum(List missingIndexes) throws IOException { } } - static Stream> missingIndexes() { + static Stream missingIndexesAndChecksumSize() { return Stream.of( - ImmutableList.of(0, 1), - ImmutableList.of(1, 2), - ImmutableList.of(2, 3), - ImmutableList.of(3, 4), - ImmutableList.of(0, 3), - ImmutableList.of(0, 4) - ); + arguments(ImmutableList.of(0, 1), 0.001), + arguments(ImmutableList.of(1, 2), 
0.01), + arguments(ImmutableList.of(2, 3), 0.1), + arguments(ImmutableList.of(3, 4), 0.5), + arguments(ImmutableList.of(0, 3), 1), + arguments(ImmutableList.of(0, 4), 2)); } } From 1822d2aceb6bb93826d67654ee01d00ef8976ce4 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:12:17 +0800 Subject: [PATCH 13/17] HDDS-11498. Improve SCM deletion efficiency. (#7249) (cherry picked from commit 4670a5ef8c98ae236d1acbaf93e9609309f88094) --- .../DeleteBlocksCommandHandler.java | 8 ++- .../DatanodeDeletedBlockTransactions.java | 3 +- .../hdds/scm/block/DeletedBlockLogImpl.java | 56 +++++++++++-------- .../scm/block/SCMBlockDeletingService.java | 3 +- .../block/ScmBlockDeletingServiceMetrics.java | 34 +++++++++++ 5 files changed, 75 insertions(+), 29 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java index a243b0c7dab1..7e515653a891 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java @@ -354,10 +354,11 @@ private void processCmd(DeleteCmdInfo cmd) { DeletedContainerBlocksSummary summary = DeletedContainerBlocksSummary.getFrom(containerBlocks); LOG.info("Summary of deleting container blocks, numOfTransactions={}, " - + "numOfContainers={}, numOfBlocks={}", + + "numOfContainers={}, numOfBlocks={}, commandId={}.", summary.getNumOfTxs(), summary.getNumOfContainers(), - summary.getNumOfBlocks()); + summary.getNumOfBlocks(), + cmd.getCmd().getId()); if (LOG.isDebugEnabled()) { LOG.debug("Start to delete container blocks, TXIDs={}", summary.getTxIDSummary()); @@ -384,7 +385,8 @@ private void processCmd(DeleteCmdInfo cmd) { LOG.debug("Sending following block deletion ACK to SCM"); for (DeleteBlockTransactionResult result : blockDeletionACK .getResultsList()) { - LOG.debug("{} : {}", result.getTxID(), result.getSuccess()); + LOG.debug("TxId = {} : ContainerId = {} : {}", + result.getTxID(), result.getContainerID(), result.getSuccess()); } } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java index e485fcc98d93..99fd9c7b431d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java @@ -47,7 +47,8 @@ void addTransactionToDN(UUID dnID, DeletedBlocksTransaction tx) { blocksDeleted += tx.getLocalIDCount(); if (SCMBlockDeletingService.LOG.isDebugEnabled()) { SCMBlockDeletingService.LOG - .debug("Transaction added: {} <- TX({})", dnID, tx.getTxID()); + .debug("Transaction added: {} <- TX({}), DN {} <- blocksDeleted Add {}.", + dnID, tx.getTxID(), dnID, tx.getLocalIDCount()); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java index 
ac64f6e973e3..cdc9fddb3c80 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java @@ -35,7 +35,6 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.CommandStatus; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerBlocksDeletionACKProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerBlocksDeletionACKProto.DeleteBlockTransactionResult; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler.DeleteBlockStatus; import org.apache.hadoop.hdds.scm.container.ContainerInfo; @@ -204,20 +203,6 @@ private DeletedBlocksTransaction constructNewTransaction( .build(); } - private boolean isTransactionFailed(DeleteBlockTransactionResult result) { - if (LOG.isDebugEnabled()) { - LOG.debug( - "Got block deletion ACK from datanode, TXIDs={}, " + "success={}", - result.getTxID(), result.getSuccess()); - } - if (!result.getSuccess()) { - LOG.warn("Got failed ACK for TXID={}, prepare to resend the " - + "TX in next interval", result.getTxID()); - return true; - } - return false; - } - @Override public int getNumOfValidTransactions() throws IOException { lock.lock(); @@ -304,26 +289,46 @@ private void getTransaction(DeletedBlocksTransaction tx, .setCount(transactionStatusManager.getOrDefaultRetryCount( tx.getTxID(), 0)) .build(); + for (ContainerReplica replica : replicas) { DatanodeDetails details = replica.getDatanodeDetails(); - if (!dnList.contains(details)) { - continue; - } if (!transactionStatusManager.isDuplication( details, updatedTxn.getTxID(), commandStatus)) { transactions.addTransactionToDN(details.getUuid(), updatedTxn); + metrics.incrProcessedTransaction(); } } } private Boolean checkInadequateReplica(Set replicas, - DeletedBlocksTransaction txn) throws ContainerNotFoundException { + DeletedBlocksTransaction txn, + Set dnList) throws ContainerNotFoundException { ContainerInfo containerInfo = containerManager .getContainer(ContainerID.valueOf(txn.getContainerID())); ReplicationManager replicationManager = scmContext.getScm().getReplicationManager(); ContainerHealthResult result = replicationManager .getContainerReplicationHealth(containerInfo, replicas); + + // We have made an improvement here, and we expect that all replicas + // of the Container being sent will be included in the dnList. + // This change benefits ACK confirmation and improves deletion speed. + // The principle behind it is that + // DN can receive the command to delete a certain Container at the same time and provide + // feedback to SCM at roughly the same time. + // This avoids the issue of deletion blocking, + // where some replicas of a Container are deleted while others do not receive the delete command. 
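// Illustrative example, not part of the patch: if container #100 has replicas
// on datanodes A, B and C but only A and B appear in this round's dnList,
// sending the command now would delete two replicas immediately while C kept
// its copy until some later round, and the transaction could not be fully
// ACKed in the meantime. Skipping the transaction here, and retrying once all
// replica holders are in the list, keeps per-container deletes in lockstep.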
+ long containerId = txn.getContainerID(); + for (ContainerReplica replica : replicas) { + DatanodeDetails datanodeDetails = replica.getDatanodeDetails(); + if (!dnList.contains(datanodeDetails)) { + DatanodeDetails dnDetail = replica.getDatanodeDetails(); + LOG.debug("Skip Container = {}, because DN = {} is not in dnList.", + containerId, dnDetail.getUuid()); + return true; + } + } + return result.getHealthState() != ContainerHealthResult.HealthState.HEALTHY; } @@ -349,6 +354,7 @@ public DatanodeDeletedBlockTransactions getTransactions( .getCommandStatusByTxId(dnList.stream(). map(DatanodeDetails::getUuid).collect(Collectors.toSet())); ArrayList txIDs = new ArrayList<>(); + metrics.setNumBlockDeletionTransactionDataNodes(dnList.size()); // Here takes block replica count as the threshold to avoid the case // that part of replicas committed the TXN and recorded in the // SCMDeletedBlockTransactionStatusManager, while they are counted @@ -362,23 +368,25 @@ public DatanodeDeletedBlockTransactions getTransactions( // HDDS-7126. When container is under replicated, it is possible // that container is deleted, but transactions are not deleted. if (containerManager.getContainer(id).isDeleted()) { - LOG.warn("Container: " + id + " was deleted for the " + - "transaction: " + txn); + LOG.warn("Container: {} was deleted for the " + + "transaction: {}.", id, txn); txIDs.add(txn.getTxID()); } else if (txn.getCount() > -1 && txn.getCount() <= maxRetry && !containerManager.getContainer(id).isOpen()) { Set replicas = containerManager .getContainerReplicas( ContainerID.valueOf(txn.getContainerID())); - if (checkInadequateReplica(replicas, txn)) { + if (checkInadequateReplica(replicas, txn, dnList)) { + metrics.incrSkippedTransaction(); continue; } getTransaction( txn, transactions, dnList, replicas, commandStatus); + } else if (txn.getCount() >= maxRetry || containerManager.getContainer(id).isOpen()) { + metrics.incrSkippedTransaction(); } } catch (ContainerNotFoundException ex) { - LOG.warn("Container: " + id + " was not found for the transaction: " - + txn); + LOG.warn("Container: {} was not found for the transaction: {}.", id, txn); txIDs.add(txn.getTxID()); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java index 7271d9dcba68..e6fc45cb5eee 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java @@ -203,9 +203,10 @@ public EmptyTaskResult call() throws Exception { } } LOG.info("Totally added {} blocks to be deleted for" - + " {} datanodes, task elapsed time: {}ms", + + " {} datanodes / {} totalnodes, task elapsed time: {}ms", transactions.getBlocksDeleted(), transactions.getDatanodeTransactionMap().size(), + included.size(), Time.monotonicNow() - startTime); deletedBlockLog.incrementCount(new ArrayList<>(processedTxIDs)); } catch (NotLeaderException nle) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java index 2cadca1d92a4..6637bd183293 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java +++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java @@ -24,6 +24,7 @@ import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableGaugeLong; /** * Metrics related to Block Deleting Service running in SCM. @@ -76,6 +77,15 @@ public final class ScmBlockDeletingServiceMetrics { @Metric(about = "The number of created txs which are added into DB.") private MutableCounterLong numBlockDeletionTransactionCreated; + @Metric(about = "The number of skipped transactions") + private MutableCounterLong numSkippedTransactions; + + @Metric(about = "The number of processed transactions") + private MutableCounterLong numProcessedTransactions; + + @Metric(about = "The number of dataNodes of delete transactions.") + private MutableGaugeLong numBlockDeletionTransactionDataNodes; + private ScmBlockDeletingServiceMetrics() { } @@ -130,6 +140,18 @@ public void incrBlockDeletionTransactionCreated(long count) { this.numBlockDeletionTransactionCreated.incr(count); } + public void incrSkippedTransaction() { + this.numSkippedTransactions.incr(); + } + + public void incrProcessedTransaction() { + this.numProcessedTransactions.incr(); + } + + public void setNumBlockDeletionTransactionDataNodes(long dataNodes) { + this.numBlockDeletionTransactionDataNodes.set(dataNodes); + } + public long getNumBlockDeletionCommandSent() { return numBlockDeletionCommandSent.value(); } @@ -162,6 +184,18 @@ public long getNumBlockDeletionTransactionCreated() { return numBlockDeletionTransactionCreated.value(); } + public long getNumSkippedTransactions() { + return numSkippedTransactions.value(); + } + + public long getNumProcessedTransactions() { + return numProcessedTransactions.value(); + } + + public long getNumBlockDeletionTransactionDataNodes() { + return numBlockDeletionTransactionDataNodes.value(); + } + @Override public String toString() { StringBuffer buffer = new StringBuffer(); From 67a869e43964235b88cd851af2f632ae3ac63ee2 Mon Sep 17 00:00:00 2001 From: Scolley <74013924+scolley31@users.noreply.github.com> Date: Mon, 21 Oct 2024 00:15:13 +0800 Subject: [PATCH 14/17] HDDS-11570. 
Fix HDDS Docs build failure with Hugo v0.135.0 (#7337) (cherry picked from commit 8568075ddb75b4a6785cdb31b00b4a592058dc78) --- hadoop-hdds/docs/content/feature/Quota.md | 2 +- hadoop-hdds/docs/content/feature/Quota.zh.md | 2 +- hadoop-hdds/docs/content/security/GDPR.md | 2 +- hadoop-hdds/docs/content/security/GDPR.zh.md | 2 +- hadoop-hdds/docs/content/security/SecureOzone.md | 2 +- hadoop-hdds/docs/content/security/SecureOzone.zh.md | 2 +- hadoop-hdds/docs/content/security/SecuringDatanodes.md | 2 +- hadoop-hdds/docs/content/security/SecuringDatanodes.zh.md | 2 +- hadoop-hdds/docs/content/security/SecuringOzoneHTTP.md | 2 +- hadoop-hdds/docs/content/security/SecuringS3.md | 2 +- hadoop-hdds/docs/content/security/SecuringS3.zh.md | 2 +- hadoop-hdds/docs/content/security/SecuringTDE.md | 2 +- hadoop-hdds/docs/content/security/SecuringTDE.zh.md | 2 +- hadoop-hdds/docs/content/security/SecurityAcls.md | 2 +- hadoop-hdds/docs/content/security/SecurityAcls.zh.md | 2 +- hadoop-hdds/docs/content/security/SecurityWithRanger.md | 2 +- hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hadoop-hdds/docs/content/feature/Quota.md b/hadoop-hdds/docs/content/feature/Quota.md index 90e413357b50..53c196307fa3 100644 --- a/hadoop-hdds/docs/content/feature/Quota.md +++ b/hadoop-hdds/docs/content/feature/Quota.md @@ -1,6 +1,6 @@ --- title: "Quota in Ozone" -date: "2020-October-22" +date: "2020-10-22" weight: 4 summary: Quota in Ozone icon: user diff --git a/hadoop-hdds/docs/content/feature/Quota.zh.md b/hadoop-hdds/docs/content/feature/Quota.zh.md index 16e5db26cde3..d690947ef06c 100644 --- a/hadoop-hdds/docs/content/feature/Quota.zh.md +++ b/hadoop-hdds/docs/content/feature/Quota.zh.md @@ -1,6 +1,6 @@ --- title: "Ozone 中的配额" -date: "2020-October-22" +date: "2020-10-22" weight: 4 summary: Ozone中的配额 icon: user diff --git a/hadoop-hdds/docs/content/security/GDPR.md b/hadoop-hdds/docs/content/security/GDPR.md index 25b2f2c4416b..409a3ae7be0d 100644 --- a/hadoop-hdds/docs/content/security/GDPR.md +++ b/hadoop-hdds/docs/content/security/GDPR.md @@ -1,6 +1,6 @@ --- title: "GDPR in Ozone" -date: "2019-September-17" +date: "2019-09-17" weight: 3 icon: user menu: diff --git a/hadoop-hdds/docs/content/security/GDPR.zh.md b/hadoop-hdds/docs/content/security/GDPR.zh.md index a7db4030871b..8fd3514138f0 100644 --- a/hadoop-hdds/docs/content/security/GDPR.zh.md +++ b/hadoop-hdds/docs/content/security/GDPR.zh.md @@ -1,6 +1,6 @@ --- title: "Ozone 中的 GDPR" -date: "2019-September-17" +date: "2019-09-17" weight: 3 summary: Ozone 中的 GDPR menu: diff --git a/hadoop-hdds/docs/content/security/SecureOzone.md b/hadoop-hdds/docs/content/security/SecureOzone.md index 76fd74701095..bbeef79b6135 100644 --- a/hadoop-hdds/docs/content/security/SecureOzone.md +++ b/hadoop-hdds/docs/content/security/SecureOzone.md @@ -1,6 +1,6 @@ --- title: "Securing Ozone" -date: "2019-April-03" +date: "2019-04-03" summary: Overview of Ozone security concepts and steps to secure Ozone Manager and SCM. 
weight: 1 menu: diff --git a/hadoop-hdds/docs/content/security/SecureOzone.zh.md b/hadoop-hdds/docs/content/security/SecureOzone.zh.md index a7660233f4d0..e74b5d8dfab5 100644 --- a/hadoop-hdds/docs/content/security/SecureOzone.zh.md +++ b/hadoop-hdds/docs/content/security/SecureOzone.zh.md @@ -1,6 +1,6 @@ --- title: "安全化 Ozone" -date: "2019-April-03" +date: "2019-04-03" summary: 简要介绍 Ozone 中的安全概念以及安全化 OM 和 SCM 的步骤。 weight: 1 menu: diff --git a/hadoop-hdds/docs/content/security/SecuringDatanodes.md b/hadoop-hdds/docs/content/security/SecuringDatanodes.md index 717e746cfb91..2254155e1f4e 100644 --- a/hadoop-hdds/docs/content/security/SecuringDatanodes.md +++ b/hadoop-hdds/docs/content/security/SecuringDatanodes.md @@ -1,6 +1,6 @@ --- title: "Securing Datanodes" -date: "2019-April-03" +date: "2019-04-03" weight: 3 menu: main: diff --git a/hadoop-hdds/docs/content/security/SecuringDatanodes.zh.md b/hadoop-hdds/docs/content/security/SecuringDatanodes.zh.md index 608be16e8a3b..8b37fd2f6ee2 100644 --- a/hadoop-hdds/docs/content/security/SecuringDatanodes.zh.md +++ b/hadoop-hdds/docs/content/security/SecuringDatanodes.zh.md @@ -1,6 +1,6 @@ --- title: "安全化 Datanode" -date: "2019-April-03" +date: "2019-04-03" weight: 3 menu: main: diff --git a/hadoop-hdds/docs/content/security/SecuringOzoneHTTP.md b/hadoop-hdds/docs/content/security/SecuringOzoneHTTP.md index 47c04eb94d93..a8601d7a5e1f 100644 --- a/hadoop-hdds/docs/content/security/SecuringOzoneHTTP.md +++ b/hadoop-hdds/docs/content/security/SecuringOzoneHTTP.md @@ -1,6 +1,6 @@ --- title: "Securing HTTP" -date: "2020-June-17" +date: "2020-06-17" summary: Secure HTTP web-consoles for Ozone services weight: 4 menu: diff --git a/hadoop-hdds/docs/content/security/SecuringS3.md b/hadoop-hdds/docs/content/security/SecuringS3.md index e6218b95e91e..04ef6921af65 100644 --- a/hadoop-hdds/docs/content/security/SecuringS3.md +++ b/hadoop-hdds/docs/content/security/SecuringS3.md @@ -1,6 +1,6 @@ --- title: "Securing S3" -date: "2019-April-03" +date: "2019-04-03" summary: Ozone supports S3 protocol, and uses AWS Signature Version 4 protocol which allows a seamless S3 experience. weight: 5 menu: diff --git a/hadoop-hdds/docs/content/security/SecuringS3.zh.md b/hadoop-hdds/docs/content/security/SecuringS3.zh.md index 218786fd366f..395b9303354b 100644 --- a/hadoop-hdds/docs/content/security/SecuringS3.zh.md +++ b/hadoop-hdds/docs/content/security/SecuringS3.zh.md @@ -1,6 +1,6 @@ --- title: "安全化 S3" -date: "2019-April-03" +date: "2019-04-03" summary: Ozone 支持 S3 协议,并使用 AWS Signature Version 4 protocol which allows a seamless S3 experience. weight: 5 diff --git a/hadoop-hdds/docs/content/security/SecuringTDE.md b/hadoop-hdds/docs/content/security/SecuringTDE.md index 3b75bee1bfd5..0d04a28aec77 100644 --- a/hadoop-hdds/docs/content/security/SecuringTDE.md +++ b/hadoop-hdds/docs/content/security/SecuringTDE.md @@ -1,6 +1,6 @@ --- title: "Transparent Data Encryption" -date: "2019-April-03" +date: "2019-04-03" summary: TDE allows data on the disks to be encrypted-at-rest and automatically decrypted during access. 
weight: 2 menu: diff --git a/hadoop-hdds/docs/content/security/SecuringTDE.zh.md b/hadoop-hdds/docs/content/security/SecuringTDE.zh.md index ed42519e0b25..d7fa4941e446 100644 --- a/hadoop-hdds/docs/content/security/SecuringTDE.zh.md +++ b/hadoop-hdds/docs/content/security/SecuringTDE.zh.md @@ -1,6 +1,6 @@ --- title: "透明数据加密" -date: "2019-April-03" +date: "2019-04-03" summary: 透明数据加密(Transparent Data Encryption,TDE)以密文形式在磁盘上保存数据,但可以在用户访问的时候自动进行解密。 weight: 2 menu: diff --git a/hadoop-hdds/docs/content/security/SecurityAcls.md b/hadoop-hdds/docs/content/security/SecurityAcls.md index 9976cbbc4fba..ee48999ed25d 100644 --- a/hadoop-hdds/docs/content/security/SecurityAcls.md +++ b/hadoop-hdds/docs/content/security/SecurityAcls.md @@ -1,6 +1,6 @@ --- title: "Ozone ACLs" -date: "2019-April-03" +date: "2019-04-03" weight: 6 menu: main: diff --git a/hadoop-hdds/docs/content/security/SecurityAcls.zh.md b/hadoop-hdds/docs/content/security/SecurityAcls.zh.md index 3d95fcf0877b..99751cd62da3 100644 --- a/hadoop-hdds/docs/content/security/SecurityAcls.zh.md +++ b/hadoop-hdds/docs/content/security/SecurityAcls.zh.md @@ -1,6 +1,6 @@ --- title: "Ozone 访问控制列表" -date: "2019-April-03" +date: "2019-04-03" weight: 6 menu: main: diff --git a/hadoop-hdds/docs/content/security/SecurityWithRanger.md b/hadoop-hdds/docs/content/security/SecurityWithRanger.md index bbbd8c19f32e..7dc1895ad3dc 100644 --- a/hadoop-hdds/docs/content/security/SecurityWithRanger.md +++ b/hadoop-hdds/docs/content/security/SecurityWithRanger.md @@ -1,6 +1,6 @@ --- title: "Apache Ranger" -date: "2019-April-03" +date: "2019-04-03" weight: 7 menu: main: diff --git a/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md b/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md index b7c7b8721bbe..8917c0b84bcf 100644 --- a/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md +++ b/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md @@ -1,6 +1,6 @@ --- title: "Apache Ranger" -date: "2019-April-03" +date: "2019-04-03" weight: 7 menu: main: From 6425679d71d414292153258841d3da79688b4ef2 Mon Sep 17 00:00:00 2001 From: tanvipenumudy <46785609+tanvipenumudy@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:31:52 +0800 Subject: [PATCH 15/17] HDDS-11414. Key listing for FSO buckets fails with forward client (#7161) (cherry picked from commit 8ca33c718b2253297f15941758ec4613926a700a) --- .../org/apache/hadoop/ozone/client/rpc/RpcClient.java | 8 +++++--- .../dist/src/main/smoketest/compatibility/read.robot | 4 ++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java index bd95ac6dffd9..34ef3d0abd29 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java @@ -2162,9 +2162,11 @@ public List listStatusLight(String volumeName, String bucketName, String keyName, boolean recursive, String startKey, long numEntries, boolean allowPartialPrefixes) throws IOException { OmKeyArgs keyArgs = prepareOmKeyArgs(volumeName, bucketName, keyName); - return ozoneManagerClient - .listStatusLight(keyArgs, recursive, startKey, numEntries, - allowPartialPrefixes); + // Disable listStatusLight in Ozone-1.4.x due to compatibility issues with Ozone-1.4.x listStatusLight. 
+    return ozoneManagerClient.listStatus(keyArgs, recursive, startKey, numEntries, allowPartialPrefixes)
+        .stream()
+        .map(OzoneFileStatusLight::fromOzoneFileStatus)
+        .collect(Collectors.toList());
   }
 
   /**
diff --git a/hadoop-ozone/dist/src/main/smoketest/compatibility/read.robot b/hadoop-ozone/dist/src/main/smoketest/compatibility/read.robot
index 511679c56f4d..57715cda95f8 100644
--- a/hadoop-ozone/dist/src/main/smoketest/compatibility/read.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/compatibility/read.robot
@@ -30,6 +30,10 @@ Key Can Be Read
 Dir Can Be Listed
     Execute             ozone fs -ls o3fs://bucket1.vol1/dir-${SUFFIX}
 
+Dir Can Be Listed Using Shell
+    ${result} =     Execute             ozone sh key list /vol1/bucket1
+    Should Contain      ${result}       key-${SUFFIX}
+
 File Can Be Get
     Execute             ozone fs -get o3fs://bucket1.vol1/dir-${SUFFIX}/file-${SUFFIX} /tmp/
     Execute             diff -q ${TESTFILE} /tmp/file-${SUFFIX}

From adcc577e249590029e4334fa447d97c365d383dc Mon Sep 17 00:00:00 2001
From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com>
Date: Sun, 21 Jan 2024 15:42:31 +0100
Subject: [PATCH 16/17] HDDS-10168. Add Ozone 1.4.0 to compatibility acceptance tests (#6040)

(cherry picked from commit dcdc47ec8dc1867905a5a6002646b30bd05f0031)
---
 hadoop-ozone/dist/src/main/compose/upgrade/test.sh      | 5 +++--
 hadoop-ozone/dist/src/main/compose/xcompat/clients.yaml | 7 +++++++
 hadoop-ozone/dist/src/main/compose/xcompat/test.sh      | 6 +++---
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh
index a267080bb190..9d7ec5d4e604 100755
--- a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh
+++ b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh
@@ -35,8 +35,9 @@ RESULT_DIR="$ALL_RESULT_DIR" create_results_dir
 
 # This is the version of Ozone that should use the runner image to run the
 # code that was built. Other versions will pull images from docker hub.
-export OZONE_CURRENT_VERSION=1.4.0
-run_test ha non-rolling-upgrade 1.3.0 "$OZONE_CURRENT_VERSION"
+export OZONE_CURRENT_VERSION=1.5.0
+run_test ha non-rolling-upgrade 1.4.0 "$OZONE_CURRENT_VERSION"
+# run_test ha non-rolling-upgrade 1.3.0 "$OZONE_CURRENT_VERSION"
 # run_test ha non-rolling-upgrade 1.2.1 "$OZONE_CURRENT_VERSION"
 # run_test om-ha non-rolling-upgrade 1.1.0 "$OZONE_CURRENT_VERSION"
 
diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/clients.yaml b/hadoop-ozone/dist/src/main/compose/xcompat/clients.yaml
index 15d4c7e427da..2057cdd8a993 100644
--- a/hadoop-ozone/dist/src/main/compose/xcompat/clients.yaml
+++ b/hadoop-ozone/dist/src/main/compose/xcompat/clients.yaml
@@ -45,6 +45,13 @@ services:
     volumes:
       - ../..:/opt/ozone
     command: ["sleep","1000000"]
+  old_client_1_4_0:
+    image: apache/ozone:1.4.0
+    env_file:
+      - docker-config
+    volumes:
+      - ../..:/opt/ozone
+    command: ["sleep","1000000"]
   new_client:
     image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION}
     env_file:
diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/test.sh b/hadoop-ozone/dist/src/main/compose/xcompat/test.sh
index baa239d56a82..419d397c19ec 100755
--- a/hadoop-ozone/dist/src/main/compose/xcompat/test.sh
+++ b/hadoop-ozone/dist/src/main/compose/xcompat/test.sh
@@ -21,8 +21,8 @@ COMPOSE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 export COMPOSE_DIR
 basename=$(basename ${COMPOSE_DIR})
 
-current_version=1.4.0
-old_versions="1.0.0 1.1.0 1.2.1 1.3.0" # container is needed for each version in clients.yaml
+current_version=1.5.0
+old_versions="1.0.0 1.1.0 1.2.1 1.3.0 1.4.0" # container is needed for each version in clients.yaml
 
 # shellcheck source=hadoop-ozone/dist/src/main/compose/testlib.sh
 source "${COMPOSE_DIR}/../testlib.sh"
@@ -77,7 +77,7 @@ test_cross_compatibility() {
 
 test_ec_cross_compatibility() {
   echo "Running Erasure Coded storage backward compatibility tests."
-  local cluster_versions_with_ec="1.3.0"
+  local cluster_versions_with_ec="1.3.0 1.4.0"
   local non_ec_client_versions="1.0.0 1.1.0 1.2.1"
 
   for cluster_version in ${cluster_versions_with_ec}; do

From b28b4168a81f494bd3205192931fd18977b0e0b3 Mon Sep 17 00:00:00 2001
From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com>
Date: Fri, 23 Aug 2024 09:28:17 +0200
Subject: [PATCH 17/17] HDDS-11333. Avoid hard-coded current version in upgrade/xcompat tests (#7089)

(cherry picked from commit 5b32c7dd6cfc1b7852320857ed19ae9440031d39)
---
 hadoop-ozone/dist/src/main/compose/upgrade/test.sh | 2 +-
 hadoop-ozone/dist/src/main/compose/xcompat/test.sh | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh
index 9d7ec5d4e604..6fc4763631bd 100755
--- a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh
+++ b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh
@@ -35,7 +35,7 @@ RESULT_DIR="$ALL_RESULT_DIR" create_results_dir
 
 # This is the version of Ozone that should use the runner image to run the
 # code that was built. Other versions will pull images from docker hub.
-export OZONE_CURRENT_VERSION=1.5.0
+export OZONE_CURRENT_VERSION="${ozone.version}"
 run_test ha non-rolling-upgrade 1.4.0 "$OZONE_CURRENT_VERSION"
 # run_test ha non-rolling-upgrade 1.3.0 "$OZONE_CURRENT_VERSION"
 # run_test ha non-rolling-upgrade 1.2.1 "$OZONE_CURRENT_VERSION"
diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/test.sh b/hadoop-ozone/dist/src/main/compose/xcompat/test.sh
index 419d397c19ec..695d8bf06abc 100755
--- a/hadoop-ozone/dist/src/main/compose/xcompat/test.sh
+++ b/hadoop-ozone/dist/src/main/compose/xcompat/test.sh
@@ -21,7 +21,7 @@ COMPOSE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 export COMPOSE_DIR
 basename=$(basename ${COMPOSE_DIR})
 
-current_version=1.5.0
+current_version="${ozone.version}"
 old_versions="1.0.0 1.1.0 1.2.1 1.3.0 1.4.0" # container is needed for each version in clients.yaml
 
 # shellcheck source=hadoop-ozone/dist/src/main/compose/testlib.sh
 source "${COMPOSE_DIR}/../testlib.sh"
@@ -77,7 +77,8 @@ test_cross_compatibility() {
 
 test_ec_cross_compatibility() {
   echo "Running Erasure Coded storage backward compatibility tests."
-  local cluster_versions_with_ec="1.3.0 1.4.0"
+  # local cluster_versions_with_ec="1.3.0 1.4.0 ${current_version}"
+  local cluster_versions_with_ec="${current_version}" # until HDDS-11334
   local non_ec_client_versions="1.0.0 1.1.0 1.2.1"
 
   for cluster_version in ${cluster_versions_with_ec}; do