diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 82356bafa595..f540c1df77a3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -147,7 +147,7 @@ jobs: distribution: 'temurin' java-version: ${{ matrix.java }} - name: Compile Ozone using Java ${{ matrix.java }} - run: hadoop-ozone/dev-support/checks/build.sh -Dskip.npx -Dskip.installnpx + run: hadoop-ozone/dev-support/checks/build.sh -Dskip.npx -Dskip.installnpx -Djavac.version=${{ matrix.java }} - name: Delete temporary build artifacts before caching run: | #Never cache local artifacts diff --git a/.github/workflows/post-commit.yml b/.github/workflows/post-commit.yml index c38a0b058d49..c1678f12c8e4 100644 --- a/.github/workflows/post-commit.yml +++ b/.github/workflows/post-commit.yml @@ -23,3 +23,4 @@ concurrency: jobs: CI: uses: ./.github/workflows/ci.yml + secrets: inherit diff --git a/.github/workflows/scheduled_ci.yml b/.github/workflows/scheduled_ci.yml index b89f52a4f189..86c3943999b7 100644 --- a/.github/workflows/scheduled_ci.yml +++ b/.github/workflows/scheduled_ci.yml @@ -19,3 +19,4 @@ on: jobs: CI: uses: ./.github/workflows/ci.yml + secrets: inherit diff --git a/LICENSE.txt b/LICENSE.txt index 76e979f2837c..021266844b82 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -207,8 +207,8 @@ licenses. This section summarizes those components and their licenses. See licenses/ for text of these licenses. 
-Apache Software Foundation License 2.0 --------------------------------------- +Apache License 2.0 +------------------ hadoop-hdds/framework/src/main/resources/webapps/static/nvd3-1.8.5.min.js.map hadoop-hdds/framework/src/main/resources/webapps/static/nvd3-1.8.5.min.css.map @@ -221,15 +221,15 @@ BSD 3-Clause ------------ hadoop-hdds/framework/src/main/resources/webapps/static/d3-3.5.17.min.js -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/d3-3.5.17.min.js -hadoop-hdds/docs/themes/ozonedoc/static/fonts/glyphicons-* -hadoop-hdds/docs/themes/ozonedoc/static/js/bootstrap.min.js + MIT License ----------- hadoop-hdds/framework/src/main/resources/webapps/static/bootstrap-3.4.1 hadoop-hdds/docs/themes/ozonedoc/static/css/bootstrap-* +hadoop-hdds/docs/themes/ozonedoc/static/js/bootstrap.min.js +hadoop-hdds/docs/themes/ozonedoc/static/fonts/glyphicons-* hadoop-hdds/framework/src/main/resources/webapps/static/angular-route-1.8.0.min.js hadoop-hdds/framework/src/main/resources/webapps/static/angular-nvd3-1.0.9.min.js diff --git a/NOTICE.txt b/NOTICE.txt index 3a7cf6effe57..7a1e855f6a33 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 +1,5 @@ Apache Ozone -Copyright 2021 The Apache Software Foundation +Copyright 2022 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/hadoop-hdds/annotations/pom.xml b/hadoop-hdds/annotations/pom.xml index cbef3d46715d..582e232889d5 100644 --- a/hadoop-hdds/annotations/pom.xml +++ b/hadoop-hdds/annotations/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-annotation-processing - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone annotation processing tools for validating custom annotations at compile time. 
diff --git a/hadoop-hdds/client/pom.xml b/hadoop-hdds/client/pom.xml index 3e3ff0cb9f8c..fd44ffc2432e 100644 --- a/hadoop-hdds/client/pom.xml +++ b/hadoop-hdds/client/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-client - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Client Library Apache Ozone HDDS Client jar diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java index 14a8aacbc015..e2c5471be8e4 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java @@ -66,6 +66,37 @@ public enum ChecksumCombineMode { tags = ConfigTag.CLIENT) private int streamBufferSize = 4 * 1024 * 1024; + @Config(key = "datastream.buffer.flush.size", + defaultValue = "16MB", + type = ConfigType.SIZE, + description = "The boundary at which putBlock is executed", + tags = ConfigTag.CLIENT) + private long dataStreamBufferFlushSize = 16 * 1024 * 1024; + + @Config(key = "datastream.min.packet.size", + defaultValue = "1MB", + type = ConfigType.SIZE, + description = "The maximum size of the ByteBuffer " + + "(used via ratis streaming)", + tags = ConfigTag.CLIENT) + private int dataStreamMinPacketSize = 1024 * 1024; + + @Config(key = "datastream.window.size", + defaultValue = "64MB", + type = ConfigType.SIZE, + description = "Maximum size of BufferList(used for retry) size per " + + "BlockDataStreamOutput instance", + tags = ConfigTag.CLIENT) + private long streamWindowSize = 64 * 1024 * 1024; + + @Config(key = "datastream.pipeline.mode", + defaultValue = "true", + description = "Streaming write support both pipeline mode(datanode1->" + + "datanode2->datanode3) and star mode(datanode1->datanode2, " + + "datanode1->datanode3). 
By default we use pipeline mode.", + tags = ConfigTag.CLIENT) + private boolean datastreamPipelineMode = true; + @Config(key = "stream.buffer.increment", defaultValue = "0B", type = ConfigType.SIZE, @@ -143,6 +174,13 @@ public enum ChecksumCombineMode { tags = ConfigTag.CLIENT) private int maxECStripeWriteRetries = 10; + @Config(key = "ec.stripe.queue.size", + defaultValue = "2", + description = "The max number of EC stripes can be buffered in client " + + " before flushing into datanodes.", + tags = ConfigTag.CLIENT) + private int ecStripeQueueSize = 2; + @Config(key = "exclude.nodes.expiry.time", defaultValue = "600000", description = "Time after which an excluded node is reconsidered for" + @@ -244,6 +282,22 @@ public void setStreamBufferMaxSize(long streamBufferMaxSize) { this.streamBufferMaxSize = streamBufferMaxSize; } + public int getDataStreamMinPacketSize() { + return dataStreamMinPacketSize; + } + + public void setDataStreamMinPacketSize(int dataStreamMinPacketSize) { + this.dataStreamMinPacketSize = dataStreamMinPacketSize; + } + + public long getStreamWindowSize() { + return streamWindowSize; + } + + public void setStreamWindowSize(long streamWindowSize) { + this.streamWindowSize = streamWindowSize; + } + public int getMaxRetryCount() { return maxRetryCount; } @@ -288,6 +342,10 @@ public int getMaxECStripeWriteRetries() { return this.maxECStripeWriteRetries; } + public int getEcStripeQueueSize() { + return this.ecStripeQueueSize; + } + public long getExcludeNodesExpiryTime() { return excludeNodesExpiryTime; } @@ -296,6 +354,14 @@ public int getBufferIncrement() { return bufferIncrement; } + public long getDataStreamBufferFlushSize() { + return dataStreamBufferFlushSize; + } + + public void setDataStreamBufferFlushSize(long dataStreamBufferFlushSize) { + this.dataStreamBufferFlushSize = dataStreamBufferFlushSize; + } + public ChecksumCombineMode getChecksumCombineMode() { try { return ChecksumCombineMode.valueOf(checksumCombineMode); @@ -325,4 +391,12 @@ 
public void setFsDefaultBucketLayout(String bucketLayout) { public String getFsDefaultBucketLayout() { return fsDefaultBucketLayout; } + + public boolean isDatastreamPipelineMode() { + return datastreamPipelineMode; + } + + public void setDatastreamPipelineMode(boolean datastreamPipelineMode) { + this.datastreamPipelineMode = datastreamPipelineMode; + } } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java index cab7bee7801c..1fee58c3b45e 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java @@ -198,7 +198,7 @@ protected NettyChannelBuilder createChannel(DatanodeDetails dn, int port) NettyChannelBuilder.forAddress(dn.getIpAddress(), port).usePlaintext() .maxInboundMessageSize(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE) .intercept(new GrpcClientInterceptor()); - if (secConfig.isGrpcTlsEnabled()) { + if (secConfig.isSecurityEnabled() && secConfig.isGrpcTlsEnabled()) { SslContextBuilder sslContextBuilder = GrpcSslContexts.forClient(); if (caCerts != null) { sslContextBuilder.trustManager(caCerts); diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java index d0fd0db12950..3ea269b08b08 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java @@ -53,6 +53,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.ratis.client.RaftClient; +import org.apache.ratis.client.api.DataStreamApi; import org.apache.ratis.grpc.GrpcTlsConfig; import org.apache.ratis.proto.RaftProtos; import 
org.apache.ratis.proto.RaftProtos.ReplicationLevel; @@ -135,7 +136,7 @@ private long updateCommitInfosMap(RaftClientReply reply) { .orElse(0L); } - private long updateCommitInfosMap( + public long updateCommitInfosMap( Collection commitInfoProtos) { // if the commitInfo map is empty, just update the commit indexes for each // of the servers @@ -382,4 +383,8 @@ public XceiverClientReply sendCommandAsync( throw new UnsupportedOperationException( "Operation Not supported for ratis client"); } + + public DataStreamApi getDataStreamApi() { + return this.getClient().getDataStreamApi(); + } } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/AbstractDataStreamOutput.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/AbstractDataStreamOutput.java new file mode 100644 index 000000000000..cad1d0479249 --- /dev/null +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/AbstractDataStreamOutput.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.scm.storage; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.scm.client.HddsClientUtils; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.ratis.protocol.exceptions.AlreadyClosedException; +import org.apache.ratis.protocol.exceptions.RaftRetryFailureException; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.util.Map; +import java.util.Objects; + +/** + * This class is used for error handling methods. + */ +public abstract class AbstractDataStreamOutput + implements ByteBufferStreamOutput { + + private final Map, RetryPolicy> retryPolicyMap; + private int retryCount; + private boolean isException; + + protected AbstractDataStreamOutput( + Map, RetryPolicy> retryPolicyMap) { + this.retryPolicyMap = retryPolicyMap; + this.isException = false; + this.retryCount = 0; + } + + @VisibleForTesting + public int getRetryCount() { + return retryCount; + } + + protected void resetRetryCount() { + retryCount = 0; + } + + protected boolean isException() { + return isException; + } + + /** + * Checks if the provided exception signifies retry failure in ratis client. + * In case of retry failure, ratis client throws RaftRetryFailureException + * and all succeeding operations are failed with AlreadyClosedException. 
+ */ + protected boolean checkForRetryFailure(Throwable t) { + return t instanceof RaftRetryFailureException + || t instanceof AlreadyClosedException; + } + + // Every container specific exception from datanode will be seen as + // StorageContainerException + protected boolean checkIfContainerToExclude(Throwable t) { + return t instanceof StorageContainerException; + } + + protected void setExceptionAndThrow(IOException ioe) throws IOException { + isException = true; + throw ioe; + } + + protected void handleRetry(IOException exception) throws IOException { + RetryPolicy retryPolicy = retryPolicyMap + .get(HddsClientUtils.checkForException(exception).getClass()); + if (retryPolicy == null) { + retryPolicy = retryPolicyMap.get(Exception.class); + } + handleRetry(exception, retryPolicy); + } + + protected void handleRetry(IOException exception, RetryPolicy retryPolicy) + throws IOException { + RetryPolicy.RetryAction action = null; + try { + action = retryPolicy.shouldRetry(exception, retryCount, 0, true); + } catch (Exception e) { + setExceptionAndThrow(new IOException(e)); + } + if (action != null && + action.action == RetryPolicy.RetryAction.RetryDecision.FAIL) { + String msg = ""; + if (action.reason != null) { + msg = "Retry request failed. 
" + action.reason; + } + setExceptionAndThrow(new IOException(msg, exception)); + } + + // Throw the exception if the thread is interrupted + if (Thread.currentThread().isInterrupted()) { + setExceptionAndThrow(exception); + } + Objects.requireNonNull(action); + Preconditions.checkArgument( + action.action == RetryPolicy.RetryAction.RetryDecision.RETRY); + if (action.delayMillis > 0) { + try { + Thread.sleep(action.delayMillis); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + IOException ioe = (IOException) new InterruptedIOException( + "Interrupted: action=" + action + ", retry policy=" + retryPolicy) + .initCause(e); + setExceptionAndThrow(ioe); + } + } + retryCount++; + } +} diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockDataStreamOutput.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockDataStreamOutput.java new file mode 100644 index 000000000000..d19f2aea1300 --- /dev/null +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockDataStreamOutput.java @@ -0,0 +1,733 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.scm.storage; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.BlockData; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.KeyValue; +import org.apache.hadoop.hdds.ratis.ContainerCommandRequestMessage; +import org.apache.hadoop.hdds.ratis.RatisHelper; +import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.XceiverClientFactory; +import org.apache.hadoop.hdds.scm.XceiverClientManager; +import org.apache.hadoop.hdds.scm.XceiverClientMetrics; +import org.apache.hadoop.hdds.scm.XceiverClientRatis; +import org.apache.hadoop.hdds.scm.XceiverClientReply; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.ozone.common.Checksum; +import org.apache.hadoop.ozone.common.ChecksumData; +import org.apache.hadoop.ozone.common.OzoneChecksumException; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.ratis.client.api.DataStreamOutput; +import org.apache.ratis.io.StandardWriteOption; +import org.apache.ratis.protocol.DataStreamReply; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.LinkedList; +import 
java.util.List; +import java.util.Queue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +import static org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.putBlockAsync; + +/** + * An {@link ByteBufferStreamOutput} used by the REST service in combination + * with the SCMClient to write the value of a key to a sequence + * of container chunks. Writes are buffered locally and periodically written to + * the container as a new chunk. In order to preserve the semantics that + * replacement of a pre-existing key is atomic, each instance of the stream has + * an internal unique identifier. This unique identifier and a monotonically + * increasing chunk index form a composite key that is used as the chunk name. + * After all data is written, a putKey call creates or updates the corresponding + * container key, and this call includes the full list of chunks that make up + * the key data. The list of chunks is updated all at once. Therefore, a + * concurrent reader never can see an intermediate state in which different + * chunks of data from different versions of the key data are interleaved. + * This class encapsulates all state management for buffering and writing + * through to the container. 
+ */ +public class BlockDataStreamOutput implements ByteBufferStreamOutput { + public static final Logger LOG = + LoggerFactory.getLogger(BlockDataStreamOutput.class); + + public static final int PUT_BLOCK_REQUEST_LENGTH_MAX = 1 << 20; // 1MB + + public static final String EXCEPTION_MSG = + "Unexpected Storage Container Exception: "; + private static final CompletableFuture[] EMPTY_FUTURE_ARRAY = {}; + + private AtomicReference blockID; + + private final BlockData.Builder containerBlockData; + private XceiverClientFactory xceiverClientFactory; + private XceiverClientRatis xceiverClient; + private OzoneClientConfig config; + + private int chunkIndex; + private final AtomicLong chunkOffset = new AtomicLong(); + + // Similar to 'BufferPool' but this list maintains only references + // to the ByteBuffers. + private List bufferList; + + // The IOException will be set by response handling thread in case there is an + // exception received in the response. If the exception is set, the next + // request will fail upfront. + private final AtomicReference ioException; + private final ExecutorService responseExecutor; + + // the effective length of data flushed so far + private long totalDataFlushedLength; + + // effective data write attempted so far for the block + private long writtenDataLength; + + // This object will maintain the commitIndexes and byteBufferList in order + // Also, corresponding to the logIndex, the corresponding list of buffers will + // be released from the buffer pool. + private final StreamCommitWatcher commitWatcher; + + private Queue> + putBlockFutures = new LinkedList<>(); + + private final List failedServers; + private final Checksum checksum; + + //number of buffers used before doing a flush/putBlock. 
+ private int flushPeriod; + private final Token token; + private final DataStreamOutput out; + private CompletableFuture dataStreamCloseReply; + private List> futures = new ArrayList<>(); + private final long syncSize = 0; // TODO: disk sync is disabled for now + private long syncPosition = 0; + private StreamBuffer currentBuffer; + private XceiverClientMetrics metrics; + // buffers for which putBlock is yet to be executed + private List buffersForPutBlock; + private boolean isDatastreamPipelineMode; + /** + * Creates a new BlockDataStreamOutput. + * + * @param blockID block ID + * @param xceiverClientManager client manager that controls client + * @param pipeline pipeline where block will be written + */ + public BlockDataStreamOutput( + BlockID blockID, + XceiverClientFactory xceiverClientManager, + Pipeline pipeline, + OzoneClientConfig config, + Token token, + List bufferList + ) throws IOException { + this.xceiverClientFactory = xceiverClientManager; + this.config = config; + this.isDatastreamPipelineMode = config.isDatastreamPipelineMode(); + this.blockID = new AtomicReference<>(blockID); + KeyValue keyValue = + KeyValue.newBuilder().setKey("TYPE").setValue("KEY").build(); + this.containerBlockData = + BlockData.newBuilder().setBlockID(blockID.getDatanodeBlockIDProtobuf()) + .addMetadata(keyValue); + this.xceiverClient = + (XceiverClientRatis)xceiverClientManager.acquireClient(pipeline); + // Alternatively, stream setup can be delayed till the first chunk write. 
+ this.out = setupStream(pipeline); + this.token = token; + this.bufferList = bufferList; + flushPeriod = (int) (config.getStreamBufferFlushSize() / config + .getStreamBufferSize()); + + Preconditions + .checkArgument( + (long) flushPeriod * config.getStreamBufferSize() == config + .getStreamBufferFlushSize()); + + // A single thread executor handle the responses of async requests + responseExecutor = Executors.newSingleThreadExecutor(); + commitWatcher = new StreamCommitWatcher(xceiverClient, bufferList); + totalDataFlushedLength = 0; + writtenDataLength = 0; + failedServers = new ArrayList<>(0); + ioException = new AtomicReference<>(null); + checksum = new Checksum(config.getChecksumType(), + config.getBytesPerChecksum()); + metrics = XceiverClientManager.getXceiverClientMetrics(); + } + + private DataStreamOutput setupStream(Pipeline pipeline) throws IOException { + // Execute a dummy WriteChunk request to get the path of the target file, + // but does NOT write any data to it. + ContainerProtos.WriteChunkRequestProto.Builder writeChunkRequest = + ContainerProtos.WriteChunkRequestProto.newBuilder() + .setBlockID(blockID.get().getDatanodeBlockIDProtobuf()); + + String id = xceiverClient.getPipeline().getFirstNode().getUuidString(); + ContainerProtos.ContainerCommandRequestProto.Builder builder = + ContainerProtos.ContainerCommandRequestProto.newBuilder() + .setCmdType(ContainerProtos.Type.StreamInit) + .setContainerID(blockID.get().getContainerID()) + .setDatanodeUuid(id).setWriteChunk(writeChunkRequest); + + ContainerCommandRequestMessage message = + ContainerCommandRequestMessage.toMessage(builder.build(), null); + + if (isDatastreamPipelineMode) { + return Preconditions.checkNotNull(xceiverClient.getDataStreamApi()) + .stream(message.getContent().asReadOnlyByteBuffer(), + RatisHelper.getRoutingTable(pipeline)); + } else { + return Preconditions.checkNotNull(xceiverClient.getDataStreamApi()) + .stream(message.getContent().asReadOnlyByteBuffer()); + } + } + + 
public BlockID getBlockID() { + return blockID.get(); + } + + public long getWrittenDataLength() { + return writtenDataLength; + } + + public List getFailedServers() { + return failedServers; + } + + @VisibleForTesting + public XceiverClientRatis getXceiverClient() { + return xceiverClient; + } + + public IOException getIoException() { + return ioException.get(); + } + + @Override + public void write(ByteBuffer b, int off, int len) throws IOException { + checkOpen(); + if (b == null) { + throw new NullPointerException(); + } + if (len == 0) { + return; + } + while (len > 0) { + allocateNewBufferIfNeeded(); + int writeLen = Math.min(len, currentBuffer.length()); + final StreamBuffer buf = new StreamBuffer(b, off, writeLen); + currentBuffer.put(buf); + writeChunkIfNeeded(); + off += writeLen; + writtenDataLength += writeLen; + len -= writeLen; + doFlushIfNeeded(); + } + } + + private void writeChunkIfNeeded() throws IOException { + if (currentBuffer.length() == 0) { + writeChunk(currentBuffer); + currentBuffer = null; + } + } + + private void writeChunk(StreamBuffer sb) throws IOException { + bufferList.add(sb); + if (buffersForPutBlock == null) { + buffersForPutBlock = new ArrayList<>(); + } + buffersForPutBlock.add(sb); + ByteBuffer dup = sb.duplicate(); + dup.position(0); + dup.limit(sb.position()); + writeChunkToContainer(dup); + } + + private void allocateNewBufferIfNeeded() { + if (currentBuffer == null) { + currentBuffer = + StreamBuffer.allocate(config.getDataStreamMinPacketSize()); + } + } + + private void doFlushIfNeeded() throws IOException { + long boundary = config.getDataStreamBufferFlushSize() / config + .getDataStreamMinPacketSize(); + // streamWindow is the maximum number of buffers that + // are allowed to exist in the bufferList. If buffers in + // the list exceed this limit , client will wait till it gets + // one putBlockResponse (first index) . This is similar to + // the bufferFull condition in async write path. 
+ long streamWindow = config.getStreamWindowSize() / config + .getDataStreamMinPacketSize(); + if (!bufferList.isEmpty() && bufferList.size() % boundary == 0 && + buffersForPutBlock != null && !buffersForPutBlock.isEmpty()) { + updateFlushLength(); + executePutBlock(false, false); + } + if (bufferList.size() == streamWindow) { + try { + checkOpen(); + if (!putBlockFutures.isEmpty()) { + putBlockFutures.remove().get(); + } + } catch (ExecutionException e) { + handleExecutionException(e); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + handleInterruptedException(ex, true); + } + watchForCommit(true); + } + } + + private void updateFlushLength() { + totalDataFlushedLength = writtenDataLength; + } + + @VisibleForTesting + public long getTotalDataFlushedLength() { + return totalDataFlushedLength; + } + /** + * Will be called on the retryPath in case closedContainerException/ + * TimeoutException. + * @param len length of data to write + * @throws IOException if error occurred + */ + + public void writeOnRetry(long len) throws IOException { + if (len == 0) { + return; + } + if (LOG.isDebugEnabled()) { + LOG.debug("Retrying write length {} for blockID {}", len, blockID); + } + int count = 0; + while (len > 0) { + final StreamBuffer buf = bufferList.get(count); + final long writeLen = Math.min(buf.position(), len); + if (buffersForPutBlock == null) { + buffersForPutBlock = new ArrayList<>(); + } + buffersForPutBlock.add(buf); + final ByteBuffer duplicated = buf.duplicate(); + duplicated.position(0); + duplicated.limit(buf.position()); + writeChunkToContainer(duplicated); + len -= writeLen; + count++; + writtenDataLength += writeLen; + } + + + } + + /** + * calls watchForCommit API of the Ratis Client. For Standalone client, + * it is a no op. 
+ * @param bufferFull flag indicating whether bufferFull condition is hit or + * its called as part flush/close + * @return minimum commit index replicated to all nodes + * @throws IOException IOException in case watch gets timed out + */ + private void watchForCommit(boolean bufferFull) throws IOException { + checkOpen(); + try { + XceiverClientReply reply = bufferFull ? + commitWatcher.streamWatchOnFirstIndex() : + commitWatcher.streamWatchOnLastIndex(); + if (reply != null) { + List dnList = reply.getDatanodes(); + if (!dnList.isEmpty()) { + Pipeline pipe = xceiverClient.getPipeline(); + + LOG.warn("Failed to commit BlockId {} on {}. Failed nodes: {}", + blockID, pipe, dnList); + failedServers.addAll(dnList); + } + } + } catch (IOException ioe) { + setIoException(ioe); + throw getIoException(); + } + + } + + /** + * @param close whether putBlock is happening as part of closing the stream + * @param force true if no data was written since most recent putBlock and + * stream is being closed + */ + private void executePutBlock(boolean close, + boolean force) throws IOException { + checkOpen(); + long flushPos = totalDataFlushedLength; + final List byteBufferList; + if (!force) { + Preconditions.checkNotNull(bufferList); + byteBufferList = buffersForPutBlock; + buffersForPutBlock = null; + Preconditions.checkNotNull(byteBufferList); + } else { + byteBufferList = null; + } + waitFuturesComplete(); + final BlockData blockData = containerBlockData.build(); + if (close) { + final ContainerCommandRequestProto putBlockRequest + = ContainerProtocolCalls.getPutBlockRequest( + xceiverClient.getPipeline(), blockData, true, token); + dataStreamCloseReply = executePutBlockClose(putBlockRequest, + PUT_BLOCK_REQUEST_LENGTH_MAX, out); + dataStreamCloseReply.whenComplete((reply, e) -> { + if (e != null || reply == null || !reply.isSuccess()) { + LOG.warn("Failed executePutBlockClose, reply=" + reply, e); + try { + executePutBlock(true, false); + } catch (IOException ex) { + throw 
new CompletionException(ex); + } + } + }); + } + + try { + XceiverClientReply asyncReply = + putBlockAsync(xceiverClient, blockData, close, token); + final CompletableFuture flushFuture + = asyncReply.getResponse().thenApplyAsync(e -> { + try { + validateResponse(e); + } catch (IOException sce) { + throw new CompletionException(sce); + } + // if the ioException is not set, putBlock is successful + if (getIoException() == null && !force) { + BlockID responseBlockID = BlockID.getFromProtobuf( + e.getPutBlock().getCommittedBlockLength().getBlockID()); + Preconditions.checkState(blockID.get().getContainerBlockID() + .equals(responseBlockID.getContainerBlockID())); + // updates the bcsId of the block + blockID.set(responseBlockID); + if (LOG.isDebugEnabled()) { + LOG.debug("Adding index " + asyncReply.getLogIndex() + + " commitMap size " + + commitWatcher.getCommitInfoMapSize() + " flushLength " + + flushPos + " blockID " + blockID); + } + // for standalone protocol, logIndex will always be 0. 
+ commitWatcher + .updateCommitInfoMap(asyncReply.getLogIndex(), + byteBufferList); + } + return e; + }, responseExecutor).exceptionally(e -> { + if (LOG.isDebugEnabled()) { + LOG.debug("putBlock failed for blockID {} with exception {}", + blockID, e.getLocalizedMessage()); + } + CompletionException ce = new CompletionException(e); + setIoException(ce); + throw ce; + }); + putBlockFutures.add(flushFuture); + } catch (IOException | ExecutionException e) { + throw new IOException(EXCEPTION_MSG + e.toString(), e); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + handleInterruptedException(ex, false); + } + } + + public static CompletableFuture executePutBlockClose( + ContainerCommandRequestProto putBlockRequest, int max, + DataStreamOutput out) { + final ByteBuffer putBlock = ContainerCommandRequestMessage.toMessage( + putBlockRequest, null).getContent().asReadOnlyByteBuffer(); + final ByteBuffer protoLength = getProtoLength(putBlock, max); + RatisHelper.debug(putBlock, "putBlock", LOG); + out.writeAsync(putBlock); + RatisHelper.debug(protoLength, "protoLength", LOG); + return out.writeAsync(protoLength, StandardWriteOption.CLOSE); + } + + public static ByteBuffer getProtoLength(ByteBuffer putBlock, int max) { + final int protoLength = putBlock.remaining(); + Preconditions.checkState(protoLength <= max, + "protoLength== %s > max = %s", protoLength, max); + final ByteBuffer buffer = ByteBuffer.allocate(4); + buffer.putInt(protoLength); + buffer.flip(); + LOG.debug("protoLength = {}", protoLength); + Preconditions.checkState(buffer.remaining() == 4); + return buffer.asReadOnlyBuffer(); + } + + @Override + public void flush() throws IOException { + if (xceiverClientFactory != null && xceiverClient != null + && !config.isStreamBufferFlushDelay()) { + waitFuturesComplete(); + } + } + + public void waitFuturesComplete() throws IOException { + try { + CompletableFuture.allOf(futures.toArray(EMPTY_FUTURE_ARRAY)).get(); + futures.clear(); + } catch 
(Exception e) { + LOG.warn("Failed to write all chunks through stream: " + e); + throw new IOException(e); + } + } + + /** + * @param close whether the flush is happening as part of closing the stream + */ + private void handleFlush(boolean close) + throws IOException, InterruptedException, ExecutionException { + checkOpen(); + // flush the last chunk data residing on the currentBuffer + if (totalDataFlushedLength < writtenDataLength) { + // This can be a partially filled chunk. Since we are flushing the buffer + // here, we just limit this buffer to the current position. So that next + // write will happen in new buffer + + if (currentBuffer != null) { + writeChunk(currentBuffer); + currentBuffer = null; + } + updateFlushLength(); + executePutBlock(close, false); + } else if (close) { + // forcing an "empty" putBlock if stream is being closed without new + // data since latest flush - we need to send the "EOF" flag + executePutBlock(true, true); + } + CompletableFuture.allOf(putBlockFutures.toArray(EMPTY_FUTURE_ARRAY)).get(); + watchForCommit(false); + // just check again if the exception is hit while waiting for the + // futures to ensure flush has indeed succeeded + + // irrespective of whether the commitIndex2flushedDataMap is empty + // or not, ensure there is no exception set + checkOpen(); + } + + @Override + public void close() throws IOException { + if (xceiverClientFactory != null && xceiverClient != null) { + try { + handleFlush(true); + dataStreamCloseReply.get(); + } catch (ExecutionException e) { + handleExecutionException(e); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + handleInterruptedException(ex, true); + } finally { + cleanup(false); + } + + } + } + + private void validateResponse( + ContainerProtos.ContainerCommandResponseProto responseProto) + throws IOException { + try { + // if the ioException is already set, it means a prev request has failed + // just throw the exception. 
The current operation will fail with the + // original error + IOException exception = getIoException(); + if (exception != null) { + throw exception; + } + ContainerProtocolCalls.validateContainerResponse(responseProto); + } catch (StorageContainerException sce) { + setIoException(sce); + throw sce; + } + } + + + private void setIoException(Throwable e) { + IOException ioe = getIoException(); + if (ioe == null) { + IOException exception = new IOException(EXCEPTION_MSG + e.toString(), e); + ioException.compareAndSet(null, exception); + } else { + LOG.debug("Previous request had already failed with " + ioe.toString() + + " so subsequent request also encounters" + + " Storage Container Exception ", e); + } + } + + public void cleanup(boolean invalidateClient) { + if (xceiverClientFactory != null) { + xceiverClientFactory.releaseClient(xceiverClient, invalidateClient); + } + xceiverClientFactory = null; + xceiverClient = null; + commitWatcher.cleanup(); + responseExecutor.shutdown(); + } + + /** + * Checks if the stream is open or exception has occurred. + * If not, throws an exception. + * + * @throws IOException if stream is closed + */ + private void checkOpen() throws IOException { + if (isClosed()) { + throw new IOException("BlockDataStreamOutput has been closed."); + } else if (getIoException() != null) { + throw getIoException(); + } + } + + public boolean isClosed() { + return xceiverClient == null; + } + + private boolean needSync(long position) { + if (syncSize > 0) { + // TODO: or position >= fileLength + if (position - syncPosition >= syncSize) { + syncPosition = position; + return true; + } + } + return false; + } + + /** + * Writes buffered data as a new chunk to the container and saves chunk + * information to be used later in putKey call. 
+ * + * @param buf chunk data to write, from position to limit + * @throws IOException if there is an I/O error while performing the call + * @throws OzoneChecksumException if there is an error while computing + * checksum + */ + private void writeChunkToContainer(ByteBuffer buf) + throws IOException { + final int effectiveChunkSize = buf.remaining(); + final long offset = chunkOffset.getAndAdd(effectiveChunkSize); + ChecksumData checksumData = checksum.computeChecksum( + buf.asReadOnlyBuffer()); + ChunkInfo chunkInfo = ChunkInfo.newBuilder() + .setChunkName(blockID.get().getLocalID() + "_chunk_" + ++chunkIndex) + .setOffset(offset) + .setLen(effectiveChunkSize) + .setChecksumData(checksumData.getProtoBufMessage()) + .build(); + metrics.incrPendingContainerOpsMetrics(ContainerProtos.Type.WriteChunk); + + if (LOG.isDebugEnabled()) { + LOG.debug("Writing chunk {} length {} at offset {}", + chunkInfo.getChunkName(), effectiveChunkSize, offset); + } + + CompletableFuture future = + (needSync(offset + effectiveChunkSize) ? + out.writeAsync(buf, StandardWriteOption.SYNC) : + out.writeAsync(buf)) + .whenCompleteAsync((r, e) -> { + if (e != null || !r.isSuccess()) { + if (e == null) { + e = new IOException("result is not success"); + } + String msg = + "Failed to write chunk " + chunkInfo.getChunkName() + + " " + "into block " + blockID; + LOG.debug("{}, exception: {}", msg, e.getLocalizedMessage()); + CompletionException ce = new CompletionException(msg, e); + setIoException(ce); + throw ce; + } else if (r.isSuccess()) { + xceiverClient.updateCommitInfosMap(r.getCommitInfos()); + } + }, responseExecutor); + + futures.add(future); + containerBlockData.addChunks(chunkInfo); + } + + @VisibleForTesting + public void setXceiverClient(XceiverClientRatis xceiverClient) { + this.xceiverClient = xceiverClient; + } + + /** + * Handles InterruptedException. 
+ * + * @param ex + * @param processExecutionException is optional, if passed as TRUE, then + * handle ExecutionException else skip it. + * @throws IOException + */ + private void handleInterruptedException(Exception ex, + boolean processExecutionException) + throws IOException { + LOG.error("Command execution was interrupted."); + if (processExecutionException) { + handleExecutionException(ex); + } else { + throw new IOException(EXCEPTION_MSG + ex.toString(), ex); + } + } + + /** + * Handles ExecutionException by adjusting buffers. + * @param ex + * @throws IOException + */ + private void handleExecutionException(Exception ex) throws IOException { + setIoException(ex); + throw getIoException(); + } + + public long getTotalAckDataLength() { + return commitWatcher.getTotalAckDataLength(); + } +} diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockExtendedInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockExtendedInputStream.java index 5be2b078a7e8..5f46059b66f0 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockExtendedInputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockExtendedInputStream.java @@ -23,12 +23,19 @@ * Abstract class used as an interface for input streams related to Ozone * blocks. 
*/ -public abstract class BlockExtendedInputStream extends ExtendedInputStream { +public abstract class BlockExtendedInputStream extends ExtendedInputStream + implements PartInputStream { public abstract BlockID getBlockID(); - public abstract long getRemaining(); + @Override + public long getRemaining() { + return getLength() - getPos(); + } + @Override public abstract long getLength(); + @Override + public abstract long getPos(); } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java index 4626f580f5e4..e89c7f9ee8c0 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java @@ -275,11 +275,6 @@ protected ChunkInputStream createChunkInputStream(ChunkInfo chunkInfo) { xceiverClientFactory, () -> pipeline, verifyChecksum, token); } - @Override - public synchronized long getRemaining() { - return length - getPos(); - } - @Override protected synchronized int readWithStrategy(ByteReaderStrategy strategy) throws IOException { diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java index de75e47781c0..5a478294bbd7 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java @@ -372,6 +372,10 @@ public void writeOnRetry(long len) throws IOException { * @throws IOException */ private void handleFullBuffer() throws IOException { + waitForFlushAndCommit(true); + } + + void waitForFlushAndCommit(boolean bufferFull) throws IOException { try { checkOpen(); waitOnFlushFutures(); @@ -381,7 +385,7 @@ private void 
handleFullBuffer() throws IOException { Thread.currentThread().interrupt(); handleInterruptedException(ex, true); } - watchForCommit(true); + watchForCommit(bufferFull); } void releaseBuffersOnException() { diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ByteBufferStreamOutput.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ByteBufferStreamOutput.java new file mode 100644 index 000000000000..0650a685b634 --- /dev/null +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ByteBufferStreamOutput.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.storage; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.ByteBuffer; + +/** +* This interface is for writing an output stream of ByteBuffers. +* A ByteBufferStreamOutput accepts nio ByteBuffer and sends them to some sink. +*/ +public interface ByteBufferStreamOutput extends Closeable { + /** + * Try to write all the bytes in ByteBuf b to DataStream. + * + * @param b the data. + * @exception IOException if an I/O error occurs. 
+ */ + default void write(ByteBuffer b) throws IOException { + write(b, b.position(), b.remaining()); + } + + /** + * Try to write the [off:off + len) slice in ByteBuf b to DataStream. + * + * @param b the data. + * @param off the start offset in the data. + * @param len the number of bytes to write. + * @exception IOException if an I/O error occurs. + */ + void write(ByteBuffer b, int off, int len) throws IOException; + + /** + * Flushes this DataStream output and forces any buffered output bytes + * to be written out. + * + * @exception IOException if an I/O error occurs. + */ + void flush() throws IOException; +} diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/MultipartCryptoKeyInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/MultipartInputStream.java similarity index 63% rename from hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/MultipartCryptoKeyInputStream.java rename to hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/MultipartInputStream.java index c7fc21cbb242..4bc144f3bd75 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/MultipartCryptoKeyInputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/MultipartInputStream.java @@ -16,38 +16,28 @@ * limitations under the License. 
*/ -package org.apache.hadoop.ozone.client.io; +package org.apache.hadoop.hdds.scm.storage; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; -import org.apache.hadoop.crypto.CryptoInputStream; -import org.apache.hadoop.fs.CanUnbuffer; import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.fs.Seekable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.EOFException; import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; import java.util.List; /** - * {@link OzoneInputStream} for accessing MPU keys in encrypted buckets. + * A stream for accessing multipart streams. */ -public class MultipartCryptoKeyInputStream extends OzoneInputStream - implements Seekable, CanUnbuffer { +public class MultipartInputStream extends ExtendedInputStream { - private static final Logger LOG = - LoggerFactory.getLogger(MultipartCryptoKeyInputStream.class); + private final String key; + private final long length; - private static final int EOF = -1; - - private String key; - private long length = 0L; - private boolean closed = false; - - // List of OzoneCryptoInputStream, one for each part of the key - private List partStreams; + // List of PartInputStream, one for each part of the key + private final List partStreams; // partOffsets[i] stores the index of the first data byte in // partStream w.r.t the whole key data. @@ -55,18 +45,19 @@ public class MultipartCryptoKeyInputStream extends OzoneInputStream // data from indices 0 - 199, part[1] from indices 200 - 399 and so on. // Then, partOffsets[0] = 0 (the offset of the first byte of data in // part[0]), partOffsets[1] = 200 and so on. - private long[] partOffsets; + private final long[] partOffsets; + private boolean closed; // Index of the partStream corresponding to the current position of the // MultipartCryptoKeyInputStream. 
- private int partIndex = 0; + private int partIndex; // Tracks the partIndex corresponding to the last seeked position so that it // can be reset if a new position is seeked. - private int prevPartIndex = 0; + private int prevPartIndex; - public MultipartCryptoKeyInputStream(String keyName, - List inputStreams) { + public MultipartInputStream(String keyName, + List inputStreams) { Preconditions.checkNotNull(inputStreams); @@ -76,69 +67,59 @@ public MultipartCryptoKeyInputStream(String keyName, // Calculate and update the partOffsets this.partOffsets = new long[inputStreams.size()]; int i = 0; - for (OzoneCryptoInputStream ozoneCryptoInputStream : inputStreams) { - this.partOffsets[i++] = length; - length += ozoneCryptoInputStream.getLength(); + long streamLength = 0L; + for (PartInputStream partInputStream : inputStreams) { + this.partOffsets[i++] = streamLength; + streamLength += partInputStream.getLength(); } + this.length = streamLength; } - /** - * {@inheritDoc} - */ - @Override - public int read() throws IOException { - byte[] buf = new byte[1]; - if (read(buf, 0, 1) == EOF) { - return EOF; - } - return Byte.toUnsignedInt(buf[0]); - } - - /** - * {@inheritDoc} - */ @Override - public int read(byte[] b, int off, int len) throws IOException { + protected synchronized int readWithStrategy(ByteReaderStrategy strategy) + throws IOException { + Preconditions.checkArgument(strategy != null); checkOpen(); - if (b == null) { - throw new NullPointerException(); - } - if (off < 0 || len < 0 || len > b.length - off) { - throw new IndexOutOfBoundsException(); - } - if (len == 0) { - return 0; - } + int totalReadLen = 0; - while (len > 0) { + while (strategy.getTargetLength() > 0) { if (partStreams.size() == 0 || - (partStreams.size() - 1 <= partIndex && - partStreams.get(partIndex).getRemaining() == 0)) { + partStreams.size() - 1 <= partIndex && + partStreams.get(partIndex).getRemaining() == 0) { return totalReadLen == 0 ? 
EOF : totalReadLen; } // Get the current partStream and read data from it - OzoneCryptoInputStream current = partStreams.get(partIndex); - int numBytesRead = current.read(b, off, len); + PartInputStream current = partStreams.get(partIndex); + int numBytesToRead = getNumBytesToRead(strategy, current); + int numBytesRead = strategy + .readFromBlock((InputStream) current, numBytesToRead); + checkPartBytesRead(numBytesToRead, numBytesRead, current); totalReadLen += numBytesRead; - off += numBytesRead; - len -= numBytesRead; if (current.getRemaining() <= 0 && - ((partIndex + 1) < partStreams.size())) { + partIndex + 1 < partStreams.size()) { partIndex += 1; } - } return totalReadLen; } + protected int getNumBytesToRead(ByteReaderStrategy strategy, + PartInputStream current) throws IOException { + return strategy.getTargetLength(); + } + + protected void checkPartBytesRead(int numBytesToRead, int numBytesRead, + PartInputStream stream) throws IOException { + } + /** * Seeks the InputStream to the specified position. This involves 2 steps: - * 1. Updating the partIndex to the partStream corresponding to the - * seeked position. - * 2. Seeking the corresponding partStream to the adjusted position. - * + * 1. Updating the partIndex to the partStream corresponding to the + * seeked position. + * 2. Seeking the corresponding partStream to the adjusted position. + *

* For example, let’s say the part sizes are 200 bytes and part[0] stores * data from indices 0 - 199, part[1] from indices 200 - 399 and so on. * Let’s say we seek to position 240. In the first step, the partIndex @@ -147,14 +128,16 @@ public int read(byte[] b, int off, int len) throws IOException { * 240 - blockOffset[1] (= 200)). */ @Override - public void seek(long pos) throws IOException { + public synchronized void seek(long pos) throws IOException { + checkOpen(); if (pos == 0 && length == 0) { // It is possible for length and pos to be zero in which case // seek should return instead of throwing exception return; } if (pos < 0 || pos > length) { - throw new EOFException("EOF encountered at pos: " + pos); + throw new EOFException( + "EOF encountered at pos: " + pos + " for key: " + key); } // 1. Update the partIndex @@ -192,32 +175,26 @@ public void seek(long pos) throws IOException { @Override public synchronized long getPos() throws IOException { - checkOpen(); - return length == 0 ? 0 : partOffsets[partIndex] + - partStreams.get(partIndex).getPos(); + return length == 0 ? 0 : + partOffsets[partIndex] + partStreams.get(partIndex).getPos(); } @Override - public boolean seekToNewSource(long targetPos) throws IOException { - return false; - } - - @Override - public int available() throws IOException { + public synchronized int available() throws IOException { checkOpen(); long remaining = length - getPos(); return remaining <= Integer.MAX_VALUE ? 
(int) remaining : Integer.MAX_VALUE; } @Override - public void unbuffer() { - for (CryptoInputStream cryptoInputStream : partStreams) { - cryptoInputStream.unbuffer(); + public synchronized void unbuffer() { + for (PartInputStream stream : partStreams) { + stream.unbuffer(); } } @Override - public long skip(long n) throws IOException { + public synchronized long skip(long n) throws IOException { if (n <= 0) { return 0; } @@ -230,14 +207,15 @@ public long skip(long n) throws IOException { @Override public synchronized void close() throws IOException { closed = true; - for (OzoneCryptoInputStream partStream : partStreams) { - partStream.close(); + for (PartInputStream stream : partStreams) { + stream.close(); } } /** * Verify that the input stream is open. Non blocking; this gives * the last state of the volatile {@link #closed} field. + * * @throws IOException if the connection is closed. */ private void checkOpen() throws IOException { @@ -246,4 +224,23 @@ private void checkOpen() throws IOException { ": " + FSExceptionMessages.STREAM_IS_CLOSED + " Key: " + key); } } + + public long getLength() { + return length; + } + + @VisibleForTesting + public synchronized int getCurrentStreamIndex() { + return partIndex; + } + + @VisibleForTesting + public long getRemainingOfIndex(int index) throws IOException { + return partStreams.get(index).getRemaining(); + } + + @VisibleForTesting + public List getPartStreams() { + return partStreams; + } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/OMEchoRPCRequest.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/PartInputStream.java similarity index 54% rename from hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/OMEchoRPCRequest.java rename to hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/PartInputStream.java index 1d52bf92256a..ab16dded403b 100644 --- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/OMEchoRPCRequest.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/PartInputStream.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -15,27 +15,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.hadoop.hdds.scm.storage; + +import org.apache.hadoop.fs.CanUnbuffer; +import org.apache.hadoop.fs.Seekable; -package org.apache.hadoop.ozone.om.request; +import java.io.IOException; -import org.apache.hadoop.ozone.om.OzoneManager; -import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerDoubleBufferHelper; -import org.apache.hadoop.ozone.om.response.OMClientResponse; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; /** - * Handles EchoRPCRequest. + * A stream that can be a part of a {@link MultipartInputStream}. 
*/ -public class OMEchoRPCRequest extends OMClientRequest { - public OMEchoRPCRequest(OMRequest omRequest) { - super(omRequest); - } +public interface PartInputStream + extends CanUnbuffer, Seekable { + long getLength(); - @Override - public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, - long transactionLogIndex, - OzoneManagerDoubleBufferHelper ozoneManagerDoubleBufferHelper) { - return null; + default long getRemaining() throws IOException { + return getLength() - getPos(); } + void close() throws IOException; } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/RatisBlockOutputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/RatisBlockOutputStream.java index 7c2d87d9bd0b..35a70e7bd79e 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/RatisBlockOutputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/RatisBlockOutputStream.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.storage; import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.fs.Syncable; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; @@ -54,7 +55,8 @@ * This class encapsulates all state management for buffering and writing * through to the container. 
*/ -public class RatisBlockOutputStream extends BlockOutputStream { +public class RatisBlockOutputStream extends BlockOutputStream + implements Syncable { public static final Logger LOG = LoggerFactory.getLogger( RatisBlockOutputStream.class); @@ -126,4 +128,16 @@ void waitOnFlushFutures() throws InterruptedException, ExecutionException { void cleanup() { commitWatcher.cleanup(); } + + @Override + public void hflush() throws IOException { + hsync(); + } + + @Override + public void hsync() throws IOException { + if (!isClosed()) { + waitForFlushAndCommit(false); + } + } } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/StreamBuffer.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/StreamBuffer.java new file mode 100644 index 000000000000..d34e4dca9483 --- /dev/null +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/StreamBuffer.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.storage; + +import java.nio.ByteBuffer; + +/** + * Used for streaming write. 
+ */ +public class StreamBuffer { + private final ByteBuffer buffer; + + public StreamBuffer(ByteBuffer buffer) { + this.buffer = buffer; + } + + public StreamBuffer(ByteBuffer buffer, int offset, int length) { + this((ByteBuffer) buffer.asReadOnlyBuffer().position(offset) + .limit(offset + length)); + } + + public ByteBuffer duplicate() { + return buffer.duplicate(); + } + + public int length() { + return buffer.limit() - buffer.position(); + } + + public int position() { + return buffer.position(); + } + + + public void put(StreamBuffer sb) { + buffer.put(sb.buffer); + } + + public static StreamBuffer allocate(int size) { + return new StreamBuffer(ByteBuffer.allocate(size)); + } + +} \ No newline at end of file diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/StreamCommitWatcher.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/StreamCommitWatcher.java new file mode 100644 index 000000000000..8ca70de81684 --- /dev/null +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/StreamCommitWatcher.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.scm.storage; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.scm.XceiverClientReply; +import org.apache.hadoop.hdds.scm.XceiverClientSpi; +import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.MemoizedSupplier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; + +/** + * This class executes watchForCommit on ratis pipeline and releases + * buffers once data successfully gets replicated. + */ +public class StreamCommitWatcher { + + private static final Logger LOG = + LoggerFactory.getLogger(StreamCommitWatcher.class); + + private Map> commitIndexMap; + private final List bufferList; + + // total data which has been successfully flushed and acknowledged + // by all servers + private long totalAckDataLength; + private final ConcurrentMap> + replies = new ConcurrentHashMap<>(); + + private final XceiverClientSpi xceiverClient; + + public StreamCommitWatcher(XceiverClientSpi xceiverClient, + List bufferList) { + this.xceiverClient = xceiverClient; + commitIndexMap = new ConcurrentSkipListMap<>(); + this.bufferList = bufferList; + totalAckDataLength = 0; + } + + public void updateCommitInfoMap(long index, List buffers) { + commitIndexMap.computeIfAbsent(index, k -> new LinkedList<>()) + .addAll(buffers); + } + + int getCommitInfoMapSize() { + return commitIndexMap.size(); + } + + /** + * Calls watch for commit for the first index in commitIndex2flushedDataMap to + * the Ratis client. 
+ * @return {@link XceiverClientReply} reply from raft client + * @throws IOException in case watchForCommit fails + */ + public XceiverClientReply streamWatchOnFirstIndex() throws IOException { + if (!commitIndexMap.isEmpty()) { + // wait for the first commit index in the commitIndex2flushedDataMap + // to get committed to all or majority of nodes in case timeout + // happens. + long index = + commitIndexMap.keySet().stream().mapToLong(v -> v).min() + .getAsLong(); + if (LOG.isDebugEnabled()) { + LOG.debug("waiting for first index {} to catch up", index); + } + return streamWatchForCommit(index); + } else { + return null; + } + } + + /** + * Calls watch for commit for the last index in commitIndex2flushedDataMap to + * the Ratis client. + * @return {@link XceiverClientReply} reply from raft client + * @throws IOException in case watchForCommit fails + */ + public XceiverClientReply streamWatchOnLastIndex() + throws IOException { + if (!commitIndexMap.isEmpty()) { + // wait for the commit index in the commitIndex2flushedDataMap + // to get committed to all or majority of nodes in case timeout + // happens. + long index = + commitIndexMap.keySet().stream().mapToLong(v -> v).max() + .getAsLong(); + if (LOG.isDebugEnabled()) { + LOG.debug("waiting for last flush Index {} to catch up", index); + } + return streamWatchForCommit(index); + } else { + return null; + } + } + + /** + * calls watchForCommit API of the Ratis Client. This method is for streaming + * and no longer requires releaseBuffers + * @param commitIndex log index to watch for + * @return minimum commit index replicated to all nodes + * @throws IOException IOException in case watch gets timed out + */ + public XceiverClientReply streamWatchForCommit(long commitIndex) + throws IOException { + final MemoizedSupplier> supplier + = JavaUtils.memoize(CompletableFuture::new); + final CompletableFuture f = replies.compute(commitIndex, + (key, value) -> value != null ? 
value : supplier.get()); + if (!supplier.isInitialized()) { + // future already exists + return f.join(); + } + + try { + XceiverClientReply reply = + xceiverClient.watchForCommit(commitIndex); + f.complete(reply); + final CompletableFuture removed + = replies.remove(commitIndex); + Preconditions.checkState(removed == f); + + adjustBuffers(reply.getLogIndex()); + return reply; + } catch (InterruptedException e) { + // Re-interrupt the thread while catching InterruptedException + Thread.currentThread().interrupt(); + throw getIOExceptionForWatchForCommit(commitIndex, e); + } catch (TimeoutException | ExecutionException e) { + throw getIOExceptionForWatchForCommit(commitIndex, e); + } + } + + void releaseBuffersOnException() { + adjustBuffers(xceiverClient.getReplicatedMinCommitIndex()); + } + + private void adjustBuffers(long commitIndex) { + List keyList = commitIndexMap.keySet().stream() + .filter(p -> p <= commitIndex).collect(Collectors.toList()); + if (!keyList.isEmpty()) { + releaseBuffers(keyList); + } + } + + private long releaseBuffers(List indexes) { + Preconditions.checkArgument(!commitIndexMap.isEmpty()); + for (long index : indexes) { + Preconditions.checkState(commitIndexMap.containsKey(index)); + final List buffers = commitIndexMap.remove(index); + final long length = + buffers.stream().mapToLong(StreamBuffer::position).sum(); + totalAckDataLength += length; + for (StreamBuffer byteBuffer : buffers) { + bufferList.remove(byteBuffer); + } + } + return totalAckDataLength; + } + + public long getTotalAckDataLength() { + return totalAckDataLength; + } + + private IOException getIOExceptionForWatchForCommit(long commitIndex, + Exception e) { + LOG.warn("watchForCommit failed for index {}", commitIndex, e); + IOException ioException = new IOException( + "Unexpected Storage Container Exception: " + e.toString(), e); + releaseBuffersOnException(); + return ioException; + } + + public void cleanup() { + if (commitIndexMap != null) { + commitIndexMap.clear(); + 
} + commitIndexMap = null; + } +} diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStream.java index 5734d4dc4bd3..ed57ea5a1ed3 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStream.java @@ -308,11 +308,6 @@ protected synchronized int readWithStrategy(ByteReaderStrategy strategy) return totalRead; } - @Override - public synchronized long getRemaining() { - return blockInfo.getLength() - position; - } - @Override public synchronized long getLength() { return blockInfo.getLength(); diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStreamProxy.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStreamProxy.java index 7a8b0d3e8eea..c1356230b2bd 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStreamProxy.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockInputStreamProxy.java @@ -220,7 +220,7 @@ public synchronized void unbuffer() { } @Override - public synchronized long getPos() throws IOException { + public synchronized long getPos() { return blockReader != null ? 
blockReader.getPos() : 0; } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockReconstructedInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockReconstructedInputStream.java index 96aaa3692047..5edb5c399860 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockReconstructedInputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockReconstructedInputStream.java @@ -149,7 +149,7 @@ public synchronized void unbuffer() { } @Override - public synchronized long getPos() throws IOException { + public synchronized long getPos() { return position; } diff --git a/hadoop-hdds/client/src/test/java/org/apache/hadoop/ozone/client/io/ECStreamTestUtil.java b/hadoop-hdds/client/src/test/java/org/apache/hadoop/ozone/client/io/ECStreamTestUtil.java index 9994d3d4e559..0e15d834a26d 100644 --- a/hadoop-hdds/client/src/test/java/org/apache/hadoop/ozone/client/io/ECStreamTestUtil.java +++ b/hadoop-hdds/client/src/test/java/org/apache/hadoop/ozone/client/io/ECStreamTestUtil.java @@ -337,11 +337,6 @@ public long getLength() { return length; } - @Override - public long getRemaining() { - return getLength() - getPos(); - } - @Override public int read(byte[] b, int off, int len) throws IOException { diff --git a/hadoop-hdds/client/src/test/java/org/apache/hadoop/ozone/client/io/TestECBlockInputStream.java b/hadoop-hdds/client/src/test/java/org/apache/hadoop/ozone/client/io/TestECBlockInputStream.java index 5197517925fc..e354fc71793b 100644 --- a/hadoop-hdds/client/src/test/java/org/apache/hadoop/ozone/client/io/TestECBlockInputStream.java +++ b/hadoop-hdds/client/src/test/java/org/apache/hadoop/ozone/client/io/TestECBlockInputStream.java @@ -496,11 +496,6 @@ public long getLength() { return length; } - @Override - public long getRemaining() { - return getLength() - position; - } - @Override public int read(byte[] b, int off, int len) throws 
IOException { diff --git a/hadoop-hdds/common/pom.xml b/hadoop-hdds/common/pom.xml index 6a129f9df929..d63515b3b73d 100644 --- a/hadoop-hdds/common/pom.xml +++ b/hadoop-hdds/common/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-common - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Common Apache Ozone HDDS Common jar @@ -212,7 +212,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.reflections reflections - 0.9.11 org.mockito @@ -235,7 +234,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> com.codahale.metrics metrics-core - 3.0.2 test diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java index 945ca91a4088..cb258dfa74dc 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java @@ -193,6 +193,28 @@ public final class HddsConfigKeys { "hdds.x509.renew.grace.duration"; public static final String HDDS_X509_RENEW_GRACE_DURATION_DEFAULT = "P28D"; + public static final String HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX = "-next"; + public static final String HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX = "-previous"; + + public static final String HDDS_CONTAINER_REPLICATION_COMPRESSION = + "hdds.container.replication.compression"; + public static final String HDDS_X509_ROOTCA_CERTIFICATE_FILE = + "hdds.x509.rootca.certificate.file"; + + public static final String HDDS_X509_ROOTCA_CERTIFICATE_FILE_DEFAULT = + ""; + + public static final String HDDS_X509_ROOTCA_PUBLIC_KEY_FILE = + "hdds.x509.rootca.public.key.file"; + + public static final String HDDS_X509_ROOTCA_PUBLIC_KEY_FILE_DEFAULT = + ""; + + public static final String HDDS_X509_ROOTCA_PRIVATE_KEY_FILE = + "hdds.x509.rootca.private.key.file"; + + public static final String 
HDDS_X509_ROOTCA_PRIVATE_KEY_FILE_DEFAULT = + ""; /** * Do not instantiate. diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/BlockID.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/BlockID.java index 1a979f1eb2a1..3b39817bab51 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/BlockID.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/BlockID.java @@ -16,6 +16,7 @@ */ package org.apache.hadoop.hdds.client; +import com.fasterxml.jackson.annotation.JsonIgnore; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -84,6 +85,7 @@ public void appendTo(StringBuilder sb) { sb.append(" bcsId: ").append(blockCommitSequenceId); } + @JsonIgnore public ContainerProtos.DatanodeBlockID getDatanodeBlockIDProtobuf() { return ContainerProtos.DatanodeBlockID.newBuilder(). setContainerID(containerBlockID.getContainerID()) @@ -91,18 +93,21 @@ public ContainerProtos.DatanodeBlockID getDatanodeBlockIDProtobuf() { .setBlockCommitSequenceId(blockCommitSequenceId).build(); } + @JsonIgnore public static BlockID getFromProtobuf( ContainerProtos.DatanodeBlockID blockID) { return new BlockID(blockID.getContainerID(), blockID.getLocalID(), blockID.getBlockCommitSequenceId()); } + @JsonIgnore public HddsProtos.BlockID getProtobuf() { return HddsProtos.BlockID.newBuilder() .setContainerBlockID(containerBlockID.getProtobuf()) .setBlockCommitSequenceId(blockCommitSequenceId).build(); } + @JsonIgnore public static BlockID getFromProtobuf(HddsProtos.BlockID blockID) { return new BlockID( ContainerBlockID.getFromProtobuf(blockID.getContainerBlockID()), diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ContainerBlockID.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ContainerBlockID.java index 94a1c87d4154..57980363850f 100644 --- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ContainerBlockID.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ContainerBlockID.java @@ -16,6 +16,7 @@ */ package org.apache.hadoop.hdds.client; +import com.fasterxml.jackson.annotation.JsonIgnore; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import java.util.Objects; @@ -52,11 +53,13 @@ public void appendTo(StringBuilder sb) { .append(" locID: ").append(localID); } + @JsonIgnore public HddsProtos.ContainerBlockID getProtobuf() { return HddsProtos.ContainerBlockID.newBuilder(). setContainerID(containerID).setLocalID(localID).build(); } + @JsonIgnore public static ContainerBlockID getFromProtobuf( HddsProtos.ContainerBlockID containerBlockID) { return new ContainerBlockID(containerBlockID.getContainerID(), diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java index 610419527a4b..7542409679b0 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java @@ -76,6 +76,17 @@ static ReplicationConfig getDefault(ConfigurationSource config) { return parse(null, replication, config); } + static ReplicationConfig resolve(ReplicationConfig replicationConfig, + ReplicationConfig bucketReplicationConfig, ConfigurationSource conf) { + if (replicationConfig == null) { + replicationConfig = bucketReplicationConfig; + } + if (replicationConfig == null) { + replicationConfig = getDefault(conf); + } + return replicationConfig; + } + /** * Helper method to serialize from proto. *

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/OzoneConfiguration.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/OzoneConfiguration.java index f35073c4728b..91415c34ecce 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/OzoneConfiguration.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/OzoneConfiguration.java @@ -46,6 +46,7 @@ import org.apache.ratis.server.RaftServerConfigKeys; import static org.apache.hadoop.hdds.ratis.RatisHelper.HDDS_DATANODE_RATIS_PREFIX_KEY; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_CONTAINER_COPY_WORKDIR; /** * Configuration for ozone. @@ -308,7 +309,9 @@ private static void addDeprecatedKeys() { new DeprecationDelta("dfs.datanode.keytab.file", DFSConfigKeysLegacy.DFS_DATANODE_KERBEROS_KEYTAB_FILE_KEY), new DeprecationDelta("ozone.scm.chunk.layout", - ScmConfigKeys.OZONE_SCM_CONTAINER_LAYOUT_KEY) + ScmConfigKeys.OZONE_SCM_CONTAINER_LAYOUT_KEY), + new DeprecationDelta("hdds.datanode.replication.work.dir", + OZONE_CONTAINER_COPY_WORKDIR) }); } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java index 32358ef40a44..fc3f8ae6fb82 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java @@ -284,8 +284,10 @@ public synchronized Port getPort(Port.Name name) { return port; } } - // if no separate admin/server port, return single Ratis one for compat - if (name == Name.RATIS_ADMIN || name == Name.RATIS_SERVER) { + // if no separate admin/server/datastream port, return single Ratis one for + // compat + if (name == Name.RATIS_ADMIN || name == Name.RATIS_SERVER || + name == Name.RATIS_DATASTREAM) { return getPort(Name.RATIS); } return null; @@ -492,6 +494,10 @@ public void 
setCurrentVersion(int currentVersion) { @Override public String toString() { + return uuidString + "(" + hostName + "/" + ipAddress + ")"; + } + + public String toDebugString() { return uuid.toString() + "{" + "ip: " + ipAddress + @@ -795,7 +801,8 @@ public static final class Port { * Ports that are supported in DataNode. */ public enum Name { - STANDALONE, RATIS, REST, REPLICATION, RATIS_ADMIN, RATIS_SERVER; + STANDALONE, RATIS, REST, REPLICATION, RATIS_ADMIN, RATIS_SERVER, + RATIS_DATASTREAM; public static final Set ALL_PORTS = ImmutableSet.copyOf( Name.values()); diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java index be6076a9183b..5b7ecb0c6b97 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdds.ratis; import java.io.IOException; +import java.nio.ByteBuffer; import java.security.cert.X509Certificate; import java.util.ArrayList; import java.util.Collection; @@ -46,17 +47,21 @@ import org.apache.ratis.client.RaftClientConfigKeys; import org.apache.ratis.conf.Parameters; import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.datastream.SupportedDataStreamType; import org.apache.ratis.grpc.GrpcConfigKeys; import org.apache.ratis.grpc.GrpcTlsConfig; +import org.apache.ratis.netty.NettyConfigKeys; import org.apache.ratis.proto.RaftProtos; import org.apache.ratis.protocol.RaftGroup; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.RoutingTable; import org.apache.ratis.retry.RetryPolicy; import org.apache.ratis.rpc.RpcType; import org.apache.ratis.rpc.SupportedRpcType; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import 
org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -134,7 +139,9 @@ private static RaftPeer.Builder raftPeerBuilderFor(DatanodeDetails dn) { .setId(toRaftPeerId(dn)) .setAddress(toRaftPeerAddress(dn, Port.Name.RATIS_SERVER)) .setAdminAddress(toRaftPeerAddress(dn, Port.Name.RATIS_ADMIN)) - .setClientAddress(toRaftPeerAddress(dn, Port.Name.RATIS)); + .setClientAddress(toRaftPeerAddress(dn, Port.Name.RATIS)) + .setDataStreamAddress( + toRaftPeerAddress(dn, Port.Name.RATIS_DATASTREAM)); } private static List toRaftPeers(Pipeline pipeline) { @@ -188,6 +195,7 @@ public static RaftClient newRaftClient(RpcType rpcType, Pipeline pipeline, ConfigurationSource ozoneConfiguration) throws IOException { return newRaftClient(rpcType, toRaftPeerId(pipeline.getLeaderNode()), + toRaftPeer(pipeline.getFirstNode()), newRaftGroup(RaftGroupId.valueOf(pipeline.getId().getId()), pipeline.getNodes()), retryPolicy, tlsConfig, ozoneConfiguration); } @@ -207,7 +215,7 @@ public static BiFunction newRaftClient( public static RaftClient newRaftClient(RpcType rpcType, RaftPeer leader, RetryPolicy retryPolicy, GrpcTlsConfig tlsConfig, ConfigurationSource configuration) { - return newRaftClient(rpcType, leader.getId(), + return newRaftClient(rpcType, leader.getId(), leader, newRaftGroup(Collections.singletonList(leader)), retryPolicy, tlsConfig, configuration); } @@ -215,14 +223,14 @@ public static RaftClient newRaftClient(RpcType rpcType, RaftPeer leader, public static RaftClient newRaftClient(RpcType rpcType, RaftPeer leader, RetryPolicy retryPolicy, ConfigurationSource ozoneConfiguration) { - return newRaftClient(rpcType, leader.getId(), + return newRaftClient(rpcType, leader.getId(), leader, newRaftGroup(Collections.singletonList(leader)), retryPolicy, null, ozoneConfiguration); } @SuppressWarnings("checkstyle:ParameterNumber") private static RaftClient newRaftClient(RpcType rpcType, RaftPeerId leader, - RaftGroup group, RetryPolicy 
retryPolicy, + RaftPeer primary, RaftGroup group, RetryPolicy retryPolicy, GrpcTlsConfig tlsConfig, ConfigurationSource ozoneConfiguration) { if (LOG.isTraceEnabled()) { LOG.trace("newRaftClient: {}, leader={}, group={}", @@ -236,6 +244,7 @@ private static RaftClient newRaftClient(RpcType rpcType, RaftPeerId leader, return RaftClient.newBuilder() .setRaftGroup(group) .setLeaderId(leader) + .setPrimaryDataStreamServer(primary) .setProperties(properties) .setParameters(setClientTlsConf(rpcType, tlsConfig)) .setRetryPolicy(retryPolicy) @@ -265,6 +274,7 @@ private static void setClientTlsConf(Parameters parameters, GrpcTlsConfig tlsConfig) { if (tlsConfig != null) { GrpcConfigKeys.Client.setTlsConf(parameters, tlsConfig); + NettyConfigKeys.DataStream.Client.setTlsConf(parameters, tlsConfig); } } @@ -275,6 +285,8 @@ public static Parameters setServerTlsConf( GrpcConfigKeys.Server.setTlsConf(parameters, serverConf); GrpcConfigKeys.TLS.setConf(parameters, serverConf); setAdminTlsConf(parameters, serverConf); + + NettyConfigKeys.DataStream.Server.setTlsConf(parameters, serverConf); } setClientTlsConf(parameters, clientConf); return parameters; @@ -293,6 +305,8 @@ public static RaftProperties newRaftProperties(RpcType rpcType) { public static RaftProperties setRpcType(RaftProperties properties, RpcType rpcType) { RaftConfigKeys.Rpc.setType(properties, rpcType); + RaftConfigKeys.DataStream.setType(properties, + SupportedDataStreamType.NETTY); return properties; } @@ -310,7 +324,8 @@ public static void createRaftClientProperties(ConfigurationSource ozoneConf, Map ratisClientConf = getDatanodeRatisPrefixProps(ozoneConf); ratisClientConf.forEach((key, val) -> { - if (isClientConfig(key) || isGrpcClientConfig(key)) { + if (isClientConfig(key) || isGrpcClientConfig(key) + || isNettyStreamConfig(key)) { raftProperties.set(key, val); } }); @@ -326,6 +341,15 @@ private static boolean isGrpcClientConfig(String key) { !key.startsWith(GrpcConfigKeys.Admin.PREFIX) && 
!key.startsWith(GrpcConfigKeys.Server.PREFIX); } + + private static boolean isNettyStreamConfig(String key) { + return key.startsWith(NettyConfigKeys.DataStream.PREFIX); + } + + private static boolean isStreamClientConfig(String key) { + return key.startsWith(RaftClientConfigKeys.DataStream.PREFIX); + } + /** * Set all server properties matching with prefix * {@link RatisHelper#HDDS_DATANODE_RATIS_PREFIX_KEY} in @@ -340,7 +364,8 @@ public static void createRaftServerProperties(ConfigurationSource ozoneConf, getDatanodeRatisPrefixProps(ozoneConf); ratisServerConf.forEach((key, val) -> { // Exclude ratis client configuration. - if (!isClientConfig(key)) { + if (isNettyStreamConfig(key) || isStreamClientConfig(key) || + !isClientConfig(key)) { raftProperties.set(key, val); } }); @@ -403,4 +428,59 @@ private static Class getClass(String name, throw new RuntimeException(e); } } + + public static RoutingTable getRoutingTable(Pipeline pipeline) { + RaftPeerId primaryId = null; + List raftPeers = new ArrayList<>(); + + for (DatanodeDetails dn : pipeline.getNodes()) { + final RaftPeerId raftPeerId = RaftPeerId.valueOf(dn.getUuidString()); + try { + if (dn == pipeline.getFirstNode()) { + primaryId = raftPeerId; + } + } catch (IOException e) { + LOG.error("Can not get FirstNode from the pipeline: {} with " + + "exception: {}", pipeline.toString(), e.getLocalizedMessage()); + return null; + } + raftPeers.add(raftPeerId); + } + + RoutingTable.Builder builder = RoutingTable.newBuilder(); + RaftPeerId previousId = primaryId; + for (RaftPeerId peerId : raftPeers) { + if (peerId.equals(primaryId)) { + continue; + } + builder.addSuccessor(previousId, peerId); + previousId = peerId; + } + + return builder.build(); + } + + public static void debug(ByteBuffer buffer, String name, Logger log) { + if (!log.isDebugEnabled()) { + return; + } + buffer = buffer.duplicate(); + final StringBuilder builder = new StringBuilder(); + for (int i = 1; buffer.remaining() > 0; i++) { + 
builder.append(buffer.get()).append(i % 20 == 0 ? "\n " : ", "); + } + log.debug("{}: {}\n {}", name, buffer, builder); + } + + public static void debug(ByteBuf buf, String name, Logger log) { + if (!log.isDebugEnabled()) { + return; + } + buf = buf.duplicate(); + final StringBuilder builder = new StringBuilder(); + for (int i = 1; buf.readableBytes() > 0; i++) { + builder.append(buf.readByte()).append(i % 20 == 0 ? "\n " : ", "); + } + log.debug("{}: {}\n {}", name, buf, builder); + } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java index a1823b355d1c..c1f36fb6defe 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java @@ -68,4 +68,7 @@ private ReconConfigKeys() { public static final String OZONE_RECON_ADMINISTRATORS_GROUPS = "ozone.recon.administrators.groups"; + + public static final String OZONE_RECON_TASK_SAFEMODE_WAIT_THRESHOLD + = "ozone.recon.task.safemode.wait.threshold"; } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PlacementPolicy.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PlacementPolicy.java index b240e5c3b789..248e7d715b22 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PlacementPolicy.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/PlacementPolicy.java @@ -22,12 +22,14 @@ import java.io.IOException; import java.util.Collections; import java.util.List; +import java.util.Map; +import java.util.Set; /** * A PlacementPolicy support choosing datanodes to build * pipelines or containers with specified constraints. 
*/ -public interface PlacementPolicy { +public interface PlacementPolicy { default List chooseDatanodes( List excludedNodes, @@ -60,9 +62,29 @@ List chooseDatanodes(List usedNodes, * Given a list of datanode and the number of replicas required, return * a PlacementPolicyStatus object indicating if the container meets the * placement policy - ie is it on the correct number of racks, etc. - * @param dns List of datanodes holding a replica of the container + * @param dns List of replica holding a replica of the container * @param replicas The expected number of replicas */ ContainerPlacementStatus validateContainerPlacement( - List dns, int replicas); + List dns, int replicas); + + /** + * Given a set of replicas of a container which are + * neither over underreplicated nor overreplicated, + * return a set of replicas to copy to another node to fix misreplication. + * @param replicas: Map of replicas with value signifying if + * replica can be copied + */ + Set replicasToCopyToFixMisreplication( + Map replicas); + + /** + * Given a set of replicas of a container which are overreplicated, + * return a set of replicas to delete to fix overreplication. 
+ * @param replicas: Set of existing replicas of the container + * @param expectedCountPerUniqueReplica: Replication factor of each + * unique replica + */ + Set replicasToRemoveToFixOverreplication( + Set replicas, int expectedCountPerUniqueReplica); } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 1c4e09ead9e7..c324c3e8415a 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -344,6 +344,8 @@ public final class ScmConfigKeys { public static final String OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY = "ozone.scm.container.placement.impl"; + public static final String OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY = + "ozone.scm.pipeline.placement.impl"; public static final String OZONE_SCM_CONTAINER_PLACEMENT_EC_IMPL_KEY = "ozone.scm.container.placement.ec.impl"; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ExcludeList.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ExcludeList.java index 026b31304013..0ffa8d68e89f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ExcludeList.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ExcludeList.java @@ -21,8 +21,8 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; -import org.apache.hadoop.ozone.common.MonotonicClock; +import java.time.Clock; import java.time.ZoneOffset; import java.util.Collection; import java.util.HashSet; @@ -49,7 +49,7 @@ public ExcludeList() { datanodes = new ConcurrentHashMap<>(); containerIds = new HashSet<>(); pipelineIds = new HashSet<>(); - clock = 
new MonotonicClock(ZoneOffset.UTC); + clock = Clock.system(ZoneOffset.UTC); } public ExcludeList(long autoExpiryTime, java.time.Clock clock) { diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index 34bd2748f688..f06066b2aa09 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -426,4 +426,11 @@ StatusAndMessages queryUpgradeFinalizationProgress( Token getContainerToken(ContainerID containerID) throws IOException; long getContainerCount() throws IOException; + + long getContainerCount(HddsProtos.LifeCycleState state) + throws IOException; + + List getListOfContainers( + long startContainerID, int count, HddsProtos.LifeCycleState state) + throws IOException; } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java index e024d79b9a7d..d0999268be75 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java @@ -56,6 +56,7 @@ import org.apache.hadoop.hdds.scm.container.common.helpers.BlockNotCommittedException; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.security.token.OzoneBlockTokenIdentifier; import org.apache.hadoop.ozone.common.Checksum; import org.apache.hadoop.ozone.common.ChecksumData; @@ -233,11 
+234,19 @@ public static XceiverClientReply putBlockAsync(XceiverClientSpi xceiverClient, BlockData containerBlockData, boolean eof, Token token) throws IOException, InterruptedException, ExecutionException { + final ContainerCommandRequestProto request = getPutBlockRequest( + xceiverClient.getPipeline(), containerBlockData, eof, token); + return xceiverClient.sendCommandAsync(request); + } + + public static ContainerCommandRequestProto getPutBlockRequest( + Pipeline pipeline, BlockData containerBlockData, boolean eof, + Token token) throws IOException { PutBlockRequestProto.Builder createBlockRequest = PutBlockRequestProto.newBuilder() .setBlockData(containerBlockData) .setEof(eof); - String id = xceiverClient.getPipeline().getFirstNode().getUuidString(); + final String id = pipeline.getFirstNode().getUuidString(); ContainerCommandRequestProto.Builder builder = ContainerCommandRequestProto.newBuilder().setCmdType(Type.PutBlock) .setContainerID(containerBlockData.getBlockID().getContainerID()) @@ -246,8 +255,7 @@ public static XceiverClientReply putBlockAsync(XceiverClientSpi xceiverClient, if (token != null) { builder.setEncodedToken(token.encodeToUrlString()); } - ContainerCommandRequestProto request = builder.build(); - return xceiverClient.sendCommandAsync(request); + return builder.build(); } /** diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/SecurityConfig.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/SecurityConfig.java index 94401e5e3223..1fe22a45c9a9 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/SecurityConfig.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/SecurityConfig.java @@ -30,6 +30,7 @@ import org.apache.hadoop.ozone.OzoneConfigKeys; import com.google.common.base.Preconditions; + import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED; import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_TOKEN_ENABLED; @@ -37,6 +38,12 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DEFAULT_KEY_ALGORITHM; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DEFAULT_KEY_LEN; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DEFAULT_SECURITY_PROVIDER; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_ROOTCA_CERTIFICATE_FILE; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_ROOTCA_CERTIFICATE_FILE_DEFAULT; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_ROOTCA_PRIVATE_KEY_FILE; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_ROOTCA_PRIVATE_KEY_FILE_DEFAULT; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_ROOTCA_PUBLIC_KEY_FILE; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_ROOTCA_PUBLIC_KEY_FILE_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_PROVIDER; @@ -74,6 +81,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SECURITY_SSL_TRUSTSTORE_RELOAD_INTERVAL_DEFAULT; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_DEFAULT; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY; + import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslProvider; import org.bouncycastle.jce.provider.BouncyCastleProvider; import org.slf4j.Logger; @@ -111,6 +119,9 @@ public class SecurityConfig { private boolean grpcTlsUseTestCert; private final long keystoreReloadInterval; private final long truststoreReloadInterval; + private final String externalRootCaPublicKeyPath; + private final String externalRootCaPrivateKeyPath; + private final String externalRootCaCert; /** * Constructs a SecurityConfig. 
@@ -175,15 +186,20 @@ public SecurityConfig(ConfigurationSource configuration) { HDDS_X509_RENEW_GRACE_DURATION_DEFAULT); renewalGracePeriod = Duration.parse(renewalGraceDurationString); - if (maxCertDuration.compareTo(defaultCertDuration) < 0) { - LOG.error("Certificate duration {} should not be greater than Maximum " + - "Certificate duration {}", maxCertDuration, defaultCertDuration); - throw new IllegalArgumentException("Certificate duration should not be " + - "greater than maximum Certificate duration"); - } + validateCertificateValidityConfig(); + + this.externalRootCaCert = this.configuration.get( + HDDS_X509_ROOTCA_CERTIFICATE_FILE, + HDDS_X509_ROOTCA_CERTIFICATE_FILE_DEFAULT); + this.externalRootCaPublicKeyPath = this.configuration.get( + HDDS_X509_ROOTCA_PUBLIC_KEY_FILE, + HDDS_X509_ROOTCA_PUBLIC_KEY_FILE_DEFAULT); + this.externalRootCaPrivateKeyPath = this.configuration.get( + HDDS_X509_ROOTCA_PRIVATE_KEY_FILE, + HDDS_X509_ROOTCA_PRIVATE_KEY_FILE_DEFAULT); this.crlName = this.configuration.get(HDDS_X509_CRL_NAME, - HDDS_X509_CRL_NAME_DEFAULT); + HDDS_X509_CRL_NAME_DEFAULT); // First Startup -- if the provider is null, check for the provider. if (SecurityConfig.provider == null) { @@ -207,6 +223,44 @@ public SecurityConfig(ConfigurationSource configuration) { TimeUnit.MILLISECONDS); } + /** + * Check for certificate validity configuration. 
+ */ + private void validateCertificateValidityConfig() { + if (maxCertDuration.isNegative() || maxCertDuration.isZero()) { + String msg = "Property " + HDDS_X509_MAX_DURATION + + " should not be zero or negative"; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + if (defaultCertDuration.isNegative() || defaultCertDuration.isZero()) { + String msg = "Property " + HDDS_X509_DEFAULT_DURATION + + " should not be zero or negative"; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + if (renewalGracePeriod.isNegative() || renewalGracePeriod.isZero()) { + String msg = "Property " + HDDS_X509_RENEW_GRACE_DURATION + + " should not be zero or negative"; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + + if (maxCertDuration.compareTo(defaultCertDuration) < 0) { + String msg = "Property " + HDDS_X509_DEFAULT_DURATION + + " should not be greater than Property " + HDDS_X509_MAX_DURATION; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + if (defaultCertDuration.compareTo(renewalGracePeriod) < 0) { + String msg = "Property " + HDDS_X509_RENEW_GRACE_DURATION + + " should not be greater than Property " + + HDDS_X509_DEFAULT_DURATION; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + } + /** * Returns the CRL Name. * @@ -399,6 +453,18 @@ public SslProvider getGrpcSslProvider() { HDDS_GRPC_TLS_PROVIDER_DEFAULT)); } + public String getExternalRootCaPrivateKeyPath() { + return externalRootCaPrivateKeyPath; + } + + public String getExternalRootCaPublicKeyPath() { + return externalRootCaPublicKeyPath; + } + + public String getExternalRootCaCert() { + return externalRootCaCert; + } + /** * Return true if using test certificates with authority as localhost. 
This * should be used only for unit test where certificates are generated by diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java index 03e4c53da826..6e590df04898 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java @@ -79,6 +79,11 @@ public CertificateCodec(SecurityConfig config, String component) { this.location = securityConfig.getCertificateLocation(component); } + public CertificateCodec(SecurityConfig config, Path certPath) { + this.securityConfig = config; + this.location = certPath; + } + /** * Returns a X509 Certificate from the Certificate Holder. * diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 2a1c84aedbe9..70c22eebfcd5 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -57,6 +57,12 @@ public final class OzoneConfigKeys { public static final boolean DFS_CONTAINER_IPC_RANDOM_PORT_DEFAULT = false; + public static final String DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT = + "dfs.container.ratis.datastream.random.port"; + public static final boolean + DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT_DEFAULT = + false; + public static final String DFS_CONTAINER_CHUNK_WRITE_SYNC_KEY = "dfs.container.chunk.write.sync"; public static final boolean DFS_CONTAINER_CHUNK_WRITE_SYNC_DEFAULT = false; @@ -79,6 +85,26 @@ public final class OzoneConfigKeys { "dfs.container.ratis.server.port"; public static final int DFS_CONTAINER_RATIS_SERVER_PORT_DEFAULT = 9856; 
+ /** + * Ratis Port where containers listen to datastream requests. + */ + public static final String DFS_CONTAINER_RATIS_DATASTREAM_ENABLED + = "dfs.container.ratis.datastream.enabled"; + public static final boolean DFS_CONTAINER_RATIS_DATASTREAM_ENABLED_DEFAULT + = false; + public static final String DFS_CONTAINER_RATIS_DATASTREAM_PORT + = "dfs.container.ratis.datastream.port"; + public static final int DFS_CONTAINER_RATIS_DATASTREAM_PORT_DEFAULT + = 9855; + + /** + * Flag to enable ratis streaming on filesystem writes. + */ + public static final String OZONE_FS_DATASTREAM_ENABLED + = "ozone.fs.datastream.enabled"; + public static final boolean OZONE_FS_DATASTREAM_ENABLED_DEFAULT + = false; + /** * When set to true, allocate a random free port for ozone container, so that * a mini cluster is able to launch multiple containers on a node. diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/DNAction.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/DNAction.java index 1c87f2bdebad..73aff9ac830c 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/DNAction.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/DNAction.java @@ -38,7 +38,8 @@ public enum DNAction implements AuditAction { PUT_SMALL_FILE, GET_SMALL_FILE, CLOSE_CONTAINER, - GET_COMMITTED_BLOCK_LENGTH; + GET_COMMITTED_BLOCK_LENGTH, + STREAM_INIT; @Override public String getAction() { diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java index bb4b5e3cedbd..939527a5e231 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java @@ -140,6 +140,11 @@ public ChecksumData computeChecksum(byte[] data) */ public ChecksumData computeChecksum(ByteBuffer data) throws OzoneChecksumException { + // If 
type is set to NONE, we do not need to compute the checksums. We also + // need to avoid unnecessary conversions. + if (checksumType == ChecksumType.NONE) { + return new ChecksumData(checksumType, bytesPerChecksum); + } if (!data.isReadOnly()) { data = data.asReadOnlyBuffer(); } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerCommandRequestPBHelper.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerCommandRequestPBHelper.java index a13f164eec62..4d7f0f37c4eb 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerCommandRequestPBHelper.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerCommandRequestPBHelper.java @@ -187,6 +187,7 @@ public static DNAction getAuditAction(Type cmdType) { case GetSmallFile : return DNAction.GET_SMALL_FILE; case CloseContainer : return DNAction.CLOSE_CONTAINER; case GetCommittedBlockLength : return DNAction.GET_COMMITTED_BLOCK_LENGTH; + case StreamInit : return DNAction.STREAM_INIT; default : LOG.debug("Invalid command type - {}", cmdType); return null; diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index fe11a3b72e18..0bdb5fe91aeb 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -53,6 +53,27 @@ OZONE, CONTAINER, MANAGEMENT The ipc port number of container. + + + dfs.container.ratis.datastream.enabled + false + OZONE, CONTAINER, RATIS, DATASTREAM + It specifies whether to enable data stream of container. + + + dfs.container.ratis.datastream.port + 9855 + OZONE, CONTAINER, RATIS, DATASTREAM + The datastream port number of container. 
+ + + dfs.container.ratis.datastream.random.port + false + OZONE, CONTAINER, RATIS, DATASTREAM + Allocates a random free port for ozone container datastream. + This is used only while running unit tests. + + dfs.container.ipc.random.port false @@ -1558,9 +1579,9 @@ hdds.datanode.replication.work.dir DATANODE - Temporary which is used during the container replication - betweeen datanodes. Should have enough space to store multiple container - (in compressed format), but doesn't require fast io access such as SSD. + This configuration is deprecated. Temporary sub directory under + each hdds.datanode.dir will be used during the container replication between + datanodes to save the downloaded container(in compressed format). @@ -2087,7 +2108,8 @@ Max time for which certificate issued by SCM CA are valid. This duration is used for self-signed root cert and scm sub-ca certs issued by root ca. The formats accepted are based on the ISO-8601 - duration format PnDTnHnMn.nS + duration format PnDTnHnMn.nS + hdds.x509.signature.algorithm @@ -2095,6 +2117,49 @@ OZONE, HDDS, SECURITY X509 signature certificate. + + hdds.container.replication.compression + NO_COMPRESSION + OZONE, HDDS, DATANODE + Compression algorithm used for closed container replication. + Possible chooices include NO_COMPRESSION, GZIP, SNAPPY, LZ4, ZSTD + + + + hdds.x509.rootca.certificate.file + + Path to an external CA certificate. The file format is expected + to be pem. This certificate is used when initializing SCM to create + a root certificate authority. By default, a self-signed certificate is + generated instead. Note that this certificate is only used for Ozone's + internal communication, and it does not affect the certificates used for + HTTPS protocol at WebUIs as they can be configured separately. + + + + hdds.x509.rootca.private.key.file + + Path to an external private key. The file format is expected + to be pem. 
This private key is later used when initializing SCM to sign + certificates as the root certificate authority. When not specified a + private and public key is generated instead. + These keys are only used for Ozone's internal communication, and it does + not affect the HTTPS protocol at WebUIs as they can be configured + separately. + + + + hdds.x509.rootca.public.key.file + + Path to an external public key. The file format is expected + to be pem. This public key is later used when initializing SCM to sign + certificates as the root certificate authority. + When only the private key is specified the public key is read from the + external certificate. Note that this is only used for Ozone's internal + communication, and it does not affect the HTTPS protocol at WebUIs as + they can be configured separately. + + ozone.scm.security.handler.count.key 2 @@ -3401,6 +3466,16 @@ + + ozone.om.enable.ofs.shared.tmp.dir + false + OZONE, OM + + Enable shared ofs tmp directory ofs://tmp. Allows a root tmp + directory with sticky-bit behaviour. + + + ozone.fs.listing.page.size 1024 @@ -3433,4 +3508,29 @@ + + ozone.fs.datastream.enabled + false + OZONE, DATANODE + + To enable/disable filesystem write via ratis streaming. + + + + + ozone.recon.scm.snapshot.task.initial.delay + 1m + OZONE, MANAGEMENT, RECON + + Initial delay in MINUTES by Recon to request SCM DB Snapshot. + + + + ozone.recon.scm.snapshot.task.interval.delay + 24h + OZONE, MANAGEMENT, RECON + + Interval in MINUTES by Recon to request SCM DB Snapshot. 
+ + diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/audit/AuditLogTestUtils.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/audit/AuditLogTestUtils.java new file mode 100644 index 000000000000..d619c91f0de0 --- /dev/null +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/audit/AuditLogTestUtils.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.audit; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * Utility class to read audit logs. + */ +public final class AuditLogTestUtils { + private static final String AUDITLOG_FILENAME = "audit.log"; + + private AuditLogTestUtils() { + } + + /** + * Enables audit logging for the mini ozone cluster. Must be called in static + * block of the test class before starting the cluster. + */ + public static void enableAuditLog() { + System.setProperty("log4j.configurationFile", "auditlog.properties"); + } + + /** + * Searches for the given action in the audit log file. 
+ */ + public static void verifyAuditLog(AuditAction action, + AuditEventStatus eventStatus) { + Path file = Paths.get(AUDITLOG_FILENAME); + try (BufferedReader br = Files.newBufferedReader(file, + StandardCharsets.UTF_8)) { + String line; + while ((line = br.readLine()) != null) { + if (line.contains(action.getAction()) && + line.contains(eventStatus.getStatus())) { + return; + } + } + } catch (Exception e) { + throw new AssertionError(e); + } finally { + truncateAuditLogFile(); + } + throw new AssertionError("Audit log file doesn't contain " + + "the message with params event=" + action.getAction() + + " result=" + eventStatus.getStatus()); + } + + private static void truncateAuditLogFile() { + File auditLogFile = new File(AUDITLOG_FILENAME); + try { + new FileOutputStream(auditLogFile).getChannel().truncate(0).close(); + } catch (IOException e) { + System.out.println("Failed to truncate file: " + AUDITLOG_FILENAME); + } + } +} diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java index 09fff2371eac..3e5c237edc67 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java @@ -470,19 +470,29 @@ public static Builder newGetCommittedBlockLengthBuilder(Pipeline pipeline, * Returns a close container request. * @param pipeline - pipeline * @param containerID - ID of the container. + * @param token - container token * @return ContainerCommandRequestProto. 
*/ public static ContainerCommandRequestProto getCloseContainer( - Pipeline pipeline, long containerID) throws IOException { - return ContainerCommandRequestProto.newBuilder() + Pipeline pipeline, long containerID, Token token) throws IOException { + Builder builder = ContainerCommandRequestProto.newBuilder() .setCmdType(ContainerProtos.Type.CloseContainer) .setContainerID(containerID) .setCloseContainer( ContainerProtos.CloseContainerRequestProto.getDefaultInstance()) - .setDatanodeUuid(pipeline.getFirstNode().getUuidString()) - .build(); + .setDatanodeUuid(pipeline.getFirstNode().getUuidString()); + + if (token != null) { + builder.setEncodedToken(token.encodeToUrlString()); + } + + return builder.build(); } + public static ContainerCommandRequestProto getCloseContainer( + Pipeline pipeline, long containerID) throws IOException { + return getCloseContainer(pipeline, containerID, null); + } /** * Returns a simple request without traceId. * @param pipeline - pipeline @@ -546,6 +556,18 @@ public static String getFixedLengthString(String string, int length) { return String.format("%1$" + length + "s", string); } + public static byte[] generateData(int length, boolean random) { + final byte[] data = new byte[length]; + if (random) { + ThreadLocalRandom.current().nextBytes(data); + } else { + for (int i = 0; i < length; i++) { + data[i] = (byte) i; + } + } + return data; + } + /** * Construct fake protobuf messages for various types of requests. * This is tedious, however necessary to test. 
Protobuf classes are final diff --git a/hadoop-hdds/config/pom.xml b/hadoop-hdds/config/pom.xml index cb106986c10f..52837149f85e 100644 --- a/hadoop-hdds/config/pom.xml +++ b/hadoop-hdds/config/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-config - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Config Tools Apache Ozone HDDS Config jar diff --git a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigTag.java b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigTag.java index 8cf584d75f61..3728a0b1f590 100644 --- a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigTag.java +++ b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigTag.java @@ -46,5 +46,6 @@ public enum ConfigTag { DELETION, HA, BALANCER, - UPGRADE + UPGRADE, + DATASTREAM } diff --git a/hadoop-hdds/container-service/pom.xml b/hadoop-hdds/container-service/pom.xml index 9bfd9433bbb2..21d4b755d92e 100644 --- a/hadoop-hdds/container-service/pom.xml +++ b/hadoop-hdds/container-service/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-container-service - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Container Service Apache Ozone HDDS Container Service jar @@ -42,6 +42,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.commons commons-compress + + com.github.luben + zstd-jni + org.apache.ozone hdds-common diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java index 45b453f4a767..62302d04ce9f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java +++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java @@ -21,8 +21,6 @@ import java.io.File; import java.io.IOException; import java.net.InetAddress; -import java.security.KeyPair; -import java.security.cert.CertificateException; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -38,13 +36,10 @@ import org.apache.hadoop.hdds.StringUtils; import org.apache.hadoop.hdds.cli.GenericCli; import org.apache.hadoop.hdds.cli.HddsVersionProvider; -import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.datanode.metadata.DatanodeCRLStore; import org.apache.hadoop.hdds.datanode.metadata.DatanodeCRLStoreImpl; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCertResponseProto; -import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.client.DNCertificateClient; @@ -71,16 +66,12 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; -import com.sun.jmx.mbeanserver.Introspector; -import static org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec.getX509Certificate; -import static org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest.getEncodedString; import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_DATANODE_PLUGINS_KEY; import static org.apache.hadoop.ozone.conf.OzoneServiceConfig.DEFAULT_SHUTDOWN_HOOK_PRIORITY; import static org.apache.hadoop.ozone.common.Storage.StorageState.INITIALIZED; import static org.apache.hadoop.util.ExitUtil.terminate; -import org.bouncycastle.pkcs.PKCS10CertificationRequest; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; import picocli.CommandLine.Command; @@ -99,6 +90,7 @@ public class HddsDatanodeService extends GenericCli implements ServicePlugin { HddsDatanodeService.class); private OzoneConfiguration conf; + private SecurityConfig secConf; private DatanodeDetails datanodeDetails; private DatanodeStateMachine datanodeStateMachine; private List plugins; @@ -110,6 +102,7 @@ public class HddsDatanodeService extends GenericCli implements ServicePlugin { private final AtomicBoolean isStopped = new AtomicBoolean(false); private final Map ratisMetricsMap = new ConcurrentHashMap<>(); + private List ratisReporterList = null; private DNMXBeanImpl serviceRuntimeInfo = new DNMXBeanImpl(HddsVersionInfo.HDDS_VERSION_INFO) { }; private ObjectName dnInfoBeanName; @@ -154,7 +147,6 @@ public static void main(String[] args) { try { OzoneNetUtils.disableJvmNetworkAddressCacheIfRequired( new OzoneConfiguration()); - Introspector.checkCompliance(DNMXBeanImpl.class); HddsDatanodeService hddsDatanodeService = createHddsDatanodeService(args, true); hddsDatanodeService.run(args); @@ -212,8 +204,8 @@ public void start(OzoneConfiguration configuration) { public void start() { serviceRuntimeInfo.setStartTime(); - RatisDropwizardExports. 
- registerRatisMetricReporters(ratisMetricsMap, () -> isStopped.get()); + ratisReporterList = RatisDropwizardExports + .registerRatisMetricReporters(ratisMetricsMap, () -> isStopped.get()); OzoneConfiguration.activate(); HddsServerUtil.initializeMetrics(conf, "HddsDatanode"); @@ -238,8 +230,10 @@ public void start() { if (OzoneSecurityUtil.isSecurityEnabled(conf)) { component = "dn-" + datanodeDetails.getUuidString(); - dnCertClient = new DNCertificateClient(new SecurityConfig(conf), - datanodeDetails.getCertSerialId()); + secConf = new SecurityConfig(conf); + dnCertClient = new DNCertificateClient(secConf, datanodeDetails, + datanodeDetails.getCertSerialId(), this::saveNewCertId, + this::terminateDatanode); if (SecurityUtil.getAuthenticationMethod(conf).equals( UserGroupInformation.AuthenticationMethod.KERBEROS)) { @@ -274,7 +268,7 @@ public void start() { dnCRLStore = new DatanodeCRLStoreImpl(conf); if (OzoneSecurityUtil.isSecurityEnabled(conf)) { - initializeCertificateClient(conf); + dnCertClient = initializeCertificateClient(dnCertClient); } datanodeStateMachine = new DatanodeStateMachine(datanodeDetails, conf, dnCertClient, this::terminateDatanode, dnCRLStore); @@ -334,15 +328,16 @@ private void startRatisForTest() throws IOException { * Initializes secure Datanode. 
* */ @VisibleForTesting - public void initializeCertificateClient(OzoneConfiguration config) - throws IOException { + public CertificateClient initializeCertificateClient( + CertificateClient certClient) throws IOException { LOG.info("Initializing secure Datanode."); - CertificateClient.InitResponse response = dnCertClient.init(); + CertificateClient.InitResponse response = certClient.init(); if (response.equals(CertificateClient.InitResponse.REINIT)) { LOG.info("Re-initialize certificate client."); - dnCertClient = new DNCertificateClient(new SecurityConfig(conf)); - response = dnCertClient.init(); + certClient = new DNCertificateClient(secConf, datanodeDetails, null, + this::saveNewCertId, this::terminateDatanode); + response = certClient.init(); } LOG.info("Init response: {}", response); switch (response) { @@ -350,7 +345,14 @@ public void initializeCertificateClient(OzoneConfiguration config) LOG.info("Initialization successful, case:{}.", response); break; case GETCERT: - getSCMSignedCert(config); + CertificateSignRequest.Builder csrBuilder = certClient.getCSRBuilder(); + String dnCertSerialId = + certClient.signAndStoreCertificate(csrBuilder.build()); + // persist cert ID to VERSION file + datanodeDetails.setCertSerialId(dnCertSerialId); + persistDatanodeDetails(datanodeDetails); + // set new certificate ID + certClient.setCertificateId(dnCertSerialId); LOG.info("Successfully stored SCM signed certificate, case:{}.", response); break; @@ -366,51 +368,8 @@ public void initializeCertificateClient(OzoneConfiguration config) response); throw new RuntimeException("DN security initialization failed."); } - } - /** - * Get SCM signed certificate and store it using certificate client. - * @param config - * */ - private void getSCMSignedCert(OzoneConfiguration config) { - try { - PKCS10CertificationRequest csr = getCSR(config); - // TODO: For SCM CA we should fetch certificate from multiple SCMs. 
- SCMSecurityProtocolClientSideTranslatorPB secureScmClient = - HddsServerUtil.getScmSecurityClientWithMaxRetry(config); - SCMGetCertResponseProto response = secureScmClient. - getDataNodeCertificateChain( - datanodeDetails.getProtoBufMessage(), - getEncodedString(csr)); - // Persist certificates. - if (response.hasX509CACertificate()) { - String pemEncodedCert = response.getX509Certificate(); - dnCertClient.storeCertificate(pemEncodedCert, true); - dnCertClient.storeCertificate(response.getX509CACertificate(), true, - true); - - // Store Root CA certificate. - if (response.hasX509RootCACertificate()) { - dnCertClient.storeRootCACertificate( - response.getX509RootCACertificate(), true); - } - String dnCertSerialId = getX509Certificate(pemEncodedCert). - getSerialNumber().toString(); - datanodeDetails.setCertSerialId(dnCertSerialId); - persistDatanodeDetails(datanodeDetails); - // Rebuild dnCertClient with the new CSR result so that the default - // certSerialId and the x509Certificate can be updated. - dnCertClient = new DNCertificateClient( - new SecurityConfig(config), dnCertSerialId); - - } else { - throw new RuntimeException("Unable to retrieve datanode certificate " + - "chain"); - } - } catch (IOException | CertificateException e) { - LOG.error("Error while storing SCM signed certificate.", e); - throw new RuntimeException(e); - } + return certClient; } private void registerMXBean() { @@ -428,30 +387,6 @@ private void unregisterMXBean() { } } - /** - * Creates CSR for DN. 
- * @param config - * */ - @VisibleForTesting - public PKCS10CertificationRequest getCSR(ConfigurationSource config) - throws IOException { - CertificateSignRequest.Builder builder = dnCertClient.getCSRBuilder(); - KeyPair keyPair = new KeyPair(dnCertClient.getPublicKey(), - dnCertClient.getPrivateKey()); - - String hostname = InetAddress.getLocalHost().getCanonicalHostName(); - String subject = UserGroupInformation.getCurrentUser() - .getShortUserName() + "@" + hostname; - - builder.setCA(false) - .setKey(keyPair) - .setConfiguration(config) - .setSubject(subject); - - LOG.info("Creating csr for DN-> subject:{}", subject); - return builder.build(); - } - /** * Returns DatanodeDetails or null in case of Error. * @@ -585,11 +520,13 @@ public void stop() { unregisterMXBean(); // stop dn crl store try { - dnCRLStore.stop(); + if (dnCRLStore != null) { + dnCRLStore.stop(); + } } catch (Exception ex) { LOG.error("Datanode CRL store stop failed", ex); } - RatisDropwizardExports.clear(ratisMetricsMap); + RatisDropwizardExports.clear(ratisMetricsMap, ratisReporterList); } } @@ -604,6 +541,14 @@ public void close() { } } } + + if (dnCertClient != null) { + try { + dnCertClient.close(); + } catch (IOException e) { + LOG.warn("Certificate client could not be closed", e); + } + } } @VisibleForTesting @@ -624,4 +569,18 @@ public void setCertificateClient(CertificateClient client) { public void printError(Throwable error) { LOG.error("Exception in HddsDatanodeService.", error); } + + public void saveNewCertId(String newCertId) { + // save new certificate Id to VERSION file + datanodeDetails.setCertSerialId(newCertId); + try { + persistDatanodeDetails(datanodeDetails); + } catch (IOException ex) { + // New cert ID cannot be persisted into VERSION file. + String msg = "Failed to persist new cert ID " + newCertId + + "to VERSION file. 
Terminating datanode..."; + LOG.error(msg, ex); + terminateDatanode(); + } + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java index e0480735e5b3..81a6935098af 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java @@ -33,6 +33,8 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.codec.digest.DigestUtils; import org.apache.hadoop.fs.FileAlreadyExistsException; @@ -278,4 +280,22 @@ public static long getContainerID(File containerBaseDir) { return Long.parseLong(containerBaseDir.getName()); } + public static String getContainerTarGzName(long containerId) { + return "container-" + containerId + ".tar.gz"; + } + + public static long retrieveContainerIdFromTarGzName(String tarGzName) + throws IOException { + assert tarGzName != null; + Pattern pattern = Pattern.compile("container-(\\d+).tar.gz"); + // Now create matcher object. 
+ Matcher m = pattern.matcher(tarGzName); + + if (m.find()) { + return Long.parseLong(m.group(1)); + } else { + throw new IOException("Illegal container tar gz file " + + tarGzName); + } + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java index d3caa319fafb..55427b87b096 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java @@ -52,7 +52,7 @@ import org.slf4j.LoggerFactory; import org.yaml.snakeyaml.Yaml; import org.yaml.snakeyaml.constructor.AbstractConstruct; -import org.yaml.snakeyaml.constructor.Constructor; +import org.yaml.snakeyaml.constructor.SafeConstructor; import org.yaml.snakeyaml.introspector.BeanAccess; import org.yaml.snakeyaml.introspector.Property; import org.yaml.snakeyaml.introspector.PropertyUtils; @@ -160,7 +160,7 @@ public static ContainerData readContainer(InputStream input) KeyValueContainerData.getYamlFields()); representer.setPropertyUtils(propertyUtils); - Constructor containerDataConstructor = new ContainerDataConstructor(); + SafeConstructor containerDataConstructor = new ContainerDataConstructor(); Yaml yaml = new Yaml(containerDataConstructor, representer); yaml.setBeanAccess(BeanAccess.FIELD); @@ -200,7 +200,7 @@ public static Yaml getYamlForContainerType(ContainerType containerType, KeyValueContainerData.class, KEYVALUE_YAML_TAG); - Constructor keyValueDataConstructor = new ContainerDataConstructor(); + SafeConstructor keyValueDataConstructor = new ContainerDataConstructor(); return new Yaml(keyValueDataConstructor, representer); } @@ -255,7 +255,7 @@ protected NodeTuple representJavaBeanProperty( /** * Constructor class for KeyValueData, which will be 
used by Yaml. */ - private static class ContainerDataConstructor extends Constructor { + private static class ContainerDataConstructor extends SafeConstructor { ContainerDataConstructor() { //Adding our own specific constructors for tags. // When a new Container type is added, we need to add yamlConstructor diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java index 3e81333d34a3..0c5ae6aeebc7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java @@ -24,7 +24,6 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; -import org.apache.hadoop.ozone.common.MonotonicClock; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.slf4j.Logger; @@ -63,7 +62,7 @@ public class ContainerSet { private long recoveringTimeout; public ContainerSet(long recoveringTimeout) { - this.clock = new MonotonicClock(ZoneOffset.UTC); + this.clock = Clock.system(ZoneOffset.UTC); this.recoveringTimeout = recoveringTimeout; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java index 5059a6429135..fc711b5a3717 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java +++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java @@ -59,6 +59,7 @@ import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerScanner; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Time; +import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.thirdparty.com.google.protobuf.ProtocolMessageEnum; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -200,7 +201,8 @@ private ContainerCommandResponseProto dispatchRequest( boolean isWriteStage = (cmdType == Type.WriteChunk && dispatcherContext != null && dispatcherContext.getStage() - == DispatcherContext.WriteChunkStage.WRITE_DATA); + == DispatcherContext.WriteChunkStage.WRITE_DATA) + || (cmdType == Type.StreamInit); boolean isWriteCommitStage = (cmdType == Type.WriteChunk && dispatcherContext != null && dispatcherContext.getStage() @@ -699,4 +701,21 @@ private boolean isAllowed(String action) { default: return false; } } + + @Override + public StateMachine.DataChannel getStreamDataChannel( + ContainerCommandRequestProto msg) + throws StorageContainerException { + long containerID = msg.getContainerID(); + Container container = getContainer(containerID); + if (container != null) { + Handler handler = getHandler(getContainerType(container)); + return handler.getStreamDataChannel(container, msg); + } else { + throw new StorageContainerException( + "ContainerID " + containerID + " does not exist", + ContainerProtos.Result.CONTAINER_NOT_FOUND); + } + } + } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java index a2e397d54615..d02bae0a35ad 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java +++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java @@ -25,6 +25,7 @@ .ContainerCommandResponseProto; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; +import org.apache.ratis.statemachine.StateMachine; import java.util.Map; @@ -84,4 +85,13 @@ void validateContainerCommand( * @param clusterId */ void setClusterId(String clusterId); + + /** + * When uploading using stream, get StreamDataChannel. + */ + default StateMachine.DataChannel getStreamDataChannel( + ContainerCommandRequestProto msg) throws StorageContainerException { + throw new UnsupportedOperationException( + "getStreamDataChannel not supported."); + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerPacker.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerPacker.java index 8308c23866b8..a7c7f5ad20fd 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerPacker.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerPacker.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.file.Path; import org.apache.hadoop.ozone.container.common.impl.ContainerData; @@ -39,7 +40,7 @@ public interface ContainerPacker { * file but returned). 
*/ byte[] unpackContainerData(Container container, - InputStream inputStream) + InputStream inputStream, Path tmpDir, Path destContainerDir) throws IOException; /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java index 0fd7e14699e8..62418f2bbe33 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java @@ -37,6 +37,7 @@ import org.apache.hadoop.ozone.container.common.volume.VolumeSet; import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler; import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker; +import org.apache.ratis.statemachine.StateMachine; /** * Dispatcher sends ContainerCommandRequests to Handler. Each Container Type @@ -81,6 +82,10 @@ public static Handler getHandlerForContainerType( } } + public abstract StateMachine.DataChannel getStreamDataChannel( + Container container, ContainerCommandRequestProto msg) + throws StorageContainerException; + /** * Returns the Id of this datanode. 
* diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java index 4d72bb317f3e..4b8f0be1e3e4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java @@ -18,6 +18,8 @@ import java.io.Closeable; import java.io.IOException; +import java.time.Clock; +import java.time.ZoneId; import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -139,6 +141,7 @@ public DatanodeStateMachine(DatanodeDetails datanodeDetails, this.conf = conf; this.datanodeDetails = datanodeDetails; + Clock clock = Clock.system(ZoneId.systemDefault()); // Expected to be initialized already. 
layoutStorage = new DatanodeLayoutStorage(conf, datanodeDetails.getUuidString()); @@ -169,10 +172,10 @@ public DatanodeStateMachine(DatanodeDetails datanodeDetails, nextHB = new AtomicLong(Time.monotonicNow()); ContainerReplicator replicator = - new DownloadAndImportReplicator(container.getContainerSet(), + new DownloadAndImportReplicator(conf, container.getContainerSet(), container.getController(), new SimpleContainerDownloader(conf, dnCertClient), - new TarContainerPacker()); + new TarContainerPacker(), container.getVolumeSet()); replicatorMetrics = new MeasuredReplicator(replicator); @@ -180,7 +183,7 @@ public DatanodeStateMachine(DatanodeDetails datanodeDetails, conf.getObject(ReplicationConfig.class); supervisor = new ReplicationSupervisor(container.getContainerSet(), context, - replicatorMetrics, replicationConfig); + replicatorMetrics, replicationConfig, clock); replicationSupervisorMetrics = ReplicationSupervisorMetrics.create(supervisor); @@ -188,12 +191,12 @@ public DatanodeStateMachine(DatanodeDetails datanodeDetails, ecReconstructionMetrics = ECReconstructionMetrics.create(); ECReconstructionCoordinator ecReconstructionCoordinator = - new ECReconstructionCoordinator(conf, certClient, + new ECReconstructionCoordinator(conf, certClient, context, ecReconstructionMetrics); ecReconstructionSupervisor = new ECReconstructionSupervisor(container.getContainerSet(), context, replicationConfig.getReplicationMaxStreams(), - ecReconstructionCoordinator); + ecReconstructionCoordinator, clock); // When we add new handlers just adding a new handler here should do the @@ -207,7 +210,7 @@ public DatanodeStateMachine(DatanodeDetails datanodeDetails, .addHandler(new ReconstructECContainersCommandHandler(conf, ecReconstructionSupervisor)) .addHandler(new DeleteContainerCommandHandler( - dnConf.getContainerDeleteThreads())) + dnConf.getContainerDeleteThreads(), clock)) .addHandler(new ClosePipelineCommandHandler()) .addHandler(new CreatePipelineCommandHandler(conf)) 
.addHandler(new SetNodeOperationalStateCommandHandler(conf)) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java index eb44b3b8b630..c4d3428a8018 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java @@ -24,7 +24,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Optional; +import java.util.OptionalLong; import java.util.Queue; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -142,7 +142,7 @@ public class StateContext { * * For non-HA mode, term of SCMCommand will be 0. */ - private Optional termOfLeaderSCM = Optional.empty(); + private OptionalLong termOfLeaderSCM = OptionalLong.empty(); /** * Starting with a 2 sec heartbeat frequency which will be updated to the @@ -720,10 +720,9 @@ private void initTermOfLeaderSCM() { // if commandQueue is not empty, init termOfLeaderSCM // with the largest term found in commandQueue - commandQueue.stream() + termOfLeaderSCM = commandQueue.stream() .mapToLong(SCMCommand::getTerm) - .max() - .ifPresent(term -> termOfLeaderSCM = Optional.of(term)); + .max(); } /** @@ -731,12 +730,27 @@ private void initTermOfLeaderSCM() { * Always record the latest term that has seen. 
*/ private void updateTermOfLeaderSCM(SCMCommand command) { + updateTermOfLeaderSCM(command.getTerm()); + } + + public void updateTermOfLeaderSCM(final long newTerm) { if (!termOfLeaderSCM.isPresent()) { - LOG.error("should init termOfLeaderSCM before update it."); return; } - termOfLeaderSCM = Optional.of( - Long.max(termOfLeaderSCM.get(), command.getTerm())); + + final long currentTerm = termOfLeaderSCM.getAsLong(); + if (currentTerm < newTerm) { + setTermOfLeaderSCM(newTerm); + } + } + + @VisibleForTesting + public void setTermOfLeaderSCM(long term) { + termOfLeaderSCM = OptionalLong.of(term); + } + + public OptionalLong getTermOfLeaderSCM() { + return termOfLeaderSCM; } /** @@ -759,13 +773,14 @@ public SCMCommand getNextCommand() { } updateTermOfLeaderSCM(command); - if (command.getTerm() == termOfLeaderSCM.get()) { + final long currentTerm = termOfLeaderSCM.getAsLong(); + if (command.getTerm() == currentTerm) { return command; } LOG.warn("Detect and drop a SCMCommand {} from stale leader SCM," + " stale term {}, latest term {}.", - command, command.getTerm(), termOfLeaderSCM.get()); + command, command.getTerm(), currentTerm); } } finally { lock.unlock(); @@ -780,6 +795,7 @@ public SCMCommand getNextCommand() { public void addCommand(SCMCommand command) { lock.lock(); try { + updateTermOfLeaderSCM(command); commandQueue.add(command); } finally { lock.unlock(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java index c5912a2ffdc3..3e63af05f24f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java +++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java @@ -333,7 +333,8 @@ private void markBlocksForDeletionSchemaV3( DeletionMarker schemaV3Marker = (table, batch, tid, txn) -> { Table delTxTable = (Table) table; - delTxTable.putWithBatch(batch, containerData.deleteTxnKey(tid), txn); + delTxTable.putWithBatch(batch, containerData.getDeleteTxnKey(tid), + txn); }; markBlocksForDeletionTransaction(containerData, delTX, newDeletionBlocks, @@ -403,10 +404,10 @@ private void markBlocksForDeletionSchemaV1( try (BatchOperation batch = containerDB.getStore().getBatchHandler() .initBatchOperation()) { for (Long blkLong : delTX.getLocalIDList()) { - String blk = containerData.blockKey(blkLong); + String blk = containerData.getBlockKey(blkLong); BlockData blkInfo = blockDataTable.get(blk); if (blkInfo != null) { - String deletingKey = containerData.deletingBlockKey(blkLong); + String deletingKey = containerData.getDeletingBlockKey(blkLong); if (blockDataTable.get(deletingKey) != null || deletedBlocksTable.get(blk) != null) { if (LOG.isDebugEnabled()) { @@ -463,15 +464,15 @@ private void updateMetaData(KeyValueContainerData containerData, if (delTX.getTxID() > containerData.getDeleteTransactionId()) { // Update in DB pending delete key count and delete transaction ID. 
metadataTable - .putWithBatch(batchOperation, containerData.latestDeleteTxnKey(), - delTX.getTxID()); + .putWithBatch(batchOperation, + containerData.getLatestDeleteTxnKey(), delTX.getTxID()); } long pendingDeleteBlocks = containerData.getNumPendingDeletionBlocks() + newDeletionBlocks; metadataTable .putWithBatch(batchOperation, - containerData.pendingDeleteBlockCountKey(), + containerData.getPendingDeleteBlockCountKey(), pendingDeleteBlocks); // update pending deletion blocks count and delete transaction ID in diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java index 58ad2d18e4ff..767c00c04497 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java @@ -32,6 +32,8 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.time.Clock; +import java.util.OptionalLong; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ThreadPoolExecutor; @@ -48,15 +50,22 @@ public class DeleteContainerCommandHandler implements CommandHandler { LoggerFactory.getLogger(DeleteContainerCommandHandler.class); private final AtomicInteger invocationCount = new AtomicInteger(0); + private final AtomicInteger timeoutCount = new AtomicInteger(0); private final AtomicLong totalTime = new AtomicLong(0); private final ExecutorService executor; + private final Clock clock; - public DeleteContainerCommandHandler(int threadPoolSize) { - this.executor = Executors.newFixedThreadPool( + public 
DeleteContainerCommandHandler(int threadPoolSize, Clock clock) { + this(clock, Executors.newFixedThreadPool( threadPoolSize, new ThreadFactoryBuilder() - .setNameFormat("DeleteContainerThread-%d").build()); + .setNameFormat("DeleteContainerThread-%d").build())); } + protected DeleteContainerCommandHandler(Clock clock, + ExecutorService executor) { + this.executor = executor; + this.clock = clock; + } @Override public void handle(final SCMCommand command, final OzoneContainer ozoneContainer, @@ -65,18 +74,44 @@ public void handle(final SCMCommand command, final DeleteContainerCommand deleteContainerCommand = (DeleteContainerCommand) command; final ContainerController controller = ozoneContainer.getController(); - executor.execute(() -> { - final long startTime = Time.monotonicNow(); - invocationCount.incrementAndGet(); - try { - controller.deleteContainer(deleteContainerCommand.getContainerID(), - deleteContainerCommand.isForce()); - } catch (IOException e) { - LOG.error("Exception occurred while deleting the container.", e); - } finally { - totalTime.getAndAdd(Time.monotonicNow() - startTime); + executor.execute(() -> + handleInternal(command, context, deleteContainerCommand, controller)); + } + + private void handleInternal(SCMCommand command, StateContext context, + DeleteContainerCommand deleteContainerCommand, + ContainerController controller) { + final long startTime = Time.monotonicNow(); + invocationCount.incrementAndGet(); + try { + if (command.hasExpired(clock.millis())) { + LOG.info("Not processing the delete container command for " + + "container {} as the current time {}ms is after the command " + + "deadline {}ms", deleteContainerCommand.getContainerID(), + clock.millis(), command.getDeadline()); + timeoutCount.incrementAndGet(); + return; + } + + if (context != null) { + final OptionalLong currentTerm = context.getTermOfLeaderSCM(); + final long cmdTerm = command.getTerm(); + if (currentTerm.isPresent() && cmdTerm < currentTerm.getAsLong()) { + 
LOG.info("Ignoring delete container command for container {} since " + + "SCM leader has new term ({} < {})", + deleteContainerCommand.getContainerID(), + cmdTerm, currentTerm.getAsLong()); + return; + } } - }); + + controller.deleteContainer(deleteContainerCommand.getContainerID(), + deleteContainerCommand.isForce()); + } catch (IOException e) { + LOG.error("Exception occurred while deleting the container.", e); + } finally { + totalTime.getAndAdd(Time.monotonicNow() - startTime); + } } @Override @@ -94,6 +129,10 @@ public int getInvocationCount() { return this.invocationCount.get(); } + public int getTimeoutCount() { + return this.timeoutCount.get(); + } + @Override public long getAverageRunTime() { final int invocations = invocationCount.get(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java index 57d4d16f8ab1..c6abfc27c33a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java @@ -47,11 +47,7 @@ public void handle(SCMCommand command, OzoneContainer container, ReconstructECContainersCommand ecContainersCommand = (ReconstructECContainersCommand) command; ECReconstructionCommandInfo reconstructionCommandInfo = - new ECReconstructionCommandInfo(ecContainersCommand.getContainerID(), - ecContainersCommand.getEcReplicationConfig(), - ecContainersCommand.getMissingContainerIndexes(), - ecContainersCommand.getSources(), - ecContainersCommand.getTargetDatanodes()); + new ECReconstructionCommandInfo(ecContainersCommand); 
this.supervisor.addTask(reconstructionCommandInfo); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java index 44c783846ad1..df589e287d87 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java @@ -71,7 +71,8 @@ public void handle(SCMCommand command, OzoneContainer container, "Replication command is received for container %s " + "without source datanodes.", containerID); - supervisor.addTask(new ReplicationTask(containerID, sourceDatanodes)); + ReplicationTask task = new ReplicationTask(replicateCommand); + supervisor.addTask(task); } @Override diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java index ccb0e8b7d7d9..3a1bd8ffb31d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java @@ -307,6 +307,9 @@ private void processResponse(SCMHeartbeatResponseProto response, Preconditions.checkState(response.getDatanodeUUID() .equalsIgnoreCase(datanodeDetails.getUuid()), "Unexpected datanode ID in the response."); + if (response.hasTerm()) { + context.updateTermOfLeaderSCM(response.getTerm()); + } // Verify the response is 
indeed for this datanode. for (SCMCommandProto commandResponseProto : response.getCommandsList()) { switch (commandResponseProto.getCommandType()) { @@ -426,6 +429,7 @@ private void processResponse(SCMHeartbeatResponseProto response, * Common processing for SCM commands. * - set term * - set encoded token + * - any deadline which is relevant to the command * - add to context's queue */ private void processCommonCommand( @@ -436,6 +440,9 @@ private void processCommonCommand( if (response.hasEncodedToken()) { cmd.setEncodedToken(response.getEncodedToken()); } + if (response.hasDeadlineMsSinceEpoch()) { + cmd.setDeadline(response.getDeadlineMsSinceEpoch()); + } context.addCommand(cmd); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java index c1fc95079221..528f4b8bd7e3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java @@ -140,10 +140,10 @@ public XceiverServerGrpc(DatanodeDetails datanodeDetails, new GrpcXceiverService(dispatcher), new GrpcServerInterceptor())); SecurityConfig secConf = new SecurityConfig(conf); - if (secConf.isGrpcTlsEnabled()) { + if (secConf.isSecurityEnabled() && secConf.isGrpcTlsEnabled()) { try { SslContextBuilder sslClientContextBuilder = SslContextBuilder.forServer( - caClient.getPrivateKey(), caClient.getCertificate()); + caClient.getServerKeyStoresFactory().getKeyManagers()[0]); SslContextBuilder sslContextBuilder = GrpcSslContexts.configure( sslClientContextBuilder, secConf.getGrpcSslProvider()); nettyServerBuilder.sslContext(sslContextBuilder.build()); diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 02c0a8d2b152..f6f5a99927ca 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -29,6 +29,7 @@ import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CompletionException; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -61,6 +62,7 @@ import org.apache.hadoop.ozone.common.utils.BufferUtils; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; +import org.apache.hadoop.ozone.container.keyvalue.impl.KeyValueStreamDataChannel; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController; import org.apache.hadoop.util.Time; @@ -80,6 +82,7 @@ import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.server.storage.RaftStorage; +import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.StateMachineStorage; import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.statemachine.impl.BaseStateMachine; @@ -90,6 +93,7 @@ import org.apache.ratis.thirdparty.com.google.protobuf.TextFormat; import org.apache.ratis.util.TaskQueue; import org.apache.ratis.util.function.CheckedSupplier; +import org.apache.ratis.util.JavaUtils; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; @@ -423,6 +427,20 @@ private ContainerCommandResponseProto runCommand( return dispatchCommand(requestProto, context); } + private CompletableFuture runCommandAsync( + ContainerCommandRequestProto requestProto, LogEntryProto entry) { + return CompletableFuture.supplyAsync(() -> { + final DispatcherContext context = new DispatcherContext.Builder() + .setTerm(entry.getTerm()) + .setLogIndex(entry.getIndex()) + .setStage(DispatcherContext.WriteChunkStage.COMMIT_DATA) + .setContainer2BCSIDMap(container2BCSIDMap) + .build(); + + return runCommand(requestProto, context); + }, executor); + } + private CompletableFuture handleWriteChunk( ContainerCommandRequestProto requestProto, long entryIndex, long term, long startTime) { @@ -510,6 +528,64 @@ private CompletableFuture handleWriteChunk( return raftFuture; } + private StateMachine.DataChannel getStreamDataChannel( + ContainerCommandRequestProto requestProto, + DispatcherContext context) throws StorageContainerException { + if (LOG.isDebugEnabled()) { + LOG.debug("{}: getStreamDataChannel {} containerID={} pipelineID={} " + + "traceID={}", gid, requestProto.getCmdType(), + requestProto.getContainerID(), requestProto.getPipelineID(), + requestProto.getTraceID()); + } + runCommand(requestProto, context); // stream init + return dispatcher.getStreamDataChannel(requestProto); + } + + @Override + public CompletableFuture stream(RaftClientRequest request) { + return CompletableFuture.supplyAsync(() -> { + try { + ContainerCommandRequestProto requestProto = + message2ContainerCommandRequestProto(request.getMessage()); + DispatcherContext context = + new DispatcherContext.Builder() + .setStage(DispatcherContext.WriteChunkStage.WRITE_DATA) + .setContainer2BCSIDMap(container2BCSIDMap) + .build(); + DataChannel channel = getStreamDataChannel(requestProto, context); + final ExecutorService chunkExecutor = requestProto.hasWriteChunk() ? 
+ getChunkExecutor(requestProto.getWriteChunk()) : null; + return new LocalStream(channel, chunkExecutor); + } catch (IOException e) { + throw new CompletionException("Failed to create data stream", e); + } + }, executor); + } + + @Override + public CompletableFuture link(DataStream stream, LogEntryProto entry) { + if (stream == null) { + return JavaUtils.completeExceptionally(new IllegalStateException( + "DataStream is null")); + } + final DataChannel dataChannel = stream.getDataChannel(); + if (dataChannel.isOpen()) { + return JavaUtils.completeExceptionally(new IllegalStateException( + "DataStream: " + stream + " is not closed properly")); + } + + final ContainerCommandRequestProto request; + if (dataChannel instanceof KeyValueStreamDataChannel) { + request = ((KeyValueStreamDataChannel) dataChannel).getPutBlockRequest(); + } else { + return JavaUtils.completeExceptionally(new IllegalStateException( + "Unexpected DataChannel " + dataChannel.getClass())); + } + return runCommandAsync(request, entry).whenComplete( + (res, e) -> LOG.debug("link {}, entry: {}, request: {}", + res.getResult(), entry, request)); + } + private ExecutorService getChunkExecutor(WriteChunkRequestProto req) { int i = (int)(req.getBlockID().getLocalID() % chunkExecutors.size()); return chunkExecutors.get(i); @@ -803,7 +879,8 @@ public CompletableFuture applyTransaction(TransactionContext trx) { builder.setStage(DispatcherContext.WriteChunkStage.COMMIT_DATA); } if (cmdType == Type.WriteChunk || cmdType == Type.PutSmallFile - || cmdType == Type.PutBlock || cmdType == Type.CreateContainer) { + || cmdType == Type.PutBlock || cmdType == Type.CreateContainer + || cmdType == Type.StreamInit) { builder.setContainer2BCSIDMap(container2BCSIDMap); } CompletableFuture applyTransactionFuture = diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/LocalStream.java 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/LocalStream.java new file mode 100644 index 000000000000..780f8743988a --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/LocalStream.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.container.common.transport.server.ratis; + +import org.apache.ratis.statemachine.StateMachine; + +import java.io.IOException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.Executor; + +class LocalStream implements StateMachine.DataStream { + private final StateMachine.DataChannel dataChannel; + private final Executor executor; + + LocalStream(StateMachine.DataChannel dataChannel, Executor executor) { + this.dataChannel = dataChannel; + this.executor = executor; + } + + @Override + public StateMachine.DataChannel getDataChannel() { + return dataChannel; + } + + @Override + public CompletableFuture cleanUp() { + return CompletableFuture.supplyAsync(() -> { + try { + dataChannel.close(); + return true; + } catch (IOException e) { + throw new CompletionException("Failed to close data channel", e); + } + }); + } + + @Override + public Executor getExecutor() { + return executor; + } +} \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index c8d715cc60d2..89e5047b85b6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -98,6 +98,7 @@ import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.rpc.RpcType; import org.apache.ratis.rpc.SupportedRpcType; +import org.apache.ratis.server.DataStreamServerRpc; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.RaftServerRpc; @@ -129,6 +130,7 @@ 
private static long nextCallId() { private int serverPort; private int adminPort; private int clientPort; + private int dataStreamPort; private final RaftServer server; private final List chunkExecutors; private final ContainerDispatcher dispatcher; @@ -148,6 +150,7 @@ private static long nextCallId() { // Timeout used while calling submitRequest directly. private long requestTimeout; private boolean shouldDeleteRatisLogDirectory; + private boolean streamEnable; private XceiverServerRatis(DatanodeDetails dd, ContainerDispatcher dispatcher, ContainerController containerController, @@ -157,6 +160,9 @@ private XceiverServerRatis(DatanodeDetails dd, Objects.requireNonNull(dd, "id == null"); datanodeDetails = dd; assignPorts(); + this.streamEnable = conf.getBoolean( + OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_ENABLED, + OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_ENABLED_DEFAULT); RaftProperties serverProperties = newRaftProperties(); this.context = context; this.dispatcher = dispatcher; @@ -213,6 +219,32 @@ private ContainerStateMachine getStateMachine(RaftGroupId gid) { chunkExecutors, this, conf); } + private void setUpRatisStream(RaftProperties properties) { + // set the datastream config + if (conf.getBoolean( + OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT, + OzoneConfigKeys. 
+ DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT_DEFAULT)) { + dataStreamPort = 0; + } else { + dataStreamPort = conf.getInt( + OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_PORT, + OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_PORT_DEFAULT); + } + NettyConfigKeys.DataStream.setPort(properties, dataStreamPort); + int dataStreamAsyncRequestThreadPoolSize = + conf.getObject(DatanodeRatisServerConfig.class) + .getStreamRequestThreads(); + RaftServerConfigKeys.DataStream.setAsyncRequestThreadPoolSize(properties, + dataStreamAsyncRequestThreadPoolSize); + int dataStreamClientPoolSize = + conf.getObject(DatanodeRatisServerConfig.class) + .getClientPoolSize(); + RaftServerConfigKeys.DataStream.setClientPoolSize(properties, + dataStreamClientPoolSize); + } + + @SuppressWarnings("checkstyle:methodlength") private RaftProperties newRaftProperties() { final RaftProperties properties = new RaftProperties(); @@ -231,6 +263,10 @@ private RaftProperties newRaftProperties() { // set the configs enable and set the stateMachineData sync timeout RaftServerConfigKeys.Log.StateMachineData.setSync(properties, true); + if (streamEnable) { + setUpRatisStream(properties); + } + timeUnit = OzoneConfigKeys. 
DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT.getUnit(); duration = conf.getTimeDuration( @@ -491,7 +527,12 @@ public void start() throws IOException { Port.Name.RATIS_ADMIN); serverPort = getRealPort(serverRpc.getInetSocketAddress(), Port.Name.RATIS_SERVER); - + if (streamEnable) { + DataStreamServerRpc dataStreamServerRpc = + server.getDataStreamServerRpc(); + dataStreamPort = getRealPort(dataStreamServerRpc.getInetSocketAddress(), + Port.Name.RATIS_DATASTREAM); + } isStarted = true; } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java index 561708b852cd..a5cddc175eaf 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java @@ -368,6 +368,10 @@ public StorageType getStorageType() { } } + public String getVolumeRootDir() { + return volumeInfo != null ? volumeInfo.getRootDir() : null; + } + public long getCapacity() { return volumeInfo != null ? 
volumeInfo.getCapacity() : 0; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCommandInfo.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCommandInfo.java index c95f9646f859..c053a9ae9248 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCommandInfo.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCommandInfo.java @@ -23,60 +23,79 @@ import org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand.DatanodeDetailsAndReplicaIndex; import java.util.Arrays; -import java.util.List; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.stream.IntStream; + +import static java.util.Collections.unmodifiableSortedMap; +import static java.util.stream.Collectors.toMap; /** * This class is to keep the required EC reconstruction info. 
*/ public class ECReconstructionCommandInfo { - private long containerID; - private ECReplicationConfig ecReplicationConfig; - private byte[] missingContainerIndexes; - private List - sources; - private List targetDatanodes; + private final SortedMap sourceNodeMap; + private final SortedMap targetNodeMap; + private final long containerID; + private final ECReplicationConfig ecReplicationConfig; + private final byte[] missingContainerIndexes; + private final long deadlineMsSinceEpoch; + private final long term; - public ECReconstructionCommandInfo(long containerID, - ECReplicationConfig ecReplicationConfig, byte[] missingContainerIndexes, - List sources, - List targetDatanodes) { - this.containerID = containerID; - this.ecReplicationConfig = ecReplicationConfig; + public ECReconstructionCommandInfo(ReconstructECContainersCommand cmd) { + this.containerID = cmd.getContainerID(); + this.ecReplicationConfig = cmd.getEcReplicationConfig(); this.missingContainerIndexes = - Arrays.copyOf(missingContainerIndexes, missingContainerIndexes.length); - this.sources = sources; - this.targetDatanodes = targetDatanodes; + Arrays.copyOf(cmd.getMissingContainerIndexes(), + cmd.getMissingContainerIndexes().length); + this.deadlineMsSinceEpoch = cmd.getDeadline(); + this.term = cmd.getTerm(); + + sourceNodeMap = cmd.getSources().stream() + .collect(toMap( + DatanodeDetailsAndReplicaIndex::getReplicaIndex, + DatanodeDetailsAndReplicaIndex::getDnDetails, + (v1, v2) -> v1, TreeMap::new)); + targetNodeMap = IntStream.range(0, cmd.getTargetDatanodes().size()) + .boxed() + .collect(toMap( + i -> (int) missingContainerIndexes[i], + i -> cmd.getTargetDatanodes().get(i), + (v1, v2) -> v1, TreeMap::new)); } - public long getContainerID() { - return containerID; + public long getDeadline() { + return deadlineMsSinceEpoch; } - public byte[] getMissingContainerIndexes() { - return Arrays - .copyOf(missingContainerIndexes, missingContainerIndexes.length); + public long getContainerID() { + return 
containerID; } public ECReplicationConfig getEcReplicationConfig() { return ecReplicationConfig; } - public List getSources() { - return sources; + SortedMap getSourceNodeMap() { + return unmodifiableSortedMap(sourceNodeMap); } - public List getTargetDatanodes() { - return targetDatanodes; + SortedMap getTargetNodeMap() { + return unmodifiableSortedMap(targetNodeMap); } @Override public String toString() { - return "ECReconstructionCommandInfo{" + return "ECReconstructionCommand{" + "containerID=" + containerID - + ", ecReplicationConfig=" + ecReplicationConfig - + ", missingContainerIndexes=" + Arrays - .toString(missingContainerIndexes) - + ", sources=" + sources - + ", targetDatanodes=" + targetDatanodes + '}'; + + ", replication=" + ecReplicationConfig.getReplication() + + ", missingIndexes=" + Arrays.toString(missingContainerIndexes) + + ", sources=" + sourceNodeMap + + ", targets=" + targetNodeMap + "}"; + } + + public long getTerm() { + return term; } + } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java index d15157956223..4820fbcecb7a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java @@ -46,6 +46,7 @@ import org.apache.hadoop.ozone.client.io.ECBlockInputStreamProxy; import org.apache.hadoop.ozone.client.io.ECBlockReconstructedStripeInputStream; import org.apache.hadoop.ozone.container.common.helpers.BlockData; +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; import org.apache.hadoop.security.token.Token; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,6 +59,8 @@ import 
java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.OptionalLong; import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.CompletableFuture; @@ -105,10 +108,13 @@ public class ECReconstructionCoordinator implements Closeable { private final TokenHelper tokenHelper; private final ContainerClientMetrics clientMetrics; private final ECReconstructionMetrics metrics; + private final StateContext context; public ECReconstructionCoordinator(ConfigurationSource conf, CertificateClient certificateClient, + StateContext context, ECReconstructionMetrics metrics) throws IOException { + this.context = context; this.containerOperationClient = new ECContainerOperationClient(conf, certificateClient); this.byteBufferPool = new ElasticByteBufferPool(); @@ -474,4 +480,10 @@ private long calcEffectiveBlockGroupLen(BlockData[] blockGroup, public ECReconstructionMetrics getECReconstructionMetrics() { return this.metrics; } + + OptionalLong getTermOfLeaderSCM() { + return Optional.ofNullable(context) + .map(StateContext::getTermOfLeaderSCM) + .orElse(OptionalLong.empty()); + } } \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java index e0aa14419a4f..03d771da0489 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java @@ -17,36 +17,36 @@ */ package org.apache.hadoop.ozone.container.ec.reconstruction; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import 
org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand.DatanodeDetailsAndReplicaIndex; +import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.SortedMap; -import java.util.TreeMap; +import java.time.Clock; +import java.util.OptionalLong; import java.util.concurrent.ConcurrentHashMap; -import java.util.stream.Collectors; -import java.util.stream.IntStream; /** * This is the actual EC reconstruction coordination task. */ public class ECReconstructionCoordinatorTask implements Runnable { - static final Logger LOG = + private static final Logger LOG = LoggerFactory.getLogger(ECReconstructionCoordinatorTask.class); private final ConcurrentHashMap.KeySetView inprogressCounter; - private ECReconstructionCoordinator reconstructionCoordinator; - private ECReconstructionCommandInfo reconstructionCommandInfo; + private final ECReconstructionCoordinator reconstructionCoordinator; + private final ECReconstructionCommandInfo reconstructionCommandInfo; + private final Clock clock; public ECReconstructionCoordinatorTask( ECReconstructionCoordinator coordinator, ECReconstructionCommandInfo reconstructionCommandInfo, ConcurrentHashMap.KeySetView - inprogressReconstructionCoordinatorCounter) { + inprogressReconstructionCoordinatorCounter, + Clock clock) { this.reconstructionCoordinator = coordinator; this.reconstructionCommandInfo = reconstructionCommandInfo; this.inprogressCounter = inprogressReconstructionCoordinatorCounter; + this.clock = clock; } @Override @@ -64,33 +64,40 @@ public void run() { // respective container. HDDS-6582 // 5. Close/finalize the recovered containers. 
long containerID = this.reconstructionCommandInfo.getContainerID(); + long start = Time.monotonicNow(); if (LOG.isDebugEnabled()) { LOG.debug("Starting the EC reconstruction of the container {}", containerID); } try { - SortedMap sourceNodeMap = - reconstructionCommandInfo.getSources().stream().collect(Collectors - .toMap(DatanodeDetailsAndReplicaIndex::getReplicaIndex, - DatanodeDetailsAndReplicaIndex::getDnDetails, (v1, v2) -> v1, - TreeMap::new)); - SortedMap targetNodeMap = IntStream - .range(0, reconstructionCommandInfo.getTargetDatanodes().size()) - .boxed().collect(Collectors.toMap(i -> (int) reconstructionCommandInfo - .getMissingContainerIndexes()[i], - i -> reconstructionCommandInfo.getTargetDatanodes().get(i), - (v1, v2) -> v1, TreeMap::new)); + if (reconstructionCommandInfo.getDeadline() > 0 + && clock.millis() > reconstructionCommandInfo.getDeadline()) { + LOG.info("Ignoring this reconstruct container command for container" + + " {} since the current time {}ms is past the deadline {}ms", + containerID, clock.millis(), + reconstructionCommandInfo.getDeadline()); + return; + } + + final OptionalLong currentTerm = + reconstructionCoordinator.getTermOfLeaderSCM(); + final long taskTerm = reconstructionCommandInfo.getTerm(); + if (currentTerm.isPresent() && taskTerm < currentTerm.getAsLong()) { + LOG.info("Ignoring {} since SCM leader has new term ({} < {})", + reconstructionCommandInfo, taskTerm, currentTerm.getAsLong()); + return; + } reconstructionCoordinator.reconstructECContainerGroup( reconstructionCommandInfo.getContainerID(), - reconstructionCommandInfo.getEcReplicationConfig(), sourceNodeMap, - targetNodeMap); - LOG.info("Completed the EC reconstruction of the container {}", - reconstructionCommandInfo.getContainerID()); + reconstructionCommandInfo.getEcReplicationConfig(), + reconstructionCommandInfo.getSourceNodeMap(), + reconstructionCommandInfo.getTargetNodeMap()); + long elapsed = Time.monotonicNow() - start; + LOG.info("Completed {} in {} 
ms", reconstructionCommandInfo, elapsed); } catch (IOException e) { - LOG.warn( - "Failed to complete the reconstruction task for the container: " - + reconstructionCommandInfo.getContainerID(), e); + long elapsed = Time.monotonicNow() - start; + LOG.warn("Failed {} after {} ms", reconstructionCommandInfo, elapsed, e); } finally { this.inprogressCounter.remove(containerID); } @@ -98,7 +105,6 @@ public void run() { @Override public String toString() { - return "ECReconstructionCoordinatorTask{" + "reconstructionCommandInfo=" - + reconstructionCommandInfo + '}'; + return "ECReconstructionTask{info=" + reconstructionCommandInfo + '}'; } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionSupervisor.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionSupervisor.java index e2de7ac6959e..e36636d69356 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionSupervisor.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionSupervisor.java @@ -23,6 +23,7 @@ import java.io.Closeable; import java.io.IOException; +import java.time.Clock; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; @@ -39,6 +40,7 @@ public class ECReconstructionSupervisor implements Closeable { private final StateContext context; private final ExecutorService executor; private final ECReconstructionCoordinator reconstructionCoordinator; + private final Clock clock; /** * how many coordinator tasks currently being running. 
*/ @@ -47,18 +49,19 @@ public class ECReconstructionSupervisor implements Closeable { public ECReconstructionSupervisor(ContainerSet containerSet, StateContext context, ExecutorService executor, - ECReconstructionCoordinator coordinator) { + ECReconstructionCoordinator coordinator, Clock clock) { this.containerSet = containerSet; this.context = context; this.executor = executor; this.reconstructionCoordinator = coordinator; this.inProgressReconstrucionCoordinatorCounter = ConcurrentHashMap.newKeySet(); + this.clock = clock; } public ECReconstructionSupervisor(ContainerSet containerSet, StateContext context, int poolSize, - ECReconstructionCoordinator coordinator) { + ECReconstructionCoordinator coordinator, Clock clock) { // TODO: ReplicationSupervisor and this class can be refactored to have a // common interface. this(containerSet, context, @@ -66,7 +69,7 @@ public ECReconstructionSupervisor(ContainerSet containerSet, new LinkedBlockingQueue<>(), new ThreadFactoryBuilder().setDaemon(true) .setNameFormat("ECContainerReconstructionThread-%d").build()), - coordinator); + coordinator, clock); } public void stop() { @@ -86,7 +89,7 @@ public void addTask(ECReconstructionCommandInfo taskInfo) { .add(taskInfo.getContainerID())) { executor.execute( new ECReconstructionCoordinatorTask(getReconstructionCoordinator(), - taskInfo, inProgressReconstrucionCoordinatorCounter)); + taskInfo, inProgressReconstrucionCoordinatorCounter, clock)); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java index 7412e766d0c0..bd58b7253b99 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java @@ -23,6 +23,8 @@ import 
java.io.InputStream; import java.io.OutputStream; import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import java.time.Instant; import java.util.Collections; @@ -56,6 +58,7 @@ import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerLocationUtil; import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil; +import org.apache.hadoop.ozone.container.replication.DownloadAndImportReplicator; import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures; import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; @@ -507,21 +510,22 @@ public void updateDeleteTransactionId(long deleteTransactionId) { @Override public void importContainerData(InputStream input, - ContainerPacker packer) throws IOException { + ContainerPacker packer) + throws IOException { + HddsVolume hddsVolume = containerData.getVolume(); + String idDir = VersionedDatanodeFeatures.ScmHA.chooseContainerPathID( + hddsVolume, hddsVolume.getClusterID()); + long containerId = containerData.getContainerID(); + Path destContainerDir = + Paths.get(KeyValueContainerLocationUtil.getBaseContainerLocation( + hddsVolume.getHddsRootDir().toString(), idDir, containerId)); + Path tmpDir = DownloadAndImportReplicator.getUntarDirectory(hddsVolume); writeLock(); try { - if (getContainerFile().exists()) { - String errorMessage = String.format( - "Can't import container (cid=%d) data to a specific location" - + " as the container descriptor (%s) has already been exist.", - getContainerData().getContainerID(), - getContainerFile().getAbsolutePath()); - throw new StorageContainerException(errorMessage, - CONTAINER_ALREADY_EXISTS); - } //copy the values from the input stream to the final destination // directory. 
- byte[] descriptorContent = packer.unpackContainerData(this, input); + byte[] descriptorContent = packer.unpackContainerData(this, input, tmpDir, + destContainerDir); Preconditions.checkNotNull(descriptorContent, "Container descriptor is missing from the container archive: " @@ -533,31 +537,28 @@ public void importContainerData(InputStream input, KeyValueContainerData originalContainerData = (KeyValueContainerData) ContainerDataYaml .readContainer(descriptorContent); - - - containerData.setState(originalContainerData.getState()); - containerData - .setContainerDBType(originalContainerData.getContainerDBType()); - containerData.setSchemaVersion(originalContainerData.getSchemaVersion()); - - //rewriting the yaml file with new checksum calculation. - update(originalContainerData.getMetadata(), true); - - if (containerData.getSchemaVersion().equals(OzoneConsts.SCHEMA_V3)) { - // load metadata from received dump files before we try to parse kv - BlockUtils.loadKVContainerDataFromFiles(containerData, config); + importContainerData(originalContainerData); + } catch (Exception ex) { + // clean data under tmp directory + try { + Path containerUntarDir = tmpDir.resolve(String.valueOf(containerId)); + if (containerUntarDir.toFile().exists()) { + FileUtils.deleteDirectory(containerUntarDir.toFile()); + } + } catch (Exception deleteex) { + LOG.error( + "Can not cleanup container directory under {} for container {}", + tmpDir, containerId, deleteex); } - //fill in memory stat counter (keycount, byte usage) - KeyValueContainerUtil.parseKVContainerData(containerData, config); - - } catch (Exception ex) { + // Throw exception for existed containers if (ex instanceof StorageContainerException && ((StorageContainerException) ex).getResult() == CONTAINER_ALREADY_EXISTS) { throw ex; } - //delete all the temporary data in case of any exception. + + // delete all other temporary data in case of any exception. 
try { if (containerData.getSchemaVersion() != null && containerData.getSchemaVersion().equals(OzoneConsts.SCHEMA_V3)) { @@ -570,8 +571,7 @@ public void importContainerData(InputStream input, } catch (Exception deleteex) { LOG.error( "Can not cleanup destination directories after a container import" - + " error (cid" + - containerData.getContainerID() + ")", deleteex); + + " error (cid: {}", containerId, deleteex); } throw ex; } finally { @@ -579,6 +579,25 @@ public void importContainerData(InputStream input, } } + public void importContainerData(KeyValueContainerData originalContainerData) + throws IOException { + containerData.setState(originalContainerData.getState()); + containerData + .setContainerDBType(originalContainerData.getContainerDBType()); + containerData.setSchemaVersion(originalContainerData.getSchemaVersion()); + + //rewriting the yaml file with new checksum calculation. + update(originalContainerData.getMetadata(), true); + + if (containerData.getSchemaVersion().equals(OzoneConsts.SCHEMA_V3)) { + // load metadata from received dump files before we try to parse kv + BlockUtils.loadKVContainerDataFromFiles(containerData, config); + } + + //fill in memory stat counter (keycount, byte usage) + KeyValueContainerUtil.parseKVContainerData(containerData, config); + } + @Override public void exportContainerData(OutputStream destination, ContainerPacker packer) throws IOException { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java index 115fc7f37313..27b138da95b8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java @@ -277,7 +277,7 @@ private void 
scanData(DataTransferThrottler throttler, Canceler canceler) private BlockData getBlockDataFromDB(DBHandle db, BlockData block) throws IOException { String blockKey = - onDiskContainerData.blockKey(block.getBlockID().getLocalID()); + onDiskContainerData.getBlockKey(block.getBlockID().getLocalID()); return db.getStore().getBlockDataTable().get(blockKey); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java index 19a13a6db5b8..58862925c587 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java @@ -306,11 +306,12 @@ public void updateAndCommitDBCounters(DBHandle db, Table metadataTable = db.getStore().getMetadataTable(); // Set Bytes used and block count key. - metadataTable.putWithBatch(batchOperation, bytesUsedKey(), + metadataTable.putWithBatch(batchOperation, getBytesUsedKey(), getBytesUsed() - releasedBytes); - metadataTable.putWithBatch(batchOperation, blockCountKey(), + metadataTable.putWithBatch(batchOperation, getBlockCountKey(), getBlockCount() - deletedBlockCount); - metadataTable.putWithBatch(batchOperation, pendingDeleteBlockCountKey(), + metadataTable.putWithBatch(batchOperation, + getPendingDeleteBlockCountKey(), getNumPendingDeletionBlocks() - deletedBlockCount); db.getStore().getBatchHandler().commitBatchOperation(batchOperation); @@ -328,39 +329,39 @@ public void setReplicaIndex(int replicaIndex) { // to container schemas, we should use them instead of using // raw const variables defined. 
- public String blockKey(long localID) { + public String getBlockKey(long localID) { return formatKey(Long.toString(localID)); } - public String deletingBlockKey(long localID) { + public String getDeletingBlockKey(long localID) { return formatKey(DELETING_KEY_PREFIX + localID); } - public String deleteTxnKey(long txnID) { + public String getDeleteTxnKey(long txnID) { return formatKey(Long.toString(txnID)); } - public String latestDeleteTxnKey() { + public String getLatestDeleteTxnKey() { return formatKey(DELETE_TRANSACTION_KEY); } - public String bcsIdKey() { + public String getBcsIdKey() { return formatKey(BLOCK_COMMIT_SEQUENCE_ID); } - public String blockCountKey() { + public String getBlockCountKey() { return formatKey(BLOCK_COUNT); } - public String bytesUsedKey() { + public String getBytesUsedKey() { return formatKey(CONTAINER_BYTES_USED); } - public String pendingDeleteBlockCountKey() { + public String getPendingDeleteBlockCountKey() { return formatKey(PENDING_DELETE_BLOCK_COUNT); } - public String deletingBlockKeyPrefix() { + public String getDeletingBlockKeyPrefix() { return formatKey(DELETING_KEY_PREFIX); } @@ -370,7 +371,7 @@ public KeyPrefixFilter getUnprefixedKeyFilter() { } public KeyPrefixFilter getDeletingBlockKeyFilter() { - return new KeyPrefixFilter().addFilter(deletingBlockKeyPrefix()); + return new KeyPrefixFilter().addFilter(getDeletingBlockKeyPrefix()); } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java index c6395de27db5..595aa925a4fc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java @@ -229,15 +229,15 
@@ private JsonObject getDBMetadataJson(Table metadataTable, JsonObject dBMetadata = new JsonObject(); dBMetadata.addProperty(OzoneConsts.BLOCK_COUNT, - metadataTable.get(containerData.blockCountKey())); + metadataTable.get(containerData.getBlockCountKey())); dBMetadata.addProperty(OzoneConsts.CONTAINER_BYTES_USED, - metadataTable.get(containerData.bytesUsedKey())); + metadataTable.get(containerData.getBytesUsedKey())); dBMetadata.addProperty(OzoneConsts.PENDING_DELETE_BLOCK_COUNT, - metadataTable.get(containerData.pendingDeleteBlockCountKey())); + metadataTable.get(containerData.getPendingDeleteBlockCountKey())); dBMetadata.addProperty(OzoneConsts.DELETE_TRANSACTION_KEY, - metadataTable.get(containerData.latestDeleteTxnKey())); + metadataTable.get(containerData.getLatestDeleteTxnKey())); dBMetadata.addProperty(OzoneConsts.BLOCK_COMMIT_SEQUENCE_ID, - metadataTable.get(containerData.bcsIdKey())); + metadataTable.get(containerData.getBcsIdKey())); return dBMetadata; } @@ -341,7 +341,7 @@ private boolean checkAndRepair(JsonObject parent, BooleanSupplier keyRepairAction = () -> { boolean repaired = false; try { - metadataTable.put(containerData.blockCountKey(), + metadataTable.put(containerData.getBlockCountKey(), blockCountAggregate.getAsLong()); repaired = true; } catch (IOException ex) { @@ -376,7 +376,7 @@ private boolean checkAndRepair(JsonObject parent, BooleanSupplier keyRepairAction = () -> { boolean repaired = false; try { - metadataTable.put(containerData.bytesUsedKey(), + metadataTable.put(containerData.getBytesUsedKey(), usedBytesAggregate.getAsLong()); repaired = true; } catch (IOException ex) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 0fd6631359b1..7a4cc5807e4a 100644 --- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -71,7 +71,6 @@ import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext.WriteChunkStage; -import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; @@ -104,6 +103,7 @@ import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.getReadChunkResponse; import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.getReadContainerResponse; import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.getSuccessResponse; +import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.getSuccessResponseBuilder; import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.malformedRequest; import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.putBlockResponseSuccess; import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.unsupportedRequest; @@ -111,6 +111,7 @@ import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .ContainerDataProto.State.RECOVERING; +import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -184,6 +185,25 @@ public VolumeChoosingPolicy getVolumeChoosingPolicyForTesting() { return 
volumeChoosingPolicy; } + @Override + public StateMachine.DataChannel getStreamDataChannel( + Container container, ContainerCommandRequestProto msg) + throws StorageContainerException { + KeyValueContainer kvContainer = (KeyValueContainer) container; + checkContainerOpen(kvContainer); + + if (msg.hasWriteChunk()) { + BlockID blockID = + BlockID.getFromProtobuf(msg.getWriteChunk().getBlockID()); + + return chunkManager.getStreamDataChannel(kvContainer, + blockID, metrics); + } else { + throw new StorageContainerException("Malformed request.", + ContainerProtos.Result.IO_EXCEPTION); + } + } + @Override public void stop() { chunkManager.shutdown(); @@ -233,6 +253,8 @@ static ContainerCommandResponseProto dispatchRequest(KeyValueHandler handler, return handler.handleDeleteChunk(request, kvContainer); case WriteChunk: return handler.handleWriteChunk(request, kvContainer, dispatcherContext); + case StreamInit: + return handler.handleStreamInit(request, kvContainer, dispatcherContext); case ListChunk: return handler.handleUnsupportedOp(request); case CompactChunk: @@ -259,6 +281,35 @@ public BlockManager getBlockManager() { return this.blockManager; } + ContainerCommandResponseProto handleStreamInit( + ContainerCommandRequestProto request, KeyValueContainer kvContainer, + DispatcherContext dispatcherContext) { + final BlockID blockID; + if (request.hasWriteChunk()) { + WriteChunkRequestProto writeChunk = request.getWriteChunk(); + blockID = BlockID.getFromProtobuf(writeChunk.getBlockID()); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Malformed {} request. 
trace ID: {}", + request.getCmdType(), request.getTraceID()); + } + return malformedRequest(request); + } + + String path = null; + try { + checkContainerOpen(kvContainer); + path = chunkManager + .streamInit(kvContainer, blockID); + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } + + return getSuccessResponseBuilder(request) + .setMessage(path) + .build(); + } + /** * Handles Create Container Request. If successful, adds the container to * ContainerSet and sends an ICR to the SCM. @@ -329,13 +380,11 @@ ContainerCommandResponseProto handleCreateContainer( return getSuccessResponse(request); } - private void populateContainerPathFields(KeyValueContainer container) - throws IOException { + private void populateContainerPathFields(KeyValueContainer container, + HddsVolume hddsVolume) throws IOException { volumeSet.readLock(); + HddsVolume containerVolume = hddsVolume; try { - HddsVolume containerVolume = volumeChoosingPolicy.chooseVolume( - StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()), - container.getContainerData().getMaxSize()); String idDir = VersionedDatanodeFeatures.ScmHA.chooseContainerPathID( containerVolume, clusterId); container.populatePathFields(idDir, containerVolume); @@ -962,8 +1011,7 @@ private void checkContainerOpen(KeyValueContainer kvContainer) @Override public Container importContainer(ContainerData originalContainerData, final InputStream rawContainerStream, - final TarContainerPacker packer) - throws IOException { + final TarContainerPacker packer) throws IOException { Preconditions.checkState(originalContainerData instanceof KeyValueContainerData, "Should be KeyValueContainerData instance"); @@ -973,7 +1021,8 @@ public Container importContainer(ContainerData originalContainerData, KeyValueContainer container = new KeyValueContainer(containerData, conf); - populateContainerPathFields(container); + HddsVolume targetVolume = originalContainerData.getVolume(); + 
populateContainerPathFields(container, targetVolume); container.importContainerData(rawContainerStream, packer); sendICR(container); return container; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/TarContainerPacker.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/TarContainerPacker.java index e555c1081458..3d8c445a84fe 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/TarContainerPacker.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/TarContainerPacker.java @@ -28,13 +28,19 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; import java.util.stream.Stream; +import java.util.Objects; + +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.HddsUtils; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ContainerPacker; + import org.apache.commons.compress.archivers.ArchiveEntry; import org.apache.commons.compress.archivers.ArchiveInputStream; import org.apache.commons.compress.archivers.ArchiveOutputStream; @@ -42,10 +48,13 @@ import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; import org.apache.commons.compress.compressors.CompressorException; import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerLocationUtil; import org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaThreeImpl; import static java.util.stream.Collectors.toList; +import static 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_ALREADY_EXISTS; import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V3; /** @@ -60,6 +69,18 @@ public class TarContainerPacker private static final String CONTAINER_FILE_NAME = "container.yaml"; + private final String compression; + + private static final String NO_COMPRESSION = "no_compression"; + + public TarContainerPacker() { + this.compression = NO_COMPRESSION; + } + + public TarContainerPacker(String compression) { + this.compression = compression; + } + /** * Given an input stream (tar file) extract the data to the specified * directories. @@ -69,48 +90,35 @@ public class TarContainerPacker */ @Override public byte[] unpackContainerData(Container container, - InputStream input) + InputStream input, Path tmpDir, Path destContainerDir) throws IOException { - byte[] descriptorFileContent = null; KeyValueContainerData containerData = container.getContainerData(); - Path dbRoot = getDbPath(containerData); - Path chunksRoot = Paths.get(containerData.getChunksPath()); + long containerId = containerData.getContainerID(); - try (InputStream decompressed = decompress(input); - ArchiveInputStream archiveInput = untar(decompressed)) { + Path containerUntarDir = tmpDir.resolve(String.valueOf(containerId)); + if (containerUntarDir.toFile().exists()) { + FileUtils.deleteDirectory(containerUntarDir.toFile()); + } - ArchiveEntry entry = archiveInput.getNextEntry(); - while (entry != null) { - String name = entry.getName(); - long size = entry.getSize(); - if (name.startsWith(DB_DIR_NAME + "/")) { - Path destinationPath = dbRoot - .resolve(name.substring(DB_DIR_NAME.length() + 1)); - extractEntry(entry, archiveInput, size, dbRoot, - destinationPath); - } else if (name.startsWith(CHUNKS_DIR_NAME + "/")) { - Path destinationPath = chunksRoot - .resolve(name.substring(CHUNKS_DIR_NAME.length() + 1)); - extractEntry(entry, archiveInput, size, chunksRoot, - destinationPath); - } else if 
(CONTAINER_FILE_NAME.equals(name)) { - //Don't do anything. Container file should be unpacked in a - //separated step by unpackContainerDescriptor call. - descriptorFileContent = readEntry(archiveInput, size); - } else { - throw new IllegalArgumentException( - "Unknown entry in the tar file: " + "" + name); - } - entry = archiveInput.getNextEntry(); - } - return descriptorFileContent; + Path dbRoot = getDbPath(containerUntarDir, containerData); + Path chunksRoot = getChunkPath(containerUntarDir, containerData); + byte[] descriptorFileContent = innerUnpack(input, dbRoot, chunksRoot); - } catch (CompressorException e) { - throw new IOException( - "Can't uncompress the given container: " + container - .getContainerData().getContainerID(), - e); + if (!Files.exists(destContainerDir)) { + Files.createDirectories(destContainerDir); } + if (FileUtils.isEmptyDirectory(destContainerDir.toFile())) { + Files.move(containerUntarDir, destContainerDir, + StandardCopyOption.ATOMIC_MOVE, + StandardCopyOption.REPLACE_EXISTING); + } else { + String errorMessage = "Container " + containerId + + " unpack failed because ContainerFile " + + destContainerDir.toAbsolutePath() + " already exists"; + throw new StorageContainerException(errorMessage, + CONTAINER_ALREADY_EXISTS); + } + return descriptorFileContent; } private void extractEntry(ArchiveEntry entry, InputStream input, long size, @@ -180,7 +188,7 @@ public void pack(Container container, public byte[] unpackContainerDescriptor(InputStream input) throws IOException { try (InputStream decompressed = decompress(input); - ArchiveInputStream archiveInput = untar(decompressed)) { + ArchiveInputStream archiveInput = untar(decompressed)) { ArchiveEntry entry = archiveInput.getNextEntry(); while (entry != null) { @@ -209,6 +217,33 @@ public static Path getDbPath(KeyValueContainerData containerData) { } } + public static Path getDbPath(Path baseDir, + KeyValueContainerData containerData) { + if (baseDir.toAbsolutePath().toString().equals( + 
containerData.getContainerPath())) { + return getDbPath(containerData); + } + Path containerPath = Paths.get(containerData.getContainerPath()); + Path dbPath = Paths.get(containerData.getDbFile().getPath()); + Path relativePath = containerPath.relativize(dbPath); + + if (containerData.getSchemaVersion().equals(SCHEMA_V3)) { + Path metadataDir = KeyValueContainerLocationUtil.getContainerMetaDataPath( + baseDir.toString()).toPath(); + return DatanodeStoreSchemaThreeImpl.getDumpDir(metadataDir.toFile()) + .toPath(); + } else { + return baseDir.resolve(relativePath); + } + } + + public static Path getChunkPath(Path baseDir, + KeyValueContainerData containerData) { + Path chunkDir = KeyValueContainerLocationUtil.getChunksLocationPath( + baseDir.toString()).toPath(); + return chunkDir; + } + private byte[] readEntry(InputStream input, final long size) throws IOException { ByteArrayOutputStream output = new ByteArrayOutputStream(); @@ -259,16 +294,56 @@ private static ArchiveOutputStream tar(OutputStream output) { return new TarArchiveOutputStream(output); } - private static InputStream decompress(InputStream input) + @VisibleForTesting + InputStream decompress(InputStream input) throws CompressorException { - return new CompressorStreamFactory() - .createCompressorInputStream(CompressorStreamFactory.GZIP, input); + return Objects.equals(compression, NO_COMPRESSION) ? + input : new CompressorStreamFactory() + .createCompressorInputStream(compression, input); } - private static OutputStream compress(OutputStream output) + @VisibleForTesting + OutputStream compress(OutputStream output) throws CompressorException { - return new CompressorStreamFactory() - .createCompressorOutputStream(CompressorStreamFactory.GZIP, output); + return Objects.equals(compression, NO_COMPRESSION) ? 
+ output : new CompressorStreamFactory() + .createCompressorOutputStream(compression, output); } + private byte[] innerUnpack(InputStream input, Path dbRoot, Path chunksRoot) + throws IOException { + byte[] descriptorFileContent = null; + try (InputStream decompressed = decompress(input); + ArchiveInputStream archiveInput = untar(decompressed)) { + ArchiveEntry entry = archiveInput.getNextEntry(); + while (entry != null) { + String name = entry.getName(); + long size = entry.getSize(); + if (name.startsWith(DB_DIR_NAME + "/")) { + Path destinationPath = dbRoot + .resolve(name.substring(DB_DIR_NAME.length() + 1)); + extractEntry(entry, archiveInput, size, dbRoot, + destinationPath); + } else if (name.startsWith(CHUNKS_DIR_NAME + "/")) { + Path destinationPath = chunksRoot + .resolve(name.substring(CHUNKS_DIR_NAME.length() + 1)); + extractEntry(entry, archiveInput, size, chunksRoot, + destinationPath); + } else if (CONTAINER_FILE_NAME.equals(name)) { + //Don't do anything. Container file should be unpacked in a + //separated step by unpackContainerDescriptor call. 
+ descriptorFileContent = readEntry(archiveInput, size); + } else { + throw new IllegalArgumentException( + "Unknown entry in the tar file: " + "" + name); + } + entry = archiveInput.getNextEntry(); + } + return descriptorFileContent; + + } catch (CompressorException e) { + throw new IOException("Can't uncompress to dbRoot: " + dbRoot + + ", chunksRoot: " + chunksRoot, e); + } + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java index 13a5a6981155..591efe04e2e5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java @@ -45,15 +45,22 @@ private KeyValueContainerLocationUtil() { public static File getContainerMetaDataPath(String hddsVolumeDir, String clusterId, long containerId) { - String containerMetaDataPath = - getBaseContainerLocation(hddsVolumeDir, clusterId, - containerId); - containerMetaDataPath = containerMetaDataPath + File.separator + + return getContainerMetaDataPath( + getBaseContainerLocation(hddsVolumeDir, clusterId, containerId)); + } + + /** + * Returns Container Metadata Location. + * @param containerBaseDir Base container dir + * @return containerMetadata Path to container metadata location where + * .container file will be stored. + */ + public static File getContainerMetaDataPath(String containerBaseDir) { + String containerMetaDataPath = containerBaseDir + File.separator + OzoneConsts.CONTAINER_META_PATH; return new File(containerMetaDataPath); } - /** * Returns Container Chunks Location. 
* @param baseDir @@ -63,8 +70,17 @@ public static File getContainerMetaDataPath(String hddsVolumeDir, */ public static File getChunksLocationPath(String baseDir, String clusterId, long containerId) { - String chunksPath = - getBaseContainerLocation(baseDir, clusterId, containerId) + return getChunksLocationPath( + getBaseContainerLocation(baseDir, clusterId, containerId)); + } + + /** + * Returns Container Chunks Location. + * @param containerBaseDir + * @return chunksPath + */ + public static File getChunksLocationPath(String containerBaseDir) { + String chunksPath = containerBaseDir + File.separator + OzoneConsts.STORAGE_DIR_CHUNKS; return new File(chunksPath); } @@ -76,7 +92,7 @@ public static File getChunksLocationPath(String baseDir, String clusterId, * @param containerId * @return base directory for container. */ - private static String getBaseContainerLocation(String hddsVolumeDir, + public static String getBaseContainerLocation(String hddsVolumeDir, String clusterId, long containerId) { Preconditions.checkNotNull(hddsVolumeDir, "Base Directory cannot be null"); @@ -111,7 +127,12 @@ public static File getContainerDBFile(KeyValueContainerData containerData) { return new File(containerData.getVolume().getDbParentDir(), OzoneConsts.CONTAINER_DB_NAME); } - return new File(containerData.getMetadataPath(), - containerData.getContainerID() + OzoneConsts.DN_CONTAINER_DB); + return getContainerDBFile(containerData.getMetadataPath(), containerData); + } + + public static File getContainerDBFile(String baseDir, + KeyValueContainerData containerData) { + return new File(baseDir, containerData.getContainerID() + + OzoneConsts.DN_CONTAINER_DB); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java index b23a49556f76..bc3d96d9b211 100644 --- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java @@ -246,7 +246,7 @@ private static void populateContainerMetadata( // Set pending deleted block count. Long pendingDeleteBlockCount = metadataTable.get(kvContainerData - .pendingDeleteBlockCountKey()); + .getPendingDeleteBlockCountKey()); if (pendingDeleteBlockCount != null) { kvContainerData.incrPendingDeletionBlocks( pendingDeleteBlockCount); @@ -263,7 +263,7 @@ private static void populateContainerMetadata( // Set delete transaction id. Long delTxnId = - metadataTable.get(kvContainerData.latestDeleteTxnKey()); + metadataTable.get(kvContainerData.getLatestDeleteTxnKey()); if (delTxnId != null) { kvContainerData .updateDeleteTransactionId(delTxnId); @@ -271,7 +271,7 @@ private static void populateContainerMetadata( // Set BlockCommitSequenceId. Long bcsId = metadataTable.get( - kvContainerData.bcsIdKey()); + kvContainerData.getBcsIdKey()); if (bcsId != null) { kvContainerData .updateBlockCommitSequenceId(bcsId); @@ -280,7 +280,7 @@ private static void populateContainerMetadata( // Set bytes used. // commitSpace for Open Containers relies on usedBytes Long bytesUsed = - metadataTable.get(kvContainerData.bytesUsedKey()); + metadataTable.get(kvContainerData.getBytesUsedKey()); if (bytesUsed != null) { isBlockMetadataSet = true; kvContainerData.setBytesUsed(bytesUsed); @@ -288,7 +288,7 @@ private static void populateContainerMetadata( // Set block count. 
Long blockCount = metadataTable.get( - kvContainerData.blockCountKey()); + kvContainerData.getBlockCountKey()); if (blockCount != null) { isBlockMetadataSet = true; kvContainerData.setBlockCount(blockCount); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java index 67b38ff68d2c..d822ce51198d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java @@ -158,7 +158,7 @@ public static long persistPutBlock(KeyValueContainer container, // If block exists in cache, blockCount should not be incremented. if (!isBlockInCache) { if (db.getStore().getBlockDataTable().get( - containerData.blockKey(localID)) == null) { + containerData.getBlockKey(localID)) == null) { // Block does not exist in DB => blockCount needs to be // incremented when the block is added into DB. incrBlockCount = true; @@ -166,10 +166,10 @@ public static long persistPutBlock(KeyValueContainer container, } db.getStore().getBlockDataTable().putWithBatch( - batch, containerData.blockKey(localID), data); + batch, containerData.getBlockKey(localID), data); if (bcsId != 0) { db.getStore().getMetadataTable().putWithBatch( - batch, containerData.bcsIdKey(), bcsId); + batch, containerData.getBcsIdKey(), bcsId); } // Set Bytes used, this bytes used will be updated for every write and @@ -179,13 +179,13 @@ public static long persistPutBlock(KeyValueContainer container, // is only used to compute the bytes used. This is done to keep the // current behavior and avoid DB write during write chunk operation. 
db.getStore().getMetadataTable().putWithBatch( - batch, containerData.bytesUsedKey(), + batch, containerData.getBytesUsedKey(), containerData.getBytesUsed()); // Set Block Count for a container. if (incrBlockCount) { db.getStore().getMetadataTable().putWithBatch( - batch, containerData.blockCountKey(), + batch, containerData.getBlockCountKey(), containerData.getBlockCount() + 1); } @@ -327,7 +327,7 @@ public List listBlock(Container container, long startLocalID, int try (DBHandle db = BlockUtils.getDB(cData, config)) { result = new ArrayList<>(); String startKey = (startLocalID == -1) ? cData.startKeyEmpty() - : cData.blockKey(startLocalID); + : cData.getBlockKey(startLocalID); List> range = db.getStore().getBlockDataTable() .getSequentialRangeKVs(startKey, count, @@ -352,7 +352,7 @@ public void shutdown() { private BlockData getBlockByID(DBHandle db, BlockID blockID, KeyValueContainerData containerData) throws IOException { - String blockKey = containerData.blockKey(blockID.getLocalID()); + String blockKey = containerData.getBlockKey(blockID.getLocalID()); BlockData blockData = db.getStore().getBlockDataTable().get(blockKey); if (blockData == null) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerDispatcher.java index 763647313b8c..92f6327447ab 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerDispatcher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerDispatcher.java @@ -25,6 +25,7 @@ import org.apache.hadoop.ozone.common.ChunkBuffer; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; 
import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; @@ -33,6 +34,7 @@ import org.apache.hadoop.ozone.container.keyvalue.interfaces.ChunkManager; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.ratis.statemachine.StateMachine; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -73,6 +75,20 @@ public void writeChunk(Container container, BlockID blockID, ChunkInfo info, .writeChunk(container, blockID, info, data, dispatcherContext); } + public String streamInit(Container container, BlockID blockID) + throws StorageContainerException { + return selectHandler(container) + .streamInit(container, blockID); + } + + @Override + public StateMachine.DataChannel getStreamDataChannel( + Container container, BlockID blockID, ContainerMetrics metrics) + throws StorageContainerException { + return selectHandler(container) + .getStreamDataChannel(container, blockID, metrics); + } + @Override public void finishWriteChunks(KeyValueContainer kvContainer, BlockData blockData) throws IOException { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java index 51cd5708d3e6..23db342da030 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java @@ -32,6 +32,7 @@ import org.apache.hadoop.ozone.common.utils.BufferUtils; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import 
org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; @@ -42,6 +43,7 @@ import org.apache.hadoop.ozone.container.keyvalue.interfaces.ChunkManager; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.ratis.statemachine.StateMachine; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -89,6 +91,24 @@ private static void checkLayoutVersion(Container container) { container.getContainerData().getLayoutVersion() == FILE_PER_BLOCK); } + @Override + public String streamInit(Container container, BlockID blockID) + throws StorageContainerException { + checkLayoutVersion(container); + File chunkFile = getChunkFile(container, blockID, null); + return chunkFile.getAbsolutePath(); + } + + @Override + public StateMachine.DataChannel getStreamDataChannel( + Container container, BlockID blockID, ContainerMetrics metrics) + throws StorageContainerException { + checkLayoutVersion(container); + File chunkFile = getChunkFile(container, blockID, null); + return new KeyValueStreamDataChannel(chunkFile, + container.getContainerData(), metrics); + } + @Override public void writeChunk(Container container, BlockID blockID, ChunkInfo info, ChunkBuffer data, DispatcherContext dispatcherContext) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/KeyValueStreamDataChannel.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/KeyValueStreamDataChannel.java new file mode 100644 index 000000000000..99dc40f5d002 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/KeyValueStreamDataChannel.java @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue.impl; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; +import org.apache.hadoop.hdds.ratis.ContainerCommandRequestMessage; +import org.apache.hadoop.hdds.ratis.RatisHelper; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.scm.storage.BlockDataStreamOutput; +import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; +import org.apache.ratis.thirdparty.io.netty.buffer.Unpooled; +import org.apache.ratis.util.ReferenceCountedObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Deque; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import 
java.util.Objects; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; + +/** + * This class is used to get the DataChannel for streaming. + */ +public class KeyValueStreamDataChannel extends StreamDataChannelBase { + public static final Logger LOG = + LoggerFactory.getLogger(KeyValueStreamDataChannel.class); + + /** + * Keep the last {@link Buffers#max} bytes in the buffer + * in order to create putBlockRequest + * at {@link #closeBuffers(Buffers, WriteMethod)}}. + */ + static class Buffers { + private final Deque> deque + = new LinkedList<>(); + private final int max; + private int length; + + Buffers(int max) { + this.max = max; + } + + private boolean isExtra(int n) { + return length - n >= max; + } + + private boolean hasExtraBuffer() { + return Optional.ofNullable(deque.peek()) + .map(ReferenceCountedObject::get) + .filter(b -> isExtra(b.remaining())) + .isPresent(); + } + + /** + * @return extra buffers which are safe to be written. 
+ */ + Iterable> offer( + ReferenceCountedObject ref) { + final ByteBuffer buffer = ref.retain(); + LOG.debug("offer {}", buffer); + final boolean offered = deque.offer(ref); + Preconditions.checkState(offered, "Failed to offer"); + length += buffer.remaining(); + + return () -> new Iterator>() { + @Override + public boolean hasNext() { + return hasExtraBuffer(); + } + + @Override + public ReferenceCountedObject next() { + final ReferenceCountedObject polled = poll(); + length -= polled.get().remaining(); + Preconditions.checkState(length >= max); + return polled; + } + }; + } + + ReferenceCountedObject poll() { + final ReferenceCountedObject polled + = Objects.requireNonNull(deque.poll()); + RatisHelper.debug(polled.get(), "polled", LOG); + return polled; + } + + ReferenceCountedObject pollAll() { + Preconditions.checkState(!deque.isEmpty(), "The deque is empty"); + final ByteBuffer[] array = new ByteBuffer[deque.size()]; + final List> refs + = new ArrayList<>(deque.size()); + for (int i = 0; i < array.length; i++) { + final ReferenceCountedObject ref = poll(); + refs.add(ref); + array[i] = ref.get(); + } + final ByteBuf buf = Unpooled.wrappedBuffer(array).asReadOnly(); + return ReferenceCountedObject.wrap(buf, () -> { + }, () -> { + buf.release(); + refs.forEach(ReferenceCountedObject::release); + }); + } + } + + interface WriteMethod { + int applyAsInt(ByteBuffer src) throws IOException; + } + + private final Buffers buffers = new Buffers( + BlockDataStreamOutput.PUT_BLOCK_REQUEST_LENGTH_MAX); + private final AtomicReference putBlockRequest + = new AtomicReference<>(); + private final AtomicBoolean closed = new AtomicBoolean(); + + KeyValueStreamDataChannel(File file, ContainerData containerData, + ContainerMetrics metrics) + throws StorageContainerException { + super(file, containerData, metrics); + } + + @Override + ContainerProtos.Type getType() { + return ContainerProtos.Type.StreamWrite; + } + + @Override + public int write(ReferenceCountedObject 
referenceCounted) + throws IOException { + assertOpen(); + return writeBuffers(referenceCounted, buffers, super::writeFileChannel); + } + + static int writeBuffers(ReferenceCountedObject src, + Buffers buffers, WriteMethod writeMethod) + throws IOException { + for (ReferenceCountedObject b : buffers.offer(src)) { + try { + writeFully(b.get(), writeMethod); + } finally { + b.release(); + } + } + return src.get().remaining(); + } + + private static void writeFully(ByteBuffer b, WriteMethod writeMethod) + throws IOException { + for (; b.remaining() > 0;) { + final int written = writeMethod.applyAsInt(b); + if (written <= 0) { + throw new IOException("Unable to write"); + } + } + } + + public ContainerCommandRequestProto getPutBlockRequest() { + return Objects.requireNonNull(putBlockRequest.get(), + () -> "putBlockRequest == null, " + this); + } + + void assertOpen() throws IOException { + if (closed.get()) { + throw new IOException("Already closed: " + this); + } + } + + @Override + public void close() throws IOException { + if (closed.compareAndSet(false, true)) { + putBlockRequest.set(closeBuffers(buffers, super::writeFileChannel)); + super.close(); + } + } + + static ContainerCommandRequestProto closeBuffers( + Buffers buffers, WriteMethod writeMethod) throws IOException { + final ReferenceCountedObject ref = buffers.pollAll(); + final ByteBuf buf = ref.retain(); + final ContainerCommandRequestProto putBlockRequest; + try { + putBlockRequest = readPutBlockRequest(buf); + // write the remaining data + writeFully(buf.nioBuffer(), writeMethod); + } finally { + ref.release(); + } + return putBlockRequest; + } + + private static int readProtoLength(ByteBuf b, int lengthIndex) { + final int readerIndex = b.readerIndex(); + LOG.debug("{}, lengthIndex = {}, readerIndex = {}", + b, lengthIndex, readerIndex); + if (lengthIndex > readerIndex) { + b.readerIndex(lengthIndex); + } else { + Preconditions.checkState(lengthIndex == readerIndex); + } + RatisHelper.debug(b, 
"readProtoLength", LOG); + return b.nioBuffer().getInt(); + } + + static ContainerCommandRequestProto readPutBlockRequest(ByteBuf b) + throws IOException { + // readerIndex protoIndex lengthIndex readerIndex+readableBytes + // V V V V + // format: |--- data ---|--- proto ---|--- proto length (4 bytes) ---| + final int readerIndex = b.readerIndex(); + final int lengthIndex = readerIndex + b.readableBytes() - 4; + final int protoLength = readProtoLength(b.duplicate(), lengthIndex); + final int protoIndex = lengthIndex - protoLength; + + final ContainerCommandRequestProto proto; + try { + proto = readPutBlockRequest(b.slice(protoIndex, protoLength).nioBuffer()); + } catch (Throwable t) { + RatisHelper.debug(b, "catch", LOG); + throw new IOException("Failed to readPutBlockRequest from " + b + + ": readerIndex=" + readerIndex + + ", protoIndex=" + protoIndex + + ", protoLength=" + protoLength + + ", lengthIndex=" + lengthIndex, t); + } + + // set index for reading data + b.writerIndex(protoIndex); + + return proto; + } + + private static ContainerCommandRequestProto readPutBlockRequest(ByteBuffer b) + throws IOException { + RatisHelper.debug(b, "readPutBlockRequest", LOG); + final ByteString byteString = ByteString.copyFrom(b); + + final ContainerCommandRequestProto request = + ContainerCommandRequestMessage.toProto(byteString, null); + + if (!request.hasPutBlock()) { + throw new StorageContainerException( + "Malformed PutBlock request. 
trace ID: " + request.getTraceID(), + ContainerProtos.Result.MALFORMED_REQUEST); + } + return request; + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/StreamDataChannelBase.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/StreamDataChannelBase.java new file mode 100644 index 000000000000..982903324848 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/StreamDataChannelBase.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.container.keyvalue.impl; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.ratis.statemachine.StateMachine; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; + +/** + * For write state machine data. + */ +abstract class StreamDataChannelBase implements StateMachine.DataChannel { + private final RandomAccessFile randomAccessFile; + + private final File file; + + private final ContainerData containerData; + private final ContainerMetrics metrics; + + StreamDataChannelBase(File file, ContainerData containerData, + ContainerMetrics metrics) + throws StorageContainerException { + try { + this.file = file; + this.randomAccessFile = new RandomAccessFile(file, "rw"); + } catch (FileNotFoundException e) { + throw new StorageContainerException("BlockFile not exists with " + + "container Id " + containerData.getContainerID() + + " file " + file.getAbsolutePath(), + ContainerProtos.Result.IO_EXCEPTION); + } + this.containerData = containerData; + this.metrics = metrics; + } + + abstract ContainerProtos.Type getType(); + + private FileChannel getChannel() { + return randomAccessFile.getChannel(); + } + + @Override + public final void force(boolean metadata) throws IOException { + getChannel().force(metadata); + } + + @Override + public final boolean isOpen() { + return getChannel().isOpen(); + } + + @Override + public void close() throws IOException { + randomAccessFile.close(); + } + + final int writeFileChannel(ByteBuffer src) throws IOException { + final int writeBytes = getChannel().write(src); + 
metrics.incContainerBytesStats(getType(), writeBytes); + containerData.updateWriteStats(writeBytes, false); + return writeBytes; + } + + @Override + public String toString() { + return getClass().getSimpleName() + "{" + + "File=" + file.getAbsolutePath() + + ", containerID=" + containerData.getContainerID() + + '}'; + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/ChunkManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/ChunkManager.java index 15ff9d6b9d61..7a64f076281b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/ChunkManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/ChunkManager.java @@ -25,9 +25,11 @@ import org.apache.hadoop.ozone.common.ChunkBuffer; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; +import org.apache.ratis.statemachine.StateMachine; import java.io.IOException; import java.nio.ByteBuffer; @@ -104,6 +106,17 @@ default void finishWriteChunks(KeyValueContainer kvContainer, // no-op } + default String streamInit(Container container, BlockID blockID) + throws StorageContainerException { + return null; + } + + default StateMachine.DataChannel getStreamDataChannel( + Container container, BlockID blockID, ContainerMetrics metrics) + throws StorageContainerException { + return null; + } + static long getBufferCapacityForChunkRead(ChunkInfo chunkInfo, long defaultReadBufferCapacity) { long bufferCapacity = 0; diff 
--git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java index c8cffc380ecf..0f8b80cf0dfe 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java @@ -440,7 +440,8 @@ public ContainerBackgroundTaskResult deleteViaSchema3( Deleter schema3Deleter = (table, batch, tid) -> { Table delTxTable = (Table) table; - delTxTable.deleteWithBatch(batch, containerData.deleteTxnKey(tid)); + delTxTable.deleteWithBatch(batch, + containerData.getDeleteTxnKey(tid)); }; Table deleteTxns = ((DeleteTransactionStore) meta.getStore()) @@ -502,7 +503,7 @@ private ContainerBackgroundTaskResult deleteViaTransactionStore( deleter.apply(deleteTxns, batch, delTx.getTxID()); for (Long blk : delTx.getLocalIDList()) { blockDataTable.deleteWithBatch(batch, - containerData.blockKey(blk)); + containerData.getBlockKey(blk)); } } @@ -550,7 +551,7 @@ private DeleteTransactionStats deleteTransactions( long bytesReleased = 0; for (DeletedBlocksTransaction entry : delBlocks) { for (Long blkLong : entry.getLocalIDList()) { - String blk = containerData.blockKey(blkLong); + String blk = containerData.getBlockKey(blkLong); BlockData blkInfo = blockDataTable.get(blk); LOG.debug("Deleting block {}", blkLong); if (blkInfo == null) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java index 171303dc0b4d..4087483d723f 100644 --- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java @@ -156,8 +156,7 @@ public void closeContainer(final long containerId) throws IOException { public Container importContainer( final ContainerData containerData, final InputStream rawContainerStream, - final TarContainerPacker packer) - throws IOException { + final TarContainerPacker packer) throws IOException { return handlers.get(containerData.getContainerType()) .importContainer(containerData, rawContainerStream, packer); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerDownloader.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerDownloader.java index cd4ebffd6163..45c70f8e72b5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerDownloader.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerDownloader.java @@ -34,6 +34,6 @@ public interface ContainerDownloader extends Closeable { Path getContainerDataFromReplicas(long containerId, - List sources); + List sources, Path downloadDir); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerReplicationSource.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerReplicationSource.java index 69582f799f8b..72f32b208932 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerReplicationSource.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerReplicationSource.java @@ -40,10 +40,11 @@ public interface ContainerReplicationSource { * Copy 
the container data to an output stream. * * @param containerId Container to replicate - * @param destination The destination stream to copy all the container data. + * @param destination The destination stream to copy all the container data. + * @param compression Compression algorithm. * @throws IOException */ - void copyData(long containerId, OutputStream destination) + void copyData(long containerId, OutputStream destination, String compression) throws IOException; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/CopyContainerCompression.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/CopyContainerCompression.java new file mode 100644 index 000000000000..67f0d6fc8548 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/CopyContainerCompression.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.container.replication; + +import com.google.common.collect.ImmutableMap; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +import java.util.HashMap; +import java.util.Map; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPLICATION_COMPRESSION; + +/** + * Defines compression algorithm for container replication. + */ +public enum CopyContainerCompression { + + NO_COMPRESSION, + GZIP, + LZ4, + SNAPPY, + ZSTD; + + private static final Logger LOG = + LoggerFactory.getLogger(CopyContainerCompression.class); + + private static final CopyContainerCompression DEFAULT_COMPRESSION = + CopyContainerCompression.NO_COMPRESSION; + private static final Map + COMPRESSION_MAPPING = ImmutableMap.copyOf(getMapping()); + + private static Map getMapping() { + return new HashMap() { { + put(NO_COMPRESSION, "no_compression"); + put(GZIP, CompressorStreamFactory.GZIP); + put(LZ4, CompressorStreamFactory.LZ4_FRAMED); + put(SNAPPY, CompressorStreamFactory.SNAPPY_FRAMED); + put(ZSTD, CompressorStreamFactory.ZSTANDARD); + }}; + } + + public static Map getCompressionMapping() { + return COMPRESSION_MAPPING; + } + + public static CopyContainerCompression getConf(ConfigurationSource conf) { + try { + return conf.getEnum(HDDS_CONTAINER_REPLICATION_COMPRESSION, + DEFAULT_COMPRESSION); + } catch (IllegalArgumentException e) { + LOG.warn("Unsupported compression codec. 
Skip compression."); + return DEFAULT_COMPRESSION; + } + } + + public static CopyContainerCompression getDefaultCompression() { + return NO_COMPRESSION; + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/DownloadAndImportReplicator.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/DownloadAndImportReplicator.java index c97bd27e93ab..c1d99c9148dd 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/DownloadAndImportReplicator.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/DownloadAndImportReplicator.java @@ -21,13 +21,22 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.List; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy; +import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; +import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController; import 
org.apache.hadoop.ozone.container.replication.ReplicationTask.Status; @@ -35,6 +44,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_CHOOSING_POLICY; + /** * Default replication implementation. *

@@ -46,36 +57,59 @@ public class DownloadAndImportReplicator implements ContainerReplicator { public static final Logger LOG = LoggerFactory.getLogger(DownloadAndImportReplicator.class); - private final ContainerSet containerSet; + public static final String CONTAINER_COPY_DIR = "container-copy"; + public static final String CONTAINER_COPY_TMP_DIR = "tmp"; + private final ContainerSet containerSet; private final ContainerController controller; - private final ContainerDownloader downloader; - private final TarContainerPacker packer; + private final MutableVolumeSet volumeSet; + private final VolumeChoosingPolicy volumeChoosingPolicy; + private final long containerSize; public DownloadAndImportReplicator( + ConfigurationSource conf, ContainerSet containerSet, ContainerController controller, ContainerDownloader downloader, - TarContainerPacker packer) { + TarContainerPacker packer, + MutableVolumeSet volumeSet) { this.containerSet = containerSet; this.controller = controller; this.downloader = downloader; this.packer = packer; + this.volumeSet = volumeSet; + try { + this.volumeChoosingPolicy = conf.getClass( + HDDS_DATANODE_VOLUME_CHOOSING_POLICY, RoundRobinVolumeChoosingPolicy + .class, VolumeChoosingPolicy.class).newInstance(); + } catch (Exception e) { + throw new RuntimeException(e); + } + this.containerSize = (long) conf.getStorageSize( + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE, + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES); + } - public void importContainer(long containerID, Path tarFilePath) - throws IOException { + public void importContainer(long containerID, Path tarFilePath, + HddsVolume hddsVolume) throws IOException { + + HddsVolume targetVolume = hddsVolume; + if (targetVolume == null) { + targetVolume = chooseNextVolume(); + } + KeyValueContainerData originalContainerData; try { - ContainerData originalContainerData; - try (FileInputStream tempContainerTarStream = new FileInputStream( + try (FileInputStream tmpContainerTarStream = 
new FileInputStream( tarFilePath.toFile())) { byte[] containerDescriptorYaml = - packer.unpackContainerDescriptor(tempContainerTarStream); - originalContainerData = ContainerDataYaml.readContainer( - containerDescriptorYaml); + packer.unpackContainerDescriptor(tmpContainerTarStream); + originalContainerData = (KeyValueContainerData) ContainerDataYaml + .readContainer(containerDescriptorYaml); } + originalContainerData.setVolume(targetVolume); try (FileInputStream tempContainerTarStream = new FileInputStream( tarFilePath.toFile())) { @@ -105,26 +139,46 @@ public void replicate(ReplicationTask task) { LOG.info("Starting replication of container {} from {}", containerID, sourceDatanodes); - // Wait for the download. This thread pool is limiting the parallel - // downloads, so it's ok to block here and wait for the full download. - Path path = - downloader.getContainerDataFromReplicas(containerID, sourceDatanodes); - if (path == null) { - task.setStatus(Status.FAILED); - } else { - try { - long bytes = Files.size(path); - LOG.info("Container {} is downloaded with size {}, starting to import.", - containerID, bytes); - task.setTransferredBytes(bytes); - - importContainer(containerID, path); - LOG.info("Container {} is replicated successfully", containerID); - task.setStatus(Status.DONE); - } catch (IOException e) { - LOG.error("Container {} replication was unsuccessful.", containerID, e); + try { + HddsVolume targetVolume = chooseNextVolume(); + // Wait for the download. This thread pool is limiting the parallel + // downloads, so it's ok to block here and wait for the full download. 
+ Path tarFilePath = + downloader.getContainerDataFromReplicas(containerID, sourceDatanodes, + getUntarDirectory(targetVolume)); + if (tarFilePath == null) { task.setStatus(Status.FAILED); + return; } + long bytes = Files.size(tarFilePath); + LOG.info("Container {} is downloaded with size {}, starting to import.", + containerID, bytes); + task.setTransferredBytes(bytes); + + importContainer(containerID, tarFilePath, targetVolume); + + LOG.info("Container {} is replicated successfully", containerID); + task.setStatus(Status.DONE); + } catch (IOException e) { + LOG.error("Container {} replication was unsuccessful.", containerID, e); + task.setStatus(Status.FAILED); } } + + private HddsVolume chooseNextVolume() throws IOException { + // Choose volume that can hold both container in tmp and dest directory + return volumeChoosingPolicy.chooseVolume( + StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()), + containerSize * 2); + } + + public static Path getUntarDirectory(HddsVolume hddsVolume) + throws IOException { + return Paths.get(hddsVolume.getVolumeRootDir()) + .resolve(CONTAINER_COPY_TMP_DIR).resolve(CONTAINER_COPY_DIR); + } + + private List getHddsVolumesList() { + return StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()); + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationClient.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationClient.java index 023b251a524f..b8c2c6fbb473 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationClient.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationClient.java @@ -27,16 +27,18 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.CopyContainerRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.CopyContainerResponseProto; import org.apache.hadoop.hdds.protocol.datanode.proto.IntraDatanodeProtocolServiceGrpc; import org.apache.hadoop.hdds.protocol.datanode.proto.IntraDatanodeProtocolServiceGrpc.IntraDatanodeProtocolServiceStub; +import org.apache.hadoop.hdds.security.ssl.KeyStoresFactory; import org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; -import org.apache.hadoop.hdds.utils.HAUtils; import org.apache.hadoop.ozone.OzoneConsts; import com.google.common.base.Preconditions; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; import org.apache.ratis.thirdparty.io.grpc.ManagedChannel; import org.apache.ratis.thirdparty.io.grpc.netty.GrpcSslContexts; import org.apache.ratis.thirdparty.io.grpc.netty.NettyChannelBuilder; @@ -60,10 +62,13 @@ public class GrpcReplicationClient implements AutoCloseable { private final Path workingDirectory; + private final ContainerProtos.CopyContainerCompressProto compression; + public GrpcReplicationClient( String host, int port, Path workingDir, - SecurityConfig secConfig, CertificateClient certClient - ) throws IOException { + SecurityConfig secConfig, CertificateClient certClient, + String compression) + throws IOException { NettyChannelBuilder channelBuilder = NettyChannelBuilder.forAddress(host, port) .usePlaintext() @@ -74,12 +79,11 @@ public GrpcReplicationClient( SslContextBuilder sslContextBuilder = GrpcSslContexts.forClient(); if (certClient != null) { + KeyStoresFactory factory = certClient.getClientKeyStoresFactory(); sslContextBuilder - .trustManager(HAUtils.buildCAX509List(certClient, - secConfig.getConfiguration())) + .trustManager(factory.getTrustManagers()[0]) .clientAuth(ClientAuth.REQUIRE) - .keyManager(certClient.getPrivateKey(), - 
certClient.getCertificate()); + .keyManager(factory.getKeyManagers()[0]); } if (secConfig.useTestCert()) { channelBuilder.overrideAuthority("localhost"); @@ -89,6 +93,8 @@ public GrpcReplicationClient( channel = channelBuilder.build(); client = IntraDatanodeProtocolServiceGrpc.newStub(channel); workingDirectory = workingDir; + this.compression = + ContainerProtos.CopyContainerCompressProto.valueOf(compression); } public CompletableFuture download(long containerId) { @@ -97,12 +103,13 @@ public CompletableFuture download(long containerId) { .setContainerID(containerId) .setLen(-1) .setReadOffset(0) + .setCompression(compression) .build(); CompletableFuture response = new CompletableFuture<>(); - Path destinationPath = - getWorkingDirectory().resolve("container-" + containerId + ".tar.gz"); + Path destinationPath = getWorkingDirectory() + .resolve(ContainerUtils.getContainerTarGzName(containerId)); client.download(request, new StreamDownloader(containerId, response, destinationPath)); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationService.java index 60897a5db897..7246ae99f0e5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationService.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationService.java @@ -49,11 +49,15 @@ public GrpcReplicationService(ContainerReplicationSource source) { public void download(CopyContainerRequestProto request, StreamObserver responseObserver) { long containerID = request.getContainerID(); - LOG.info("Streaming container data ({}) to other datanode", containerID); + String compression = request.hasCompression() ? 
+ request.getCompression().toString() : CopyContainerCompression + .getDefaultCompression().toString(); + LOG.info("Streaming container data ({}) to other datanode " + + "with compression {}", containerID, compression); try { GrpcOutputStream outputStream = new GrpcOutputStream(responseObserver, containerID, BUFFER_SIZE); - source.copyData(containerID, outputStream); + source.copyData(containerID, outputStream, compression); } catch (IOException e) { LOG.error("Error streaming container {}", containerID, e); responseObserver.onError(e); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/OnDemandContainerReplicationSource.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/OnDemandContainerReplicationSource.java index 58485ef12f96..d17e29b0970a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/OnDemandContainerReplicationSource.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/OnDemandContainerReplicationSource.java @@ -19,6 +19,8 @@ import java.io.IOException; import java.io.OutputStream; +import java.util.HashMap; +import java.util.Map; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.ozone.container.common.interfaces.Container; @@ -37,11 +39,16 @@ public class OnDemandContainerReplicationSource private final ContainerController controller; - private final TarContainerPacker packer = new TarContainerPacker(); + private Map packer = new HashMap<>(); public OnDemandContainerReplicationSource( ContainerController controller) { this.controller = controller; + for (Map.Entry entry : + CopyContainerCompression.getCompressionMapping().entrySet()) { + packer.put( + entry.getKey().toString(), new TarContainerPacker(entry.getValue())); + } } @Override @@ -50,7 +57,8 @@ public void prepare(long containerId) { } 
@Override - public void copyData(long containerId, OutputStream destination) + public void copyData(long containerId, OutputStream destination, + String compression) throws IOException { Container container = controller.getContainer(containerId); @@ -60,8 +68,13 @@ public void copyData(long containerId, OutputStream destination) " is not found.", CONTAINER_NOT_FOUND); } + if (!packer.containsKey(compression)) { + throw new IOException("Can't compress the container. Compression " + + compression + " is not found."); + } controller.exportContainer( - container.getContainerType(), containerId, destination, packer); + container.getContainerType(), containerId, destination, + packer.get(compression)); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationServer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationServer.java index bf8d6f102565..fcad690d4f15 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationServer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationServer.java @@ -27,7 +27,6 @@ import org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.tracing.GrpcServerInterceptor; -import org.apache.hadoop.hdds.utils.HAUtils; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController; @@ -61,12 +60,9 @@ public class ReplicationServer { private int port; - public ReplicationServer( - ContainerController controller, - ReplicationConfig replicationConfig, - SecurityConfig secConf, - CertificateClient caClient - ) { + public ReplicationServer(ContainerController controller, + ReplicationConfig replicationConfig, SecurityConfig secConf, + CertificateClient caClient) { 
this.secConf = secConf; this.caClient = caClient; this.controller = controller; @@ -81,17 +77,17 @@ public void init() { new OnDemandContainerReplicationSource(controller) ), new GrpcServerInterceptor())); - if (secConf.isSecurityEnabled()) { + if (secConf.isSecurityEnabled() && secConf.isGrpcTlsEnabled()) { try { SslContextBuilder sslContextBuilder = SslContextBuilder.forServer( - caClient.getPrivateKey(), caClient.getCertificate()); + caClient.getServerKeyStoresFactory().getKeyManagers()[0]); sslContextBuilder = GrpcSslContexts.configure( sslContextBuilder, secConf.getGrpcSslProvider()); sslContextBuilder.clientAuth(ClientAuth.REQUIRE); - sslContextBuilder.trustManager(HAUtils.buildCAX509List(caClient, - secConf.getConfiguration())); + sslContextBuilder.trustManager( + caClient.getServerKeyStoresFactory().getTrustManagers()[0]); nettyServerBuilder.sslContext(sslContextBuilder.build()); } catch (IOException ex) { @@ -106,14 +102,8 @@ public void init() { public void start() throws IOException { server.start(); - - if (port == 0) { - LOG.info("{} is started using port {}", getClass().getSimpleName(), - server.getPort()); - } - port = server.getPort(); - + LOG.info("{} is started using port {}", getClass().getSimpleName(), port); } public void stop() { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java index 5432656e0363..3f612738852c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.ozone.container.replication; +import java.time.Clock; +import java.util.OptionalLong; import java.util.concurrent.ConcurrentHashMap; import 
java.util.concurrent.ConcurrentHashMap.KeySetView; import java.util.concurrent.ExecutorService; @@ -49,10 +51,12 @@ public class ReplicationSupervisor { private final ContainerReplicator replicator; private final ExecutorService executor; private final StateContext context; + private final Clock clock; private final AtomicLong requestCounter = new AtomicLong(); private final AtomicLong successCounter = new AtomicLong(); private final AtomicLong failureCounter = new AtomicLong(); + private final AtomicLong timeoutCounter = new AtomicLong(); /** * A set of container IDs that are currently being downloaded @@ -64,35 +68,27 @@ public class ReplicationSupervisor { @VisibleForTesting ReplicationSupervisor( ContainerSet containerSet, StateContext context, - ContainerReplicator replicator, ExecutorService executor) { + ContainerReplicator replicator, ExecutorService executor, + Clock clock) { this.containerSet = containerSet; this.replicator = replicator; this.containersInFlight = ConcurrentHashMap.newKeySet(); this.executor = executor; this.context = context; + this.clock = clock; } public ReplicationSupervisor( ContainerSet containerSet, StateContext context, - ContainerReplicator replicator, ReplicationConfig replicationConfig) { - this(containerSet, context, replicator, - replicationConfig.getReplicationMaxStreams()); - } - - public ReplicationSupervisor( - ContainerSet containerSet, StateContext context, - ContainerReplicator replicator, int poolSize) { + ContainerReplicator replicator, ReplicationConfig replicationConfig, + Clock clock) { this(containerSet, context, replicator, new ThreadPoolExecutor( - poolSize, poolSize, 60, TimeUnit.SECONDS, + replicationConfig.getReplicationMaxStreams(), + replicationConfig.getReplicationMaxStreams(), 60, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new ThreadFactoryBuilder().setDaemon(true) .setNameFormat("ContainerReplicationThread-%d") - .build())); - } - - public ReplicationSupervisor(ContainerSet containerSet, - 
ContainerReplicator replicator, int poolSize) { - this(containerSet, null, replicator, poolSize); + .build()), clock); } /** @@ -148,13 +144,28 @@ public void run() { try { requestCounter.incrementAndGet(); + if (task.getDeadline() > 0 && clock.millis() > task.getDeadline()) { + LOG.info("Ignoring" + + " {} since the current time {}ms is past the deadline {}ms", + this, clock.millis(), task.getDeadline()); + timeoutCounter.incrementAndGet(); + return; + } + if (context != null) { DatanodeDetails dn = context.getParent().getDatanodeDetails(); - if (dn.getPersistedOpState() != + if (dn != null && dn.getPersistedOpState() != HddsProtos.NodeOperationalState.IN_SERVICE) { - LOG.info("Dn is of {} state. Ignore this replicate container " + - "command for container {}", dn.getPersistedOpState(), - containerId); + LOG.info("Dn is of {} state. Ignore {}", + dn.getPersistedOpState(), this); + return; + } + + final OptionalLong currentTerm = context.getTermOfLeaderSCM(); + final long taskTerm = task.getTerm(); + if (currentTerm.isPresent() && taskTerm < currentTerm.getAsLong()) { + LOG.info("Ignoring {} since SCM leader has new term ({} < {})", + this, taskTerm, currentTerm.getAsLong()); return; } } @@ -185,6 +196,12 @@ public void run() { containersInFlight.remove(containerId); } } + + @Override + public String toString() { + return "replicate container command for container " + + task.getContainerId(); + } } public long getReplicationRequestCount() { @@ -206,4 +223,9 @@ public long getReplicationSuccessCount() { public long getReplicationFailureCount() { return failureCounter.get(); } + + public long getReplicationTimeoutCount() { + return timeoutCounter.get(); + } + } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java index df48abda4f3c..0576308bd2e0 100644 --- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java @@ -66,6 +66,9 @@ public void getMetrics(MetricsCollector collector, boolean all) { supervisor.getQueueSize()) .addGauge(Interns.info("numRequestedReplications", "Number of requested replications"), - supervisor.getReplicationRequestCount()); + supervisor.getReplicationRequestCount()) + .addGauge(Interns.info("numTimeoutReplications", + "Number of replication requests timed out before being processed"), + supervisor.getReplicationTimeoutCount()); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationTask.java index e6e0d0526b5b..7c57a73b336b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationTask.java @@ -22,6 +22,7 @@ import java.util.Objects; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; /** * The task to download a container from the sources. @@ -32,21 +33,42 @@ public class ReplicationTask { private final long containerId; - private List sources; + private final List sources; private final Instant queued = Instant.now(); + private final long deadlineMsSinceEpoch; + + private final long term; + /** * Counter for the transferred bytes. 
*/ private long transferredBytes; - public ReplicationTask( + public ReplicationTask(ReplicateContainerCommand cmd) { + this.containerId = cmd.getContainerID(); + this.sources = cmd.getSourceDatanodes(); + this.deadlineMsSinceEpoch = cmd.getDeadline(); + this.term = cmd.getTerm(); + } + + /** + * Intended to only be used in tests. + */ + protected ReplicationTask( long containerId, List sources ) { - this.containerId = containerId; - this.sources = sources; + this(new ReplicateContainerCommand(containerId, sources)); + } + + /** + * Returns any deadline set on this task, in milliseconds since the epoch. + * A returned value of zero indicates no deadline. + */ + public long getDeadline() { + return deadlineMsSinceEpoch; } @Override @@ -104,6 +126,10 @@ public void setTransferredBytes(long transferredBytes) { this.transferredBytes = transferredBytes; } + long getTerm() { + return term; + } + /** * Status of the replication. */ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/SimpleContainerDownloader.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/SimpleContainerDownloader.java index 8e661635feb6..329fd64fa2f4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/SimpleContainerDownloader.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/SimpleContainerDownloader.java @@ -32,12 +32,14 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails.Port.Name; import org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; -import org.apache.hadoop.ozone.OzoneConfigKeys; import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.ozone.container.replication.DownloadAndImportReplicator.CONTAINER_COPY_DIR; + + /** * 
Simple ContainerDownloaderImplementation to download the missing container * from the first available datanode. @@ -47,32 +49,29 @@ */ public class SimpleContainerDownloader implements ContainerDownloader { - private static final Logger LOG = + public static final Logger LOG = LoggerFactory.getLogger(SimpleContainerDownloader.class); - private final Path workingDirectory; private final SecurityConfig securityConfig; private final CertificateClient certClient; + private final String compression; public SimpleContainerDownloader( ConfigurationSource conf, CertificateClient certClient) { - - String workDirString = - conf.get(OzoneConfigKeys.OZONE_CONTAINER_COPY_WORKDIR); - - if (workDirString == null) { - workingDirectory = Paths.get(System.getProperty("java.io.tmpdir")) - .resolve("container-copy"); - } else { - workingDirectory = Paths.get(workDirString); - } securityConfig = new SecurityConfig(conf); this.certClient = certClient; + this.compression = CopyContainerCompression.getConf(conf).toString(); } @Override public Path getContainerDataFromReplicas( - long containerId, List sourceDatanodes) { + long containerId, List sourceDatanodes, + Path downloadDir) { + + if (downloadDir == null) { + downloadDir = Paths.get(System.getProperty("java.io.tmpdir")) + .resolve(CONTAINER_COPY_DIR); + } final List shuffledDatanodes = shuffleDatanodes(sourceDatanodes); @@ -80,7 +79,7 @@ public Path getContainerDataFromReplicas( for (DatanodeDetails datanode : shuffledDatanodes) { try { CompletableFuture result = - downloadContainer(containerId, datanode); + downloadContainer(containerId, datanode, downloadDir); return result.get(); } catch (ExecutionException | IOException e) { LOG.error("Error on replicating container: {} from {}/{}", containerId, @@ -113,12 +112,14 @@ protected List shuffleDatanodes( @VisibleForTesting protected CompletableFuture downloadContainer( - long containerId, DatanodeDetails datanode) throws IOException { + long containerId, DatanodeDetails datanode, 
Path downloadDir) + throws IOException { CompletableFuture result; GrpcReplicationClient grpcReplicationClient = new GrpcReplicationClient(datanode.getIpAddress(), datanode.getPort(Name.REPLICATION).getValue(), - workingDirectory, securityConfig, certClient); + downloadDir, securityConfig, certClient, compression); + result = grpcReplicationClient.download(containerId) .whenComplete((r, ex) -> { try { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java index ded0464ef4ba..edc551688ea6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java @@ -79,4 +79,14 @@ public long getContainerID() { public PipelineID getPipelineID() { return pipelineID; } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getType()) + .append(": containerID: ").append(getContainerID()) + .append(", pipelineID: ").append(getPipelineID()) + .append(", force: ").append(force); + return sb.toString(); + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DeleteContainerCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DeleteContainerCommand.java index bb9035f2961e..6b76803dbf8a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DeleteContainerCommand.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DeleteContainerCommand.java @@ -105,4 +105,14 @@ public static DeleteContainerCommand getFromProtobuf( public int getReplicaIndex() { return replicaIndex; } + + @Override + public String 
toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getType()) + .append(": containerID: ").append(getContainerID()) + .append(", replicaIndex: ").append(getReplicaIndex()) + .append(", force: ").append(force); + return sb.toString(); + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java index 203e5e6bed07..f12b976c4d31 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReconstructECContainersCommand.java @@ -134,6 +134,21 @@ public ECReplicationConfig getEcReplicationConfig() { return ecReplicationConfig; } + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getType()) + .append(": containerID: ").append(containerID) + .append(", replicationConfig: ").append(ecReplicationConfig) + .append(", sources: [").append(getSources().stream() + .map(a -> a.dnDetails + + " replicaIndex: " + a.getReplicaIndex()) + .collect(Collectors.joining(", "))).append("]") + .append(", targets: ").append(getTargetDatanodes()) + .append(", missingIndexes: ").append( + Arrays.toString(missingContainerIndexes)); + return sb.toString(); + } /** * To store the datanode details with replica index. 
*/ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReplicateContainerCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReplicateContainerCommand.java index f824557778fb..3f5959a28182 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReplicateContainerCommand.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReplicateContainerCommand.java @@ -109,4 +109,14 @@ public List getSourceDatanodes() { public int getReplicaIndex() { return replicaIndex; } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getType()); + sb.append(": containerId: ").append(getContainerID()); + sb.append(", replicaIndex: ").append(getReplicaIndex()); + sb.append(", sourceNodes: ").append(sourceDatanodes); + return sb.toString(); + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/SCMCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/SCMCommand.java index 744118e3013f..ab214ef2f670 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/SCMCommand.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/SCMCommand.java @@ -38,6 +38,8 @@ public abstract class SCMCommand implements private String encodedToken = ""; + private long deadlineMsSinceEpoch = 0; + SCMCommand() { this.id = HddsIdFactory.getLongId(); } @@ -88,4 +90,37 @@ public String getEncodedToken() { public void setEncodedToken(String encodedToken) { this.encodedToken = encodedToken; } + + /** + * Allows a deadline to be set on the command. The deadline is set as the + * milliseconds since the epoch when the command must have been completed by. 
+ * It is up to the code processing the command to enforce the deadline by + * calling the hasExpired() method, and the code sending the command to set + * the deadline. The default deadline is zero, which means no deadline. + * @param deadlineMs The ms since epoch when the command must have completed + * by. + */ + public void setDeadline(long deadlineMs) { + this.deadlineMsSinceEpoch = deadlineMs; + } + + /** + * @return The deadline set for this command, or zero if no command has been + * set. + */ + public long getDeadline() { + return deadlineMsSinceEpoch; + } + + /** + * If a deadline has been set to a non zero value, test if the current time + * passed is beyond the deadline or not. + * @param currentEpochMs current time in milliseconds since the epoch. + * @return false if there is no deadline, or it has not expired. True if the + * set deadline has expired. + */ + public boolean hasExpired(long currentEpochMs) { + return deadlineMsSinceEpoch > 0 && + currentEpochMs > deadlineMsSinceEpoch; + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java index 39e36baae02b..00f9cb0dd92f 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java @@ -21,17 +21,23 @@ import java.security.KeyPair; import java.security.PrivateKey; import java.security.PublicKey; -import java.security.cert.X509Certificate; +import java.security.cert.CertificateExpiredException; +import java.time.Duration; +import java.time.LocalDateTime; import java.util.concurrent.Callable; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.DFSConfigKeysLegacy; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import 
org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos; +import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.security.x509.SecurityConfig; -import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.client.DNCertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; +import org.apache.hadoop.hdds.security.x509.certificates.utils.SelfSignedCertificate; import org.apache.hadoop.hdds.security.x509.keys.KeyCodec; import org.apache.hadoop.security.ssl.KeyStoreTestUtil; import org.apache.ozone.test.GenericTestUtils; @@ -39,10 +45,18 @@ import org.apache.hadoop.util.ServicePlugin; import org.apache.commons.io.FileUtils; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_RENEW_GRACE_DURATION; import static org.apache.hadoop.ozone.HddsDatanodeService.getLogger; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY; +import static org.mockito.ArgumentMatchers.anyObject; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + import org.bouncycastle.cert.X509CertificateHolder; import org.bouncycastle.pkcs.PKCS10CertificationRequest; +import org.junit.Assert; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; @@ -53,7 +67,6 @@ * Test class for {@link HddsDatanodeService}. 
*/ public class TestHddsSecureDatanodeInit { - private static File testDir; private static OzoneConfiguration conf; private static HddsDatanodeService service; @@ -66,8 +79,10 @@ public class TestHddsSecureDatanodeInit { private static CertificateCodec certCodec; private static X509CertificateHolder certHolder; private static final String DN_COMPONENT = DNCertificateClient.COMPONENT_NAME; + private static final int CERT_LIFETIME = 15; // seconds - private CertificateClient client; + private DNCertificateClient client; + private static DatanodeDetails datanodeDetails; @BeforeAll public static void setUp() throws Exception { @@ -82,6 +97,7 @@ public static void setUp() throws Exception { conf.setClass(OzoneConfigKeys.HDDS_DATANODE_PLUGINS_KEY, TestHddsDatanodeService.MockService.class, ServicePlugin.class); + conf.set(HDDS_X509_RENEW_GRACE_DURATION, "PT5S"); // 5s securityConfig = new SecurityConfig(conf); service = HddsDatanodeService.createHddsDatanodeService(args); @@ -91,7 +107,7 @@ public static void setUp() throws Exception { return null; }); callQuietly(() -> { - service.initializeCertificateClient(conf); + service.initializeCertificateClient(service.getCertificateClient()); return null; }); certCodec = new CertificateCodec(securityConfig, DN_COMPONENT); @@ -99,13 +115,10 @@ public static void setUp() throws Exception { dnLogs.clearOutput(); privateKey = service.getCertificateClient().getPrivateKey(); publicKey = service.getCertificateClient().getPublicKey(); - X509Certificate x509Certificate = null; - - x509Certificate = KeyStoreTestUtil.generateCertificate( - "CN=Test", new KeyPair(publicKey, privateKey), 365, - securityConfig.getSignatureAlgo()); - certHolder = new X509CertificateHolder(x509Certificate.getEncoded()); + certHolder = generateX509CertHolder(new KeyPair(publicKey, privateKey), + null, Duration.ofSeconds(CERT_LIFETIME)); + datanodeDetails = MockDatanodeDetails.randomDatanodeDetails(); } @AfterAll @@ -126,8 +139,8 @@ public void 
setUpDNCertClient() { .getCertificateLocation(DN_COMPONENT).toString(), securityConfig.getCertificateFileName()).toFile()); dnLogs.clearOutput(); - client = new DNCertificateClient(securityConfig, - certHolder.getSerialNumber().toString()); + client = new DNCertificateClient(securityConfig, datanodeDetails, + certHolder.getSerialNumber().toString(), null, null); service.setCertificateClient(client); } @@ -137,7 +150,7 @@ public void testSecureDnStartupCase0() throws Exception { // Case 0: When keypair as well as certificate is missing. Initial keypair // boot-up. Get certificate will fail as no SCM is not running. LambdaTestUtils.intercept(Exception.class, "", - () -> service.initializeCertificateClient(conf)); + () -> service.initializeCertificateClient(client)); Assertions.assertNotNull(client.getPrivateKey()); Assertions.assertNotNull(client.getPublicKey()); @@ -153,7 +166,7 @@ public void testSecureDnStartupCase1() throws Exception { certCodec.writeCertificate(certHolder); LambdaTestUtils.intercept(RuntimeException.class, "DN security" + " initialization failed", - () -> service.initializeCertificateClient(conf)); + () -> service.initializeCertificateClient(client)); Assertions.assertNull(client.getPrivateKey()); Assertions.assertNull(client.getPublicKey()); Assertions.assertNotNull(client.getCertificate()); @@ -167,7 +180,7 @@ public void testSecureDnStartupCase2() throws Exception { keyCodec.writePublicKey(publicKey); LambdaTestUtils.intercept(RuntimeException.class, "DN security" + " initialization failed", - () -> service.initializeCertificateClient(conf)); + () -> service.initializeCertificateClient(client)); Assertions.assertNull(client.getPrivateKey()); Assertions.assertNotNull(client.getPublicKey()); Assertions.assertNull(client.getCertificate()); @@ -182,7 +195,7 @@ public void testSecureDnStartupCase3() throws Exception { certCodec.writeCertificate(certHolder); LambdaTestUtils.intercept(RuntimeException.class, "DN security" + " initialization failed", 
- () -> service.initializeCertificateClient(conf)); + () -> service.initializeCertificateClient(client)); Assertions.assertNull(client.getPrivateKey()); Assertions.assertNotNull(client.getPublicKey()); Assertions.assertNotNull(client.getCertificate()); @@ -196,7 +209,7 @@ public void testSecureDnStartupCase4() throws Exception { keyCodec.writePrivateKey(privateKey); LambdaTestUtils.intercept(RuntimeException.class, " DN security" + " initialization failed", - () -> service.initializeCertificateClient(conf)); + () -> service.initializeCertificateClient(client)); Assertions.assertNotNull(client.getPrivateKey()); Assertions.assertNull(client.getPublicKey()); Assertions.assertNull(client.getCertificate()); @@ -210,7 +223,7 @@ public void testSecureDnStartupCase5() throws Exception { // Case 5: If private key and certificate is present. certCodec.writeCertificate(certHolder); keyCodec.writePrivateKey(privateKey); - service.initializeCertificateClient(conf); + service.initializeCertificateClient(client); Assertions.assertNotNull(client.getPrivateKey()); Assertions.assertNotNull(client.getPublicKey()); Assertions.assertNotNull(client.getCertificate()); @@ -224,7 +237,7 @@ public void testSecureDnStartupCase6() throws Exception { keyCodec.writePublicKey(publicKey); keyCodec.writePrivateKey(privateKey); LambdaTestUtils.intercept(Exception.class, "", - () -> service.initializeCertificateClient(conf)); + () -> service.initializeCertificateClient(client)); Assertions.assertNotNull(client.getPrivateKey()); Assertions.assertNotNull(client.getPublicKey()); Assertions.assertNull(client.getCertificate()); @@ -239,7 +252,7 @@ public void testSecureDnStartupCase7() throws Exception { keyCodec.writePrivateKey(privateKey); certCodec.writeCertificate(certHolder); - service.initializeCertificateClient(conf); + service.initializeCertificateClient(client); Assertions.assertNotNull(client.getPrivateKey()); Assertions.assertNotNull(client.getPublicKey()); 
Assertions.assertNotNull(client.getCertificate()); @@ -266,17 +279,175 @@ public void testGetCSR() throws Exception { keyCodec.writePrivateKey(privateKey); service.setCertificateClient(client); PKCS10CertificationRequest csr = - service.getCSR(conf); + client.getCSRBuilder().build(); Assertions.assertNotNull(csr); - csr = service.getCSR(conf); + csr = client.getCSRBuilder().build(); Assertions.assertNotNull(csr); - csr = service.getCSR(conf); + csr = client.getCSRBuilder().build(); Assertions.assertNotNull(csr); - csr = service.getCSR(conf); + csr = client.getCSRBuilder().build(); Assertions.assertNotNull(csr); } + @Test + public void testCertificateRotation() throws Exception { + // save the certificate on dn + certCodec.writeCertificate(certHolder); + + // prepare a mocked scmClient to certificate signing + SCMSecurityProtocolClientSideTranslatorPB scmClient = + mock(SCMSecurityProtocolClientSideTranslatorPB.class); + client.setSecureScmClient(scmClient); + + Duration gracePeriod = securityConfig.getRenewalGracePeriod(); + X509CertificateHolder newCertHolder = generateX509CertHolder(null, + LocalDateTime.now().plus(gracePeriod), + Duration.ofSeconds(CERT_LIFETIME)); + String pemCert = CertificateCodec.getPEMEncodedString(newCertHolder); + SCMSecurityProtocolProtos.SCMGetCertResponseProto responseProto = + SCMSecurityProtocolProtos.SCMGetCertResponseProto + .newBuilder().setResponseCode(SCMSecurityProtocolProtos + .SCMGetCertResponseProto.ResponseCode.success) + .setX509Certificate(pemCert) + .setX509CACertificate(pemCert) + .setX509RootCACertificate(pemCert) + .build(); + when(scmClient.getDataNodeCertificateChain(anyObject(), anyString())) + .thenReturn(responseProto); + + // check that new cert ID should not equal to current cert ID + String certId = newCertHolder.getSerialNumber().toString(); + Assert.assertFalse(certId.equals( + client.getCertificate().getSerialNumber().toString())); + + // start monitor task to renew key and cert + 
client.startCertificateMonitor(); + + // check after renew, client will have the new cert ID + GenericTestUtils.waitFor(() -> { + String newCertId = client.getCertificate().getSerialNumber().toString(); + return newCertId.equals(certId); + }, 1000, CERT_LIFETIME * 1000); + PrivateKey privateKey1 = client.getPrivateKey(); + PublicKey publicKey1 = client.getPublicKey(); + String caCertId1 = client.getCACertificate().getSerialNumber().toString(); + String rootCaCertId1 = + client.getRootCACertificate().getSerialNumber().toString(); + + // test the second time certificate rotation, generate a new cert + newCertHolder = generateX509CertHolder(null, null, + Duration.ofSeconds(CERT_LIFETIME)); + pemCert = CertificateCodec.getPEMEncodedString(newCertHolder); + responseProto = SCMSecurityProtocolProtos.SCMGetCertResponseProto + .newBuilder().setResponseCode(SCMSecurityProtocolProtos + .SCMGetCertResponseProto.ResponseCode.success) + .setX509Certificate(pemCert) + .setX509CACertificate(pemCert) + .setX509RootCACertificate(pemCert) + .build(); + when(scmClient.getDataNodeCertificateChain(anyObject(), anyString())) + .thenReturn(responseProto); + String certId2 = newCertHolder.getSerialNumber().toString(); + + // check after renew, client will have the new cert ID + GenericTestUtils.waitFor(() -> { + String newCertId = client.getCertificate().getSerialNumber().toString(); + return newCertId.equals(certId2); + }, 1000, CERT_LIFETIME * 1000); + Assert.assertFalse(client.getPrivateKey().equals(privateKey1)); + Assert.assertFalse(client.getPublicKey().equals(publicKey1)); + Assert.assertFalse(client.getCACertificate().getSerialNumber() + .toString().equals(caCertId1)); + Assert.assertFalse(client.getRootCACertificate().getSerialNumber() + .toString().equals(rootCaCertId1)); + } + + /** + * Test unexpected SCMGetCertResponseProto returned from SCM. 
+ */ + @Test + public void testCertificateRotationRecoverableFailure() throws Exception { + // save the certificate on dn + certCodec.writeCertificate(certHolder); + + // prepare a mocked scmClient to certificate signing + SCMSecurityProtocolClientSideTranslatorPB scmClient = + mock(SCMSecurityProtocolClientSideTranslatorPB.class); + client.setSecureScmClient(scmClient); + + Duration gracePeriod = securityConfig.getRenewalGracePeriod(); + X509CertificateHolder newCertHolder = generateX509CertHolder(null, + LocalDateTime.now().plus(gracePeriod), + Duration.ofSeconds(CERT_LIFETIME)); + String pemCert = CertificateCodec.getPEMEncodedString(newCertHolder); + // provide an invalid SCMGetCertResponseProto. Without + // setX509CACertificate(pemCert), signAndStoreCert will throw exception. + SCMSecurityProtocolProtos.SCMGetCertResponseProto responseProto = + SCMSecurityProtocolProtos.SCMGetCertResponseProto + .newBuilder().setResponseCode(SCMSecurityProtocolProtos + .SCMGetCertResponseProto.ResponseCode.success) + .setX509Certificate(pemCert) + .build(); + when(scmClient.getDataNodeCertificateChain(anyObject(), anyString())) + .thenReturn(responseProto); + + // check that new cert ID should not equal to current cert ID + String certId = newCertHolder.getSerialNumber().toString(); + Assert.assertFalse(certId.equals( + client.getCertificate().getSerialNumber().toString())); + + // start monitor task to renew key and cert + client.startCertificateMonitor(); + + // certificate failed to renew, client still holds the old expired cert. 
+ Thread.sleep(CERT_LIFETIME * 1000); + Assert.assertFalse(certId.equals( + client.getCertificate().getSerialNumber().toString())); + try { + client.getCertificate().checkValidity(); + } catch (Exception e) { + Assert.assertTrue(e instanceof CertificateExpiredException); + } + + // provide a new valid SCMGetCertResponseProto + newCertHolder = generateX509CertHolder(null, null, + Duration.ofSeconds(CERT_LIFETIME)); + pemCert = CertificateCodec.getPEMEncodedString(newCertHolder); + responseProto = SCMSecurityProtocolProtos.SCMGetCertResponseProto + .newBuilder().setResponseCode(SCMSecurityProtocolProtos + .SCMGetCertResponseProto.ResponseCode.success) + .setX509Certificate(pemCert) + .setX509CACertificate(pemCert) + .build(); + when(scmClient.getDataNodeCertificateChain(anyObject(), anyString())) + .thenReturn(responseProto); + String certId2 = newCertHolder.getSerialNumber().toString(); + + // check after renew, client will have the new cert ID + GenericTestUtils.waitFor(() -> { + String newCertId = client.getCertificate().getSerialNumber().toString(); + return newCertId.equals(certId2); + }, 1000, CERT_LIFETIME * 1000); + } + + private static X509CertificateHolder generateX509CertHolder(KeyPair keyPair, + LocalDateTime startDate, Duration certLifetime) throws Exception { + if (keyPair == null) { + keyPair = KeyStoreTestUtil.generateKeyPair("RSA"); + } + LocalDateTime start = startDate == null ? 
LocalDateTime.now() : startDate; + LocalDateTime end = start.plus(certLifetime); + return SelfSignedCertificate.newBuilder() + .setBeginDate(start) + .setEndDate(end) + .setClusterID("cluster") + .setKey(keyPair) + .setSubject("localhost") + .setConfiguration(conf) + .setScmID("test") + .build(); + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java index a27a675a3a36..dfdcef0ee868 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java @@ -224,7 +224,8 @@ private void createPendingDeleteBlocksSchema1(int numOfBlocksPerContainer, try (DBHandle metadata = BlockUtils.getDB(data, conf)) { for (int j = 0; j < numOfBlocksPerContainer; j++) { blockID = ContainerTestHelper.getTestBlockID(containerID); - String deleteStateName = data.deletingBlockKey(blockID.getLocalID()); + String deleteStateName = data.getDeletingBlockKey( + blockID.getLocalID()); BlockData kd = new BlockData(blockID); List chunks = Lists.newArrayList(); putChunksInBlock(numOfChunksPerBlock, j, chunks, buffer, chunkManager, @@ -256,7 +257,7 @@ private void createPendingDeleteBlocksViaTxn(int numOfBlocksPerContainer, container, blockID); kd.setChunks(chunks); try (DBHandle metadata = BlockUtils.getDB(data, conf)) { - String blockKey = data.blockKey(blockID.getLocalID()); + String blockKey = data.getBlockKey(blockID.getLocalID()); metadata.getStore().getBlockDataTable().put(blockKey, kd); } catch (IOException exception) { LOG.info("Exception = " + exception); @@ -291,7 +292,7 @@ private void createTxn(KeyValueContainerData data, List containerBlocks, DatanodeStoreSchemaThreeImpl dnStoreThreeImpl = 
(DatanodeStoreSchemaThreeImpl) ds; dnStoreThreeImpl.getDeleteTransactionTable() - .putWithBatch(batch, data.deleteTxnKey(txnID), dtx); + .putWithBatch(batch, data.getDeleteTxnKey(txnID), dtx); } else { DatanodeStoreSchemaTwoImpl dnStoreTwoImpl = (DatanodeStoreSchemaTwoImpl) ds; @@ -344,12 +345,12 @@ private void updateMetaData(KeyValueContainerData data, container.getContainerData().setBlockCount(numOfBlocksPerContainer); // Set block count, bytes used and pending delete block count. metadata.getStore().getMetadataTable() - .put(data.blockCountKey(), (long) numOfBlocksPerContainer); + .put(data.getBlockCountKey(), (long) numOfBlocksPerContainer); metadata.getStore().getMetadataTable() - .put(data.bytesUsedKey(), + .put(data.getBytesUsedKey(), chunkLength * numOfChunksPerBlock * numOfBlocksPerContainer); metadata.getStore().getMetadataTable() - .put(data.pendingDeleteBlockCountKey(), + .put(data.getPendingDeleteBlockCountKey(), (long) numOfBlocksPerContainer); } catch (IOException exception) { LOG.warn("Meta Data update was not successful for container: " @@ -465,7 +466,7 @@ public void testBlockDeletion() throws Exception { // Ensure there are 3 blocks under deletion and 0 deleted blocks Assert.assertEquals(3, getUnderDeletionBlocksCount(meta, data)); Assert.assertEquals(3, meta.getStore().getMetadataTable() - .get(data.pendingDeleteBlockCountKey()).longValue()); + .get(data.getPendingDeleteBlockCountKey()).longValue()); // Container contains 3 blocks. So, space used by the container // should be greater than zero. @@ -495,9 +496,9 @@ public void testBlockDeletion() throws Exception { // Check finally DB counters. // Not checking bytes used, as handler is a mock call. 
Assert.assertEquals(0, meta.getStore().getMetadataTable() - .get(data.pendingDeleteBlockCountKey()).longValue()); + .get(data.getPendingDeleteBlockCountKey()).longValue()); Assert.assertEquals(0, - meta.getStore().getMetadataTable().get(data.blockCountKey()) + meta.getStore().getMetadataTable().get(data.getBlockCountKey()) .longValue()); Assert.assertEquals(3, deletingServiceMetrics.getSuccessCount() diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java index 3107e16c2a0a..f2faeaa3a358 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java @@ -81,6 +81,10 @@ public void setUp() throws Exception { TimeUnit.MILLISECONDS); conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_IPC_RANDOM_PORT, true); conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_IPC_RANDOM_PORT, true); + conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_ENABLED, + true); + conf.setBoolean( + OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT, true); serverAddresses = new ArrayList<>(); scmServers = new ArrayList<>(); mockServers = new ArrayList<>(); @@ -215,7 +219,6 @@ public void testDatanodeStateContext() throws IOException, OzoneConfigKeys.DFS_CONTAINER_IPC_PORT_DEFAULT); datanodeDetails.setPort(port); ContainerUtils.writeDatanodeDetailsTo(datanodeDetails, idPath); - try (DatanodeStateMachine stateMachine = new DatanodeStateMachine(datanodeDetails, conf, null, null, null)) { @@ -424,6 +427,8 @@ private DatanodeDetails getNewDatanodeDetails() { DatanodeDetails.Port.Name.RATIS, 0); DatanodeDetails.Port restPort = DatanodeDetails.newPort( DatanodeDetails.Port.Name.REST, 0); + DatanodeDetails.Port 
streamPort = DatanodeDetails.newPort( + DatanodeDetails.Port.Name.RATIS_DATASTREAM, 0); return DatanodeDetails.newBuilder() .setUuid(UUID.randomUUID()) .setHostName("localhost") @@ -431,6 +436,7 @@ private DatanodeDetails getNewDatanodeDetails() { .addPort(containerPort) .addPort(ratisPort) .addPort(restPort) + .addPort(streamPort) .build(); } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestSchemaOneBackwardsCompatibility.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestSchemaOneBackwardsCompatibility.java index 7aab0af64ea8..da9dd88c4176 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestSchemaOneBackwardsCompatibility.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestSchemaOneBackwardsCompatibility.java @@ -239,14 +239,14 @@ public void testReadWithoutMetadata() throws Exception { try (DBHandle db = BlockUtils.getDB(cData, conf)) { Table metadataTable = db.getStore().getMetadataTable(); - metadataTable.delete(cData.blockCountKey()); - assertNull(metadataTable.get(cData.blockCountKey())); + metadataTable.delete(cData.getBlockCountKey()); + assertNull(metadataTable.get(cData.getBlockCountKey())); - metadataTable.delete(cData.bytesUsedKey()); - assertNull(metadataTable.get(cData.bytesUsedKey())); + metadataTable.delete(cData.getBytesUsedKey()); + assertNull(metadataTable.get(cData.getBytesUsedKey())); - metadataTable.delete(cData.pendingDeleteBlockCountKey()); - assertNull(metadataTable.get(cData.pendingDeleteBlockCountKey())); + metadataTable.delete(cData.getPendingDeleteBlockCountKey()); + assertNull(metadataTable.get(cData.getPendingDeleteBlockCountKey())); } // Create a new container data object, and fill in its metadata by @@ -317,7 +317,7 @@ public void testDelete() throws Exception { Table metadataTable = refCountedDB.getStore().getMetadataTable(); 
assertEquals(expectedRegularBlocks + expectedDeletingBlocks, - (long)metadataTable.get(cData.blockCountKey())); + (long)metadataTable.get(cData.getBlockCountKey())); } } @@ -401,7 +401,7 @@ public void testReadBlockData() throws Exception { // Test encoding keys and decoding database values. for (String blockID: TestDB.BLOCK_IDS) { - String blockKey = cData.blockKey(Long.parseLong(blockID)); + String blockKey = cData.getBlockKey(Long.parseLong(blockID)); BlockData blockData = blockDataTable.get(blockKey); Assert.assertEquals(Long.toString(blockData.getLocalID()), blockID); } @@ -444,7 +444,8 @@ public void testReadDeletingBlockData() throws Exception { refCountedDB.getStore().getBlockDataTable(); for (String blockID: TestDB.DELETING_BLOCK_IDS) { - String blockKey = cData.deletingBlockKey(Long.parseLong(blockID)); + String blockKey = cData.getDeletingBlockKey( + Long.parseLong(blockID)); BlockData blockData = blockDataTable.get(blockKey); Assert.assertEquals(Long.toString(blockData.getLocalID()), blockID); } @@ -464,7 +465,7 @@ public void testReadDeletingBlockData() throws Exception { // Apply the deleting prefix to the saved block IDs so we can compare // them to the retrieved keys. 
List expectedKeys = TestDB.DELETING_BLOCK_IDS.stream() - .map(key -> cData.deletingBlockKey(Long.parseLong(key))) + .map(key -> cData.getDeletingBlockKey(Long.parseLong(key))) .collect(Collectors.toList()); Assert.assertEquals(expectedKeys, decodedKeys); @@ -496,11 +497,11 @@ public void testReadMetadata() throws Exception { refCountedDB.getStore().getMetadataTable(); Assert.assertEquals(TestDB.KEY_COUNT, - metadataTable.get(cData.blockCountKey()).longValue()); + metadataTable.get(cData.getBlockCountKey()).longValue()); Assert.assertEquals(TestDB.BYTES_USED, - metadataTable.get(cData.bytesUsedKey()).longValue()); + metadataTable.get(cData.getBytesUsedKey()).longValue()); Assert.assertEquals(TestDB.NUM_PENDING_DELETION_BLOCKS, - metadataTable.get(cData.pendingDeleteBlockCountKey()) + metadataTable.get(cData.getPendingDeleteBlockCountKey()) .longValue()); } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestSchemaTwoBackwardsCompatibility.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestSchemaTwoBackwardsCompatibility.java index d2741dc9cd02..3624309d9d4d 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestSchemaTwoBackwardsCompatibility.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestSchemaTwoBackwardsCompatibility.java @@ -317,9 +317,9 @@ private KeyValueContainer createTestContainer() throws IOException { // update delete related metadata db.getStore().getMetadataTable().putWithBatch(batch, - cData.latestDeleteTxnKey(), txn.getTxID()); + cData.getLatestDeleteTxnKey(), txn.getTxID()); db.getStore().getMetadataTable().putWithBatch(batch, - cData.pendingDeleteBlockCountKey(), + cData.getPendingDeleteBlockCountKey(), cData.getNumPendingDeletionBlocks() + BLOCKS_PER_TXN); db.getStore().getBatchHandler().commitBatchOperation(batch); diff --git 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java index 6ae0461fc904..51b929eb2b45 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java @@ -24,6 +24,7 @@ import org.apache.hadoop.ozone.common.ChunkBuffer; import org.junit.jupiter.api.Test; +import java.io.IOException; import java.nio.ByteBuffer; import static java.nio.charset.StandardCharsets.UTF_8; @@ -57,4 +58,13 @@ public void redactsDataBuffers() { assertEquals("", dataBuffers.getBuffers(0).toString(UTF_8)); } + @Test + public void testTarGzName() throws IOException { + long containerId = 100; + String tarGzName = "container-100.tar.gz"; + assertEquals(tarGzName, ContainerUtils.getContainerTarGzName(containerId)); + + assertEquals(containerId, + ContainerUtils.retrieveContainerIdFromTarGzName(tarGzName)); + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java index 5c5ee4e705c9..331d9f2bd961 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java @@ -19,6 +19,7 @@ package org.apache.hadoop.ozone.container.common.statemachine; import static com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService; +import static java.util.Collections.emptyList; import static 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ClosePipelineInfo; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerAction.Action.CLOSE; import static org.apache.ozone.test.GenericTestUtils.waitFor; @@ -37,6 +38,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.OptionalLong; import java.util.concurrent.Callable; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executors; @@ -62,6 +64,7 @@ import org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand; import org.apache.hadoop.ozone.protocol.commands.ClosePipelineCommand; import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; +import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.ozone.test.LambdaTestUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -137,19 +140,7 @@ public void testPutBackReports() { @Test public void testReportQueueWithAddReports() throws IOException { - OzoneConfiguration conf = new OzoneConfiguration(); - DatanodeStateMachine datanodeStateMachineMock = - mock(DatanodeStateMachine.class); - OzoneContainer o = mock(OzoneContainer.class); - ContainerSet s = mock(ContainerSet.class); - when(datanodeStateMachineMock.getContainer()).thenReturn(o); - when(o.getContainerSet()).thenReturn(s); - when(s.getContainerReport()) - .thenReturn( - StorageContainerDatanodeProtocolProtos - .ContainerReportsProto.getDefaultInstance()); - StateContext ctx = new StateContext(conf, DatanodeStates.getInitState(), - datanodeStateMachineMock); + StateContext ctx = createSubject(); InetSocketAddress scm1 = new InetSocketAddress("scm1", 9001); ctx.addEndpoint(scm1); InetSocketAddress scm2 = new InetSocketAddress("scm2", 9001); @@ -623,19 +614,7 @@ public void testGetReports() { @Test public void testCommandQueueSummary() throws IOException { - OzoneConfiguration conf = new 
OzoneConfiguration(); - DatanodeStateMachine datanodeStateMachineMock = - mock(DatanodeStateMachine.class); - OzoneContainer o = mock(OzoneContainer.class); - ContainerSet s = mock(ContainerSet.class); - when(datanodeStateMachineMock.getContainer()).thenReturn(o); - when(o.getContainerSet()).thenReturn(s); - when(s.getContainerReport()) - .thenReturn( - StorageContainerDatanodeProtocolProtos - .ContainerReportsProto.getDefaultInstance()); - StateContext ctx = new StateContext(conf, DatanodeStates.getInitState(), - datanodeStateMachineMock); + StateContext ctx = createSubject(); ctx.addCommand(new ReplicateContainerCommand(1, null)); ctx.addCommand(new ClosePipelineCommand(PipelineID.randomId())); ctx.addCommand(new ReplicateContainerCommand(2, null)); @@ -652,4 +631,58 @@ public void testCommandQueueSummary() throws IOException { summary.get(SCMCommandProto.Type.closeContainerCommand).intValue()); } + @Test + void updatesTermForCommandWithNewerTerm() throws IOException { + final long originalTerm = 1; + final long commandTerm = 2; + StateContext subject = createSubject(); + SCMCommand commandWithNewTerm = someCommand(); + subject.setTermOfLeaderSCM(originalTerm); + commandWithNewTerm.setTerm(commandTerm); + + subject.addCommand(commandWithNewTerm); + + OptionalLong termOfLeaderSCM = subject.getTermOfLeaderSCM(); + assertTrue(termOfLeaderSCM.isPresent()); + assertEquals(commandTerm, termOfLeaderSCM.getAsLong()); + assertEquals(commandWithNewTerm, subject.getNextCommand()); + } + + @Test + void keepsExistingTermForCommandWithOlderTerm() throws IOException { + final long originalTerm = 2; + final long commandTerm = 1; + StateContext subject = createSubject(); + SCMCommand commandWithNewTerm = someCommand(); + subject.setTermOfLeaderSCM(originalTerm); + commandWithNewTerm.setTerm(commandTerm); + + subject.addCommand(commandWithNewTerm); + + OptionalLong termOfLeaderSCM = subject.getTermOfLeaderSCM(); + assertTrue(termOfLeaderSCM.isPresent()); + 
assertEquals(originalTerm, termOfLeaderSCM.getAsLong()); + assertNull(subject.getNextCommand()); + } + + private static StateContext createSubject() throws IOException { + OzoneConfiguration conf = new OzoneConfiguration(); + DatanodeStateMachine datanodeStateMachineMock = + mock(DatanodeStateMachine.class); + OzoneContainer o = mock(OzoneContainer.class); + ContainerSet s = mock(ContainerSet.class); + when(datanodeStateMachineMock.getContainer()).thenReturn(o); + when(o.getContainerSet()).thenReturn(s); + when(s.getContainerReport()) + .thenReturn( + StorageContainerDatanodeProtocolProtos + .ContainerReportsProto.getDefaultInstance()); + return new StateContext(conf, DatanodeStates.getInitState(), + datanodeStateMachineMock); + } + + private static SCMCommand someCommand() { + return new ReplicateContainerCommand(1, emptyList()); + } + } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerCommandHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerCommandHandler.java new file mode 100644 index 000000000000..2deda906fc24 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerCommandHandler.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.common.statemachine.commandhandler; + +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController; +import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; +import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand; +import org.apache.ozone.test.TestClock; +import org.junit.Before; +import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.mockito.Mockito; + +import java.io.IOException; +import java.time.Instant; +import java.time.ZoneId; +import java.util.OptionalLong; + +import static com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.when; + +/** + * Test for the DeleteContainerCommandHandler. 
+ */ +public class TestDeleteContainerCommandHandler { + + private TestClock clock; + private OzoneContainer ozoneContainer; + private ContainerController controller; + private StateContext context; + + @Before + public void setup() { + clock = new TestClock(Instant.now(), ZoneId.systemDefault()); + ozoneContainer = mock(OzoneContainer.class); + controller = mock(ContainerController.class); + when(ozoneContainer.getController()).thenReturn(controller); + context = mock(StateContext.class); + when(context.getTermOfLeaderSCM()) + .thenReturn(OptionalLong.of(0)); + } + + @Test + public void testExpiredCommandsAreNotProcessed() throws IOException { + DeleteContainerCommandHandler handler = createSubject(clock); + + DeleteContainerCommand command1 = new DeleteContainerCommand(1L); + command1.setDeadline(clock.millis() + 10000); + DeleteContainerCommand command2 = new DeleteContainerCommand(2L); + command2.setDeadline(clock.millis() + 20000); + DeleteContainerCommand command3 = new DeleteContainerCommand(3L); + // No deadline on the 3rd command + + clock.fastForward(15000); + handler.handle(command1, ozoneContainer, null, null); + Assertions.assertEquals(1, handler.getTimeoutCount()); + handler.handle(command2, ozoneContainer, null, null); + handler.handle(command3, ozoneContainer, null, null); + Assertions.assertEquals(1, handler.getTimeoutCount()); + Assertions.assertEquals(3, handler.getInvocationCount()); + Mockito.verify(controller, times(0)) + .deleteContainer(1L, false); + Mockito.verify(controller, times(1)) + .deleteContainer(2L, false); + Mockito.verify(controller, times(1)) + .deleteContainer(3L, false); + } + + @Test + public void testCommandForCurrentTermIsExecuted() throws IOException { + // GIVEN + DeleteContainerCommand command = new DeleteContainerCommand(1L); + command.setTerm(1); + + when(context.getTermOfLeaderSCM()) + .thenReturn(OptionalLong.of(command.getTerm())); + + DeleteContainerCommandHandler subject = createSubject(); + + // WHEN + 
subject.handle(command, ozoneContainer, context, null); + + // THEN + Mockito.verify(controller, times(1)) + .deleteContainer(1L, false); + } + + @Test + public void testCommandForOldTermIsDropped() throws IOException { + // GIVEN + DeleteContainerCommand command = new DeleteContainerCommand(1L); + command.setTerm(1); + + when(context.getTermOfLeaderSCM()) + .thenReturn(OptionalLong.of(command.getTerm() + 1)); + + DeleteContainerCommandHandler subject = createSubject(); + + // WHEN + subject.handle(command, ozoneContainer, context, null); + + // THEN + Mockito.verify(controller, never()) + .deleteContainer(1L, false); + } + + private static DeleteContainerCommandHandler createSubject() { + TestClock clock = new TestClock(Instant.now(), ZoneId.systemDefault()); + return createSubject(clock); + } + + private static DeleteContainerCommandHandler createSubject(TestClock clock) { + return new DeleteContainerCommandHandler(clock, newDirectExecutorService()); + } + +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/TestHeartbeatEndpointTask.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/TestHeartbeatEndpointTask.java index eeb373671f5e..a9a933d4c980 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/TestHeartbeatEndpointTask.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/TestHeartbeatEndpointTask.java @@ -28,6 +28,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.OptionalLong; import java.util.UUID; import org.apache.hadoop.hdds.client.ECReplicationConfig; @@ -107,6 +108,7 @@ public void handlesReconstructContainerCommand() throws Exception { @Test public void testheartbeatWithoutReports() throws Exception { + final long termInSCM = 42; 
StorageContainerDatanodeProtocolClientSideTranslatorPB scm = Mockito.mock( StorageContainerDatanodeProtocolClientSideTranslatorPB.class); @@ -118,9 +120,15 @@ public void testheartbeatWithoutReports() throws Exception { .setDatanodeUUID( ((SCMHeartbeatRequestProto)invocation.getArgument(0)) .getDatanodeDetails().getUuid()) + .setTerm(termInSCM) .build()); - HeartbeatEndpointTask endpointTask = getHeartbeatEndpointTask(scm); + OzoneConfiguration conf = new OzoneConfiguration(); + StateContext context = new StateContext(conf, DatanodeStates.RUNNING, + Mockito.mock(DatanodeStateMachine.class)); + context.setTermOfLeaderSCM(1); + HeartbeatEndpointTask endpointTask = getHeartbeatEndpointTask( + conf, context, scm); endpointTask.call(); SCMHeartbeatRequestProto heartbeat = argument.getValue(); Assertions.assertTrue(heartbeat.hasDatanodeDetails()); @@ -128,6 +136,9 @@ public void testheartbeatWithoutReports() throws Exception { Assertions.assertFalse(heartbeat.hasContainerReport()); Assertions.assertTrue(heartbeat.getCommandStatusReportsCount() == 0); Assertions.assertFalse(heartbeat.hasContainerActions()); + OptionalLong termInDatanode = context.getTermOfLeaderSCM(); + Assertions.assertTrue(termInDatanode.isPresent()); + Assertions.assertEquals(termInSCM, termInDatanode.getAsLong()); } @Test @@ -314,22 +325,6 @@ public void testheartbeatWithAllReports() throws Exception { } } - /** - * Creates HeartbeatEndpointTask for the given StorageContainerManager proxy. 
- * - * @param proxy StorageContainerDatanodeProtocolClientSideTranslatorPB - * - * @return HeartbeatEndpointTask - */ - private HeartbeatEndpointTask getHeartbeatEndpointTask( - StorageContainerDatanodeProtocolClientSideTranslatorPB proxy) { - OzoneConfiguration conf = new OzoneConfiguration(); - StateContext context = new StateContext(conf, DatanodeStates.RUNNING, - Mockito.mock(DatanodeStateMachine.class)); - return getHeartbeatEndpointTask(conf, context, proxy); - - } - /** * Creates HeartbeatEndpointTask with the given conf, context and * StorageContainerManager client side proxy. diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestVolumeIOStatsWithPrometheusSink.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestVolumeIOStatsWithPrometheusSink.java index 8600de542430..875f8ce63644 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestVolumeIOStatsWithPrometheusSink.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestVolumeIOStatsWithPrometheusSink.java @@ -42,7 +42,7 @@ public class TestVolumeIOStatsWithPrometheusSink { public void init() { metrics = DefaultMetricsSystem.instance(); metrics.init("test"); - sink = new PrometheusMetricsSink(); + sink = new PrometheusMetricsSink("random"); metrics.register("Prometheus", "Prometheus", sink); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ec/reconstruction/TestECReconstructionSupervisor.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ec/reconstruction/TestECReconstructionSupervisor.java index c40ceb2ea36c..9d78605c1f93 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ec/reconstruction/TestECReconstructionSupervisor.java +++ 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ec/reconstruction/TestECReconstructionSupervisor.java @@ -21,20 +21,41 @@ import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand; import org.apache.ozone.test.GenericTestUtils; +import org.apache.ozone.test.TestClock; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.mockito.Mockito; import java.io.IOException; +import java.time.Instant; +import java.time.ZoneId; +import java.util.OptionalLong; import java.util.SortedMap; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeoutException; +import static com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.when; + /** * Tests the ECReconstructionSupervisor. 
*/ public class TestECReconstructionSupervisor { + private TestClock clock; + + @BeforeEach + public void setup() { + clock = new TestClock(Instant.now(), ZoneId.systemDefault()); + } + + @Test public void testAddTaskShouldExecuteTheGivenTask() throws InterruptedException, TimeoutException, IOException { @@ -43,7 +64,7 @@ public void testAddTaskShouldExecuteTheGivenTask() ECReconstructionSupervisor supervisor = new ECReconstructionSupervisor(null, null, 5, new ECReconstructionCoordinator(new OzoneConfiguration(), null, - ECReconstructionMetrics.create()) { + null, ECReconstructionMetrics.create()) { @Override public void reconstructECContainerGroup(long containerID, ECReplicationConfig repConfig, @@ -58,15 +79,80 @@ public void reconstructECContainerGroup(long containerID, super.reconstructECContainerGroup(containerID, repConfig, sourceNodeMap, targetNodeMap); } - }) { + }, clock) { }; - supervisor.addTask( - new ECReconstructionCommandInfo(1, new ECReplicationConfig(3, 2), - new byte[0], ImmutableList.of(), ImmutableList.of())); + ReconstructECContainersCommand command = createCommand(1L); + supervisor.addTask(new ECReconstructionCommandInfo(command)); runnableInvoked.await(); Assertions.assertEquals(1, supervisor.getInFlightReplications()); holdProcessing.countDown(); GenericTestUtils .waitFor(() -> supervisor.getInFlightReplications() == 0, 100, 15000); } + + @Test + public void testTasksWithDeadlineExceededAreNotRun() throws IOException { + ECReconstructionCoordinator coordinator = + Mockito.mock(ECReconstructionCoordinator.class); + ECReconstructionSupervisor supervisor = + new ECReconstructionSupervisor(null, null, + newDirectExecutorService(), coordinator, clock); + + ReconstructECContainersCommand command = createCommand(1); + ECReconstructionCommandInfo task1 = + new ECReconstructionCommandInfo(command); + + command = createCommand(2); + command.setDeadline(clock.millis() + 10000); + ECReconstructionCommandInfo task2 = + new 
ECReconstructionCommandInfo(command); + + command = createCommand(3); + command.setDeadline(clock.millis() + 20000); + ECReconstructionCommandInfo task3 = + new ECReconstructionCommandInfo(command); + + clock.fastForward(15000); + supervisor.addTask(task1); + supervisor.addTask(task2); + supervisor.addTask(task3); + + // No deadline for container 1, it should run. + Mockito.verify(coordinator, times(1)) + .reconstructECContainerGroup(eq(1L), any(), any(), any()); + // Deadline passed for container 2, it should not run. + Mockito.verify(coordinator, times(0)) + .reconstructECContainerGroup(eq(2L), any(), any(), any()); + // Deadline not passed for container 3, it should run. + Mockito.verify(coordinator, times(1)) + .reconstructECContainerGroup(eq(3L), any(), any(), any()); + } + + @Test + void dropsTaskWithObsoleteTerm() throws IOException { + final long commandTerm = 1; + final long currentTerm = 2; + ECReconstructionCoordinator coordinator = + Mockito.mock(ECReconstructionCoordinator.class); + when(coordinator.getTermOfLeaderSCM()) + .thenReturn(OptionalLong.of(currentTerm)); + ECReconstructionSupervisor supervisor = + new ECReconstructionSupervisor(null, null, + newDirectExecutorService(), coordinator, clock); + + ReconstructECContainersCommand command = createCommand(1); + command.setTerm(commandTerm); + supervisor.addTask(new ECReconstructionCommandInfo(command)); + + Mockito.verify(coordinator, times(0)) + .reconstructECContainerGroup(eq(1L), any(), any(), any()); + } + + private static ReconstructECContainersCommand createCommand( + long containerID) { + return new ReconstructECContainersCommand( + containerID, ImmutableList.of(), ImmutableList.of(), new byte[0], + new ECReplicationConfig(3, 2)); + } + } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java index 
b23600ffd888..62a432e514a6 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java @@ -46,6 +46,7 @@ import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil; import org.apache.hadoop.ozone.container.metadata.AbstractDatanodeStore; import org.apache.hadoop.ozone.container.metadata.DatanodeStore; +import org.apache.hadoop.ozone.container.replication.CopyContainerCompression; import org.apache.ozone.test.GenericTestUtils; import org.apache.hadoop.util.DiskChecker; @@ -220,83 +221,85 @@ public void testContainerImportExport() throws Exception { //destination path File folderToExport = folder.newFile("exported.tar.gz"); + for (Map.Entry entry : + CopyContainerCompression.getCompressionMapping().entrySet()) { + TarContainerPacker packer = new TarContainerPacker(entry.getValue()); + + //export the container + try (FileOutputStream fos = new FileOutputStream(folderToExport)) { + keyValueContainer + .exportContainerData(fos, packer); + } - TarContainerPacker packer = new TarContainerPacker(); + //delete the original one + keyValueContainer.delete(); - //export the container - try (FileOutputStream fos = new FileOutputStream(folderToExport)) { - keyValueContainer - .exportContainerData(fos, packer); - } + //create a new one + KeyValueContainerData containerData = + new KeyValueContainerData(containerId, + keyValueContainerData.getLayoutVersion(), + keyValueContainerData.getMaxSize(), UUID.randomUUID().toString(), + datanodeId.toString()); + containerData.setSchemaVersion(keyValueContainerData.getSchemaVersion()); + KeyValueContainer container = new KeyValueContainer(containerData, CONF); - //delete the original one - keyValueContainer.delete(); + HddsVolume containerVolume = volumeChoosingPolicy.chooseVolume( + 
StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()), 1); - //create a new one - KeyValueContainerData containerData = - new KeyValueContainerData(containerId, - keyValueContainerData.getLayoutVersion(), - keyValueContainerData.getMaxSize(), UUID.randomUUID().toString(), - datanodeId.toString()); - containerData.setSchemaVersion(keyValueContainerData.getSchemaVersion()); - KeyValueContainer container = new KeyValueContainer(containerData, CONF); - - HddsVolume containerVolume = volumeChoosingPolicy.chooseVolume( - StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()), 1); - - container.populatePathFields(scmId, containerVolume); - try (FileInputStream fis = new FileInputStream(folderToExport)) { - container.importContainerData(fis, packer); - } - - assertEquals("value1", containerData.getMetadata().get("key1")); - assertEquals(keyValueContainerData.getContainerDBType(), - containerData.getContainerDBType()); - assertEquals(keyValueContainerData.getState(), - containerData.getState()); - assertEquals(numberOfKeysToWrite, - containerData.getBlockCount()); - assertEquals(keyValueContainerData.getLayoutVersion(), - containerData.getLayoutVersion()); - assertEquals(keyValueContainerData.getMaxSize(), - containerData.getMaxSize()); - assertEquals(keyValueContainerData.getBytesUsed(), - containerData.getBytesUsed()); - - //Can't overwrite existing container - try { + container.populatePathFields(scmId, containerVolume); try (FileInputStream fis = new FileInputStream(folderToExport)) { container.importContainerData(fis, packer); } - fail("Container is imported twice. 
Previous files are overwritten"); - } catch (IOException ex) { - //all good - assertTrue(container.getContainerFile().exists()); - } - //Import failure should cleanup the container directory - containerData = - new KeyValueContainerData(containerId + 1, - keyValueContainerData.getLayoutVersion(), - keyValueContainerData.getMaxSize(), UUID.randomUUID().toString(), - datanodeId.toString()); - containerData.setSchemaVersion(keyValueContainerData.getSchemaVersion()); - container = new KeyValueContainer(containerData, CONF); - - containerVolume = volumeChoosingPolicy.chooseVolume( - StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()), 1); - container.populatePathFields(scmId, containerVolume); - try { - FileInputStream fis = new FileInputStream(folderToExport); - fis.close(); - container.importContainerData(fis, packer); - fail("Container import should fail"); - } catch (Exception ex) { - assertTrue(ex instanceof IOException); - } finally { - File directory = - new File(container.getContainerData().getContainerPath()); - assertFalse(directory.exists()); + assertEquals("value1", containerData.getMetadata().get("key1")); + assertEquals(keyValueContainerData.getContainerDBType(), + containerData.getContainerDBType()); + assertEquals(keyValueContainerData.getState(), + containerData.getState()); + assertEquals(numberOfKeysToWrite, + containerData.getBlockCount()); + assertEquals(keyValueContainerData.getLayoutVersion(), + containerData.getLayoutVersion()); + assertEquals(keyValueContainerData.getMaxSize(), + containerData.getMaxSize()); + assertEquals(keyValueContainerData.getBytesUsed(), + containerData.getBytesUsed()); + + //Can't overwrite existing container + try { + try (FileInputStream fis = new FileInputStream(folderToExport)) { + container.importContainerData(fis, packer); + } + fail("Container is imported twice. 
Previous files are overwritten"); + } catch (IOException ex) { + //all good + assertTrue(container.getContainerFile().exists()); + } + + //Import failure should cleanup the container directory + containerData = + new KeyValueContainerData(containerId + 1, + keyValueContainerData.getLayoutVersion(), + keyValueContainerData.getMaxSize(), UUID.randomUUID().toString(), + datanodeId.toString()); + containerData.setSchemaVersion(keyValueContainerData.getSchemaVersion()); + container = new KeyValueContainer(containerData, CONF); + + containerVolume = volumeChoosingPolicy.chooseVolume( + StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()), 1); + container.populatePathFields(scmId, containerVolume); + try { + FileInputStream fis = new FileInputStream(folderToExport); + fis.close(); + container.importContainerData(fis, packer); + fail("Container import should fail"); + } catch (Exception ex) { + assertTrue(ex instanceof IOException); + } finally { + File directory = + new File(container.getContainerData().getContainerPath()); + assertFalse(directory.exists()); + } } } @@ -330,14 +333,14 @@ private void populate(long numberOfKeysToWrite) throws IOException { metadataStore.getStore().getBlockDataTable(); for (long i = 0; i < numberOfKeysToWrite; i++) { - blockDataTable.put(cData.blockKey(i), + blockDataTable.put(cData.getBlockKey(i), new BlockData(new BlockID(i, i))); } // As now when we put blocks, we increment block count and update in DB. // As for test, we are doing manually so adding key count to DB. 
metadataStore.getStore().getMetadataTable() - .put(cData.blockCountKey(), numberOfKeysToWrite); + .put(cData.getBlockCountKey(), numberOfKeysToWrite); } Map metadata = new HashMap<>(); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerIntegrityChecks.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerIntegrityChecks.java index 51e72839fad9..cf18fa8948db 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerIntegrityChecks.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerIntegrityChecks.java @@ -172,10 +172,10 @@ protected KeyValueContainer createContainerWithBlocks(long containerId, blockData.setChunks(chunkList); // normal key - String key = containerData.blockKey(blockID.getLocalID()); + String key = containerData.getBlockKey(blockID.getLocalID()); if (i >= normalBlocks) { // deleted key - key = containerData.deletingBlockKey(blockID.getLocalID()); + key = containerData.getDeletingBlockKey(blockID.getLocalID()); } metadataStore.getStore().getBlockDataTable().put(key, blockData); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java index 8195e6f4eb1f..aea451bc3a0e 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java @@ -293,8 +293,8 @@ public void setDBBlockAndByteCounts(KeyValueContainerData containerData, try (DBHandle db = 
BlockUtils.getDB(containerData, getConf())) { Table metadataTable = db.getStore().getMetadataTable(); // Don't care about in memory state. Just change the DB values. - metadataTable.put(containerData.blockCountKey(), blockCount); - metadataTable.put(containerData.bytesUsedKey(), byteCount); + metadataTable.put(containerData.getBlockCountKey(), blockCount); + metadataTable.put(containerData.getBytesUsedKey(), byteCount); } } @@ -303,10 +303,10 @@ public void checkDBBlockAndByteCounts(KeyValueContainerData containerData, try (DBHandle db = BlockUtils.getDB(containerData, getConf())) { Table metadataTable = db.getStore().getMetadataTable(); - long bytesUsed = metadataTable.get(containerData.bytesUsedKey()); + long bytesUsed = metadataTable.get(containerData.getBytesUsedKey()); Assert.assertEquals(expectedBytesUsed, bytesUsed); - long blockCount = metadataTable.get(containerData.blockCountKey()); + long blockCount = metadataTable.get(containerData.getBlockCountKey()); Assert.assertEquals(expectedBlockCount, blockCount); } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestTarContainerPacker.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestTarContainerPacker.java index c15841b0018f..9e16fb88bc7e 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestTarContainerPacker.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestTarContainerPacker.java @@ -22,10 +22,13 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.OutputStreamWriter; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -34,18 +37,15 @@ import 
org.apache.commons.compress.archivers.ArchiveOutputStream; import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; -import org.apache.commons.compress.compressors.CompressorOutputStream; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerPacker; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.compressors.CompressorException; -import org.apache.commons.compress.compressors.CompressorInputStream; -import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.ozone.container.replication.CopyContainerCompression; import org.apache.ozone.test.LambdaTestUtils; import org.junit.AfterClass; import org.junit.Assert; @@ -55,7 +55,6 @@ import org.junit.runners.Parameterized; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.commons.compress.compressors.CompressorStreamFactory.GZIP; /** * Test the tar/untar for a given container. 
@@ -73,8 +72,7 @@ public class TestTarContainerPacker { private static final String TEST_DESCRIPTOR_FILE_CONTENT = "descriptor"; - private final ContainerPacker packer - = new TarContainerPacker(); + private TarContainerPacker packer; private static final Path SOURCE_CONTAINER_ROOT = Paths.get("target/test/data/packer-source-dir"); @@ -91,16 +89,29 @@ public class TestTarContainerPacker { private final String schemaVersion; private OzoneConfiguration conf; - public TestTarContainerPacker(ContainerTestVersionInfo versionInfo) { + public TestTarContainerPacker( + ContainerTestVersionInfo versionInfo, String compression) { this.layout = versionInfo.getLayout(); this.schemaVersion = versionInfo.getSchemaVersion(); this.conf = new OzoneConfiguration(); ContainerTestVersionInfo.setTestSchemaVersion(schemaVersion, conf); + packer = new TarContainerPacker(compression); + } @Parameterized.Parameters public static Iterable parameters() { - return ContainerTestVersionInfo.versionParameters(); + List layoutList = + ContainerTestVersionInfo.getLayoutList(); + List parameterList = new ArrayList<>(); + for (ContainerTestVersionInfo containerTestVersionInfo : layoutList) { + for (Map.Entry entry : + CopyContainerCompression.getCompressionMapping().entrySet()) { + parameterList.add( + new Object[]{containerTestVersionInfo, entry.getValue()}); + } + } + return parameterList; } @BeforeClass @@ -125,15 +136,22 @@ private static void initDir(Path path) throws IOException { } private KeyValueContainerData createContainer(Path dir) throws IOException { + return createContainer(dir, true); + } + + private KeyValueContainerData createContainer(Path dir, boolean createDir) + throws IOException { long id = CONTAINER_ID.getAndIncrement(); - Path containerDir = dir.resolve("container" + id); + Path containerDir = dir.resolve(String.valueOf(id)); Path dbDir = containerDir.resolve("db"); - Path dataDir = containerDir.resolve("data"); + Path dataDir = containerDir.resolve("chunks"); Path metaDir 
= containerDir.resolve("metadata"); - Files.createDirectories(metaDir); - Files.createDirectories(dbDir); - Files.createDirectories(dataDir); + if (createDir) { + Files.createDirectories(metaDir); + Files.createDirectories(dbDir); + Files.createDirectories(dataDir); + } KeyValueContainerData containerData = new KeyValueContainerData( id, layout, @@ -174,8 +192,7 @@ public void pack() throws IOException, CompressorException { //THEN: check the result TarArchiveInputStream tarStream = null; try (FileInputStream input = new FileInputStream(targetFile.toFile())) { - CompressorInputStream uncompressed = new CompressorStreamFactory() - .createCompressorInputStream(GZIP, input); + InputStream uncompressed = packer.decompress(input); tarStream = new TarArchiveInputStream(uncompressed); TarArchiveEntry entry; @@ -201,7 +218,7 @@ public void pack() throws IOException, CompressorException { } KeyValueContainerData destinationContainerData = - createContainer(DEST_CONTAINER_ROOT); + createContainer(DEST_CONTAINER_ROOT, false); KeyValueContainer destinationContainer = new KeyValueContainer(destinationContainerData, conf); @@ -211,7 +228,10 @@ public void pack() throws IOException, CompressorException { //unpackContainerData try (FileInputStream input = new FileInputStream(targetFile.toFile())) { descriptor = - new String(packer.unpackContainerData(destinationContainer, input), + new String(packer.unpackContainerData(destinationContainer, input, + TEMP_DIR, + DEST_CONTAINER_ROOT.resolve(String.valueOf( + destinationContainer.getContainerData().getContainerID()))), UTF_8); } @@ -306,9 +326,10 @@ public void unpackContainerDataWithInvalidRelativeChunkFilePath() private KeyValueContainerData unpackContainerData(File containerFile) throws IOException { try (FileInputStream input = new FileInputStream(containerFile)) { - KeyValueContainerData data = createContainer(DEST_CONTAINER_ROOT); + KeyValueContainerData data = createContainer(DEST_CONTAINER_ROOT, false); KeyValueContainer 
container = new KeyValueContainer(data, conf); - packer.unpackContainerData(container, input); + packer.unpackContainerData(container, input, TEMP_DIR, + DEST_CONTAINER_ROOT.resolve(String.valueOf(data.getContainerID()))); return data; } } @@ -353,9 +374,8 @@ private File packContainerWithSingleFile(File file, String entryName) throws Exception { File targetFile = TEMP_DIR.resolve("container.tar.gz").toFile(); try (FileOutputStream output = new FileOutputStream(targetFile); - CompressorOutputStream gzipped = new CompressorStreamFactory() - .createCompressorOutputStream(GZIP, output); - ArchiveOutputStream archive = new TarArchiveOutputStream(gzipped)) { + OutputStream compressed = packer.compress(output); + ArchiveOutputStream archive = new TarArchiveOutputStream(compressed)) { TarContainerPacker.includeFile(file, entryName, archive); } return targetFile; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/impl/TestKeyValueStreamDataChannel.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/impl/TestKeyValueStreamDataChannel.java new file mode 100644 index 000000000000..ddbd4b39f4fa --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/impl/TestKeyValueStreamDataChannel.java @@ -0,0 +1,313 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue.impl; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.BlockData; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.DatanodeBlockID; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.PutBlockRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type; +import org.apache.hadoop.hdds.ratis.ContainerCommandRequestMessage; +import org.apache.hadoop.ozone.container.keyvalue.impl.KeyValueStreamDataChannel.Buffers; +import org.apache.hadoop.ozone.container.keyvalue.impl.KeyValueStreamDataChannel.WriteMethod; +import org.apache.ratis.client.api.DataStreamOutput; +import org.apache.ratis.io.FilePositionCount; +import org.apache.ratis.io.StandardWriteOption; +import org.apache.ratis.io.WriteOption; +import org.apache.ratis.proto.RaftProtos.CommitInfoProto; +import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto; +import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.protocol.DataStreamReply; +import org.apache.ratis.protocol.RaftClientReply; +import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; +import org.apache.ratis.thirdparty.io.netty.buffer.Unpooled; +import org.apache.ratis.util.ReferenceCountedObject; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; 
+import java.nio.channels.WritableByteChannel; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Random; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadLocalRandom; + +import static org.apache.hadoop.hdds.scm.storage.BlockDataStreamOutput.PUT_BLOCK_REQUEST_LENGTH_MAX; +import static org.apache.hadoop.hdds.scm.storage.BlockDataStreamOutput.executePutBlockClose; +import static org.apache.hadoop.hdds.scm.storage.BlockDataStreamOutput.getProtoLength; +import static org.apache.hadoop.ozone.container.keyvalue.impl.KeyValueStreamDataChannel.closeBuffers; +import static org.apache.hadoop.ozone.container.keyvalue.impl.KeyValueStreamDataChannel.readPutBlockRequest; +import static org.apache.hadoop.ozone.container.keyvalue.impl.KeyValueStreamDataChannel.writeBuffers; + +/** For testing {@link KeyValueStreamDataChannel}. 
*/ +public class TestKeyValueStreamDataChannel { + public static final Logger LOG = + LoggerFactory.getLogger(TestKeyValueStreamDataChannel.class); + + static final ContainerCommandRequestProto PUT_BLOCK_PROTO + = ContainerCommandRequestProto.newBuilder() + .setCmdType(Type.PutBlock) + .setPutBlock(PutBlockRequestProto.newBuilder().setBlockData( + BlockData.newBuilder().setBlockID(DatanodeBlockID.newBuilder() + .setContainerID(222).setLocalID(333).build()).build())) + .setDatanodeUuid("datanodeId") + .setContainerID(111L) + .build(); + static final int PUT_BLOCK_PROTO_SIZE = PUT_BLOCK_PROTO.toByteString().size(); + static { + LOG.info("PUT_BLOCK_PROTO_SIZE = {}", PUT_BLOCK_PROTO_SIZE); + } + + @Test + public void testSerialization() throws Exception { + final int max = PUT_BLOCK_REQUEST_LENGTH_MAX; + final ByteBuffer putBlockBuf = ContainerCommandRequestMessage.toMessage( + PUT_BLOCK_PROTO, null).getContent().asReadOnlyByteBuffer(); + final ByteBuffer protoLengthBuf = getProtoLength(putBlockBuf, max); + + // random data size + final int dataSize = ThreadLocalRandom.current().nextInt(1000) + 100; + final byte[] data = new byte[dataSize]; + + //serialize + final ByteBuf buf = Unpooled.buffer(max); + buf.writeBytes(data); + buf.writeBytes(putBlockBuf); + buf.writeBytes(protoLengthBuf); + + final ContainerCommandRequestProto proto = readPutBlockRequest(buf); + Assert.assertEquals(PUT_BLOCK_PROTO, proto); + } + + @Test + public void testBuffers() throws Exception { + final ExecutorService executor = Executors.newFixedThreadPool(32); + final List> futures = new ArrayList<>(); + + final int min = PUT_BLOCK_PROTO_SIZE + 4; + final int[] maxValues = {min, 2 * min, 10 * min}; + final int[] dataSizes = {0, 10, 100, 10_000}; + for (int max : maxValues) { + for (int dataSize : dataSizes) { + futures.add(CompletableFuture.supplyAsync( + () -> runTestBuffers(dataSize, max), executor)); + } + } + + for (CompletableFuture f : futures) { + f.get(); + } + } + + static String 
runTestBuffers(int dataSize, int max) { + final int seed = ThreadLocalRandom.current().nextInt(); + final String name = String.format("[dataSize=%d,max=%d,seed=%H]", + dataSize, max, seed); + LOG.info(name); + try { + runTestBuffers(dataSize, max, seed, name); + } catch (Throwable t) { + throw new CompletionException("Failed " + name, t); + } + return name; + } + + static void runTestBuffers(int dataSize, int max, int seed, String name) + throws Exception { + Assert.assertTrue(max >= PUT_BLOCK_PROTO_SIZE); + + // random data + final byte[] data = new byte[dataSize]; + final Random random = new Random(seed); + random.nextBytes(data); + + // write output + final Buffers buffers = new Buffers(max); + final Output out = new Output(buffers); + for (int offset = 0; offset < dataSize;) { + final int randomLength = random.nextInt(4 * max); + final int length = Math.min(randomLength, dataSize - offset); + LOG.info("{}: offset = {}, length = {}", name, offset, length); + final ByteBuffer b = ByteBuffer.wrap(data, offset, length); + final DataStreamReply writeReply = out.writeAsync(b).get(); + assertReply(writeReply, length, null); + offset += length; + } + + // close + final DataStreamReply closeReply = executePutBlockClose( + PUT_BLOCK_PROTO, max, out).get(); + assertReply(closeReply, 0, PUT_BLOCK_PROTO); + + // check output + final ByteBuf outBuf = out.getOutBuf(); + LOG.info("outBuf = {}", outBuf); + Assert.assertEquals(dataSize, outBuf.readableBytes()); + for (int i = 0; i < dataSize; i++) { + Assert.assertEquals(data[i], outBuf.readByte()); + } + outBuf.release(); + } + + static void assertReply(DataStreamReply reply, int byteWritten, + ContainerCommandRequestProto proto) { + Assert.assertTrue(reply.isSuccess()); + Assert.assertEquals(byteWritten, reply.getBytesWritten()); + Assert.assertEquals(proto, ((Reply)reply).getPutBlockRequest()); + } + + static class Output implements DataStreamOutput { + private final Buffers buffers; + private final ByteBuf outBuf = 
Unpooled.buffer(); + private final WriteMethod writeMethod = src -> { + final int remaining = src.remaining(); + outBuf.writeBytes(src); + return remaining; + }; + + Output(Buffers buffers) { + this.buffers = buffers; + } + + ByteBuf getOutBuf() { + return outBuf; + } + + @Override + public CompletableFuture writeAsync( + ByteBuffer src, Iterable writeOptions) { + final int written; + try { + written = writeBuffers( + ReferenceCountedObject.wrap(src, () -> { }, () -> { }), + buffers, writeMethod); + } catch (IOException e) { + return completeExceptionally(e); + } + if (WriteOption.containsOption(writeOptions, StandardWriteOption.CLOSE)) { + return closeAsync(); + } + return CompletableFuture.completedFuture( + new Reply(true, written)); + } + + @Override + public CompletableFuture closeAsync() { + final ContainerCommandRequestProto putBlockRequest; + try { + putBlockRequest = closeBuffers(buffers, writeMethod); + } catch (IOException e) { + return completeExceptionally(e); + } + return CompletableFuture.completedFuture( + new Reply(true, 0, putBlockRequest)); + } + + @Override + public CompletableFuture writeAsync( + FilePositionCount filePositionCount, WriteOption... 
writeOptions) { + throw new UnsupportedOperationException(); + } + + @Override + public CompletableFuture getRaftClientReplyFuture() { + throw new UnsupportedOperationException(); + } + + @Override + public WritableByteChannel getWritableByteChannel() { + throw new UnsupportedOperationException(); + } + } + + static class Reply implements DataStreamReply { + private final boolean success; + private final long bytesWritten; + private final ContainerCommandRequestProto putBlockRequest; + + Reply(boolean success, long bytesWritten) { + this(success, bytesWritten, null); + } + + Reply(boolean success, long bytesWritten, + ContainerCommandRequestProto putBlockRequest) { + this.success = success; + this.bytesWritten = bytesWritten; + this.putBlockRequest = putBlockRequest; + } + + ContainerCommandRequestProto getPutBlockRequest() { + return putBlockRequest; + } + + @Override + public boolean isSuccess() { + return success; + } + + @Override + public long getBytesWritten() { + return bytesWritten; + } + + @Override + public Collection getCommitInfos() { + throw new UnsupportedOperationException(); + } + + @Override + public ClientId getClientId() { + throw new UnsupportedOperationException(); + } + + @Override + public DataStreamPacketHeaderProto.Type getType() { + throw new UnsupportedOperationException(); + } + + @Override + public long getStreamId() { + throw new UnsupportedOperationException(); + } + + @Override + public long getStreamOffset() { + throw new UnsupportedOperationException(); + } + + @Override + public long getDataLength() { + throw new UnsupportedOperationException(); + } + } + + static CompletableFuture completeExceptionally(Throwable t) { + final CompletableFuture f = new CompletableFuture<>(); + f.completeExceptionally(t); + return f; + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java index a17a5f7f8cb8..229e883ad842 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java @@ -161,11 +161,11 @@ private void markBlocksForDelete(KeyValueContainer keyValueContainer, metadataStore.getStore().getBlockDataTable(); Long localID = blockNames.get(i); - String blk = cData.blockKey(localID); + String blk = cData.getBlockKey(localID); BlockData blkInfo = blockDataTable.get(blk); blockDataTable.delete(blk); - blockDataTable.put(cData.deletingBlockKey(localID), blkInfo); + blockDataTable.put(cData.getDeletingBlockKey(localID), blkInfo); } if (setMetaData) { @@ -173,7 +173,8 @@ private void markBlocksForDelete(KeyValueContainer keyValueContainer, // and bytes used metadata values, so those do not change. 
Table metadataTable = metadataStore.getStore().getMetadataTable(); - metadataTable.put(cData.pendingDeleteBlockCountKey(), (long)count); + metadataTable.put(cData.getPendingDeleteBlockCountKey(), + (long)count); } } @@ -201,14 +202,14 @@ private List addBlocks(KeyValueContainer keyValueContainer, blockData.setChunks(chunkList); blkNames.add(localBlockID); metadataStore.getStore().getBlockDataTable() - .put(cData.blockKey(localBlockID), blockData); + .put(cData.getBlockKey(localBlockID), blockData); } if (setMetaData) { metadataStore.getStore().getMetadataTable() - .put(cData.blockCountKey(), (long)blockCount); + .put(cData.getBlockCountKey(), (long)blockCount); metadataStore.getStore().getMetadataTable() - .put(cData.bytesUsedKey(), blockCount * blockLen); + .put(cData.getBytesUsedKey(), blockCount * blockLen); } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java index 5ae3bad21f1e..cceed793902f 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java @@ -320,12 +320,13 @@ private long addBlocks(KeyValueContainer container, chunkList.add(info.getProtoBufMessage()); } blockData.setChunks(chunkList); - blockDataTable.put(cData.blockKey(blockID.getLocalID()), blockData); + blockDataTable.put(cData.getBlockKey(blockID.getLocalID()), + blockData); } // Set Block count and used bytes. 
- metadataTable.put(cData.blockCountKey(), (long) blocks); - metadataTable.put(cData.bytesUsedKey(), usedBytes); + metadataTable.put(cData.getBlockCountKey(), (long) blocks); + metadataTable.put(cData.getBytesUsedKey(), usedBytes); } // remaining available capacity of the container return (freeBytes - usedBytes); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorScheduling.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorScheduling.java index 457f0a58ae46..613f46318535 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorScheduling.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorScheduling.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.ozone.container.replication; +import java.time.Clock; +import java.time.ZoneId; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -24,6 +26,7 @@ import java.util.Random; import java.util.UUID; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; @@ -43,6 +46,9 @@ public class ReplicationSupervisorScheduling { @Test public void test() throws InterruptedException { + OzoneConfiguration conf = new OzoneConfiguration(); + ReplicationServer.ReplicationConfig replicationConfig + = conf.getObject(ReplicationServer.ReplicationConfig.class); List datanodes = new ArrayList<>(); datanodes.add(MockDatanodeDetails.randomDatanodeDetails()); datanodes.add(MockDatanodeDetails.randomDatanodeDetails()); @@ -69,7 +75,7 @@ public void test() throws InterruptedException { ContainerSet cs = new ContainerSet(1000); - ReplicationSupervisor rs = new 
ReplicationSupervisor(cs, + ReplicationSupervisor rs = new ReplicationSupervisor(cs, null, //simplified executor emulating the current sequential download + //import. @@ -107,7 +113,7 @@ public void test() throws InterruptedException { } } - }, 10); + }, replicationConfig, Clock.system(ZoneId.systemDefault())); final long start = System.currentTimeMillis(); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java index 51f78440bb4c..2ea47f597262 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java @@ -18,8 +18,12 @@ package org.apache.hadoop.ozone.container.replication; +import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Collections; +import java.time.Instant; +import java.time.ZoneId; import java.util.List; import java.util.UUID; import java.util.concurrent.AbstractExecutorService; @@ -34,11 +38,17 @@ import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl; import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; +import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; import org.apache.hadoop.ozone.container.keyvalue.ContainerLayoutTestInfo; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; 
+import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; import org.apache.ozone.test.GenericTestUtils; +import org.apache.ozone.test.TestClock; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -58,6 +68,8 @@ @RunWith(Parameterized.class) public class TestReplicationSupervisor { + private static final long CURRENT_TERM = 1; + private final ContainerReplicator noopReplicator = task -> { }; private final ContainerReplicator throwingReplicator = task -> { throw new RuntimeException("testing replication failure"); @@ -77,6 +89,9 @@ public class TestReplicationSupervisor { private final ContainerLayoutVersion layout; + private StateContext context; + private TestClock clock; + public TestReplicationSupervisor(ContainerLayoutVersion layout) { this.layout = layout; } @@ -88,7 +103,13 @@ public static Iterable parameters() { @Before public void setUp() throws Exception { + clock = new TestClock(Instant.now(), ZoneId.systemDefault()); set = new ContainerSet(1000); + context = new StateContext( + new OzoneConfiguration(), + DatanodeStateMachine.DatanodeStates.getInitState(), + Mockito.mock(DatanodeStateMachine.class)); + context.setTermOfLeaderSCM(CURRENT_TERM); } @After @@ -106,9 +127,9 @@ public void normal() { try { //WHEN - supervisor.addTask(new ReplicationTask(1L, emptyList())); - supervisor.addTask(new ReplicationTask(2L, emptyList())); - supervisor.addTask(new ReplicationTask(5L, emptyList())); + supervisor.addTask(createTask(1L)); + supervisor.addTask(createTask(2L)); + supervisor.addTask(createTask(5L)); Assert.assertEquals(3, supervisor.getReplicationRequestCount()); Assert.assertEquals(3, supervisor.getReplicationSuccessCount()); @@ -134,10 +155,10 @@ public void duplicateMessage() { try { //WHEN - supervisor.addTask(new ReplicationTask(6L, emptyList())); - supervisor.addTask(new ReplicationTask(6L, emptyList())); - supervisor.addTask(new ReplicationTask(6L, emptyList())); - supervisor.addTask(new 
ReplicationTask(6L, emptyList())); + supervisor.addTask(createTask(6L)); + supervisor.addTask(createTask(6L)); + supervisor.addTask(createTask(6L)); + supervisor.addTask(createTask(6L)); //THEN Assert.assertEquals(4, supervisor.getReplicationRequestCount()); @@ -159,7 +180,7 @@ public void failureHandling() { try { //WHEN - ReplicationTask task = new ReplicationTask(1L, emptyList()); + ReplicationTask task = createTask(1L); supervisor.addTask(task); //THEN @@ -183,9 +204,9 @@ public void stalledDownload() { try { //WHEN - supervisor.addTask(new ReplicationTask(1L, emptyList())); - supervisor.addTask(new ReplicationTask(2L, emptyList())); - supervisor.addTask(new ReplicationTask(3L, emptyList())); + supervisor.addTask(createTask(1L)); + supervisor.addTask(createTask(2L)); + supervisor.addTask(createTask(3L)); //THEN Assert.assertEquals(0, supervisor.getReplicationRequestCount()); @@ -208,9 +229,9 @@ public void slowDownload() { try { //WHEN - supervisor.addTask(new ReplicationTask(1L, emptyList())); - supervisor.addTask(new ReplicationTask(2L, emptyList())); - supervisor.addTask(new ReplicationTask(3L, emptyList())); + supervisor.addTask(createTask(1L)); + supervisor.addTask(createTask(2L)); + supervisor.addTask(createTask(3L)); //THEN Assert.assertEquals(3, supervisor.getInFlightReplications()); @@ -228,34 +249,88 @@ public void slowDownload() { } @Test - public void testDownloadAndImportReplicatorFailure() { + public void testDownloadAndImportReplicatorFailure() throws IOException { ReplicationSupervisor supervisor = - new ReplicationSupervisor(set, null, mutableReplicator, - newDirectExecutorService()); + new ReplicationSupervisor(set, context, mutableReplicator, + newDirectExecutorService(), clock); + OzoneConfiguration conf = new OzoneConfiguration(); // Mock to fetch an exception in the importContainer method. 
SimpleContainerDownloader moc = Mockito.mock(SimpleContainerDownloader.class); Path res = Paths.get("file:/tmp/no-such-file"); Mockito.when( - moc.getContainerDataFromReplicas(Mockito.anyLong(), Mockito.anyList())) + moc.getContainerDataFromReplicas(Mockito.anyLong(), Mockito.anyList(), + Mockito.any(Path.class))) .thenReturn(res); + final String testDir = GenericTestUtils.getTempPath( + TestReplicationSupervisor.class.getSimpleName() + + "-" + UUID.randomUUID().toString()); + MutableVolumeSet volumeSet = Mockito.mock(MutableVolumeSet.class); + Mockito.when(volumeSet.getVolumesList()) + .thenReturn(Collections.singletonList( + new HddsVolume.Builder(testDir).conf(conf).build())); ContainerReplicator replicatorFactory = - new DownloadAndImportReplicator(set, null, moc, null); + new DownloadAndImportReplicator(conf, set, null, moc, null, volumeSet); replicatorRef.set(replicatorFactory); GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer .captureLogs(DownloadAndImportReplicator.LOG); - supervisor.addTask(new ReplicationTask(1L, emptyList())); + supervisor.addTask(createTask(1L)); Assert.assertEquals(1, supervisor.getReplicationFailureCount()); Assert.assertEquals(0, supervisor.getReplicationSuccessCount()); Assert.assertTrue(logCapturer.getOutput() .contains("Container 1 replication was unsuccessful.")); } + @Test + public void testTaskBeyondDeadline() { + ReplicationSupervisor supervisor = + supervisorWithReplicator(FakeReplicator::new); + + ReplicateContainerCommand cmd = createCommand(1); + cmd.setDeadline(clock.millis() + 10000); + ReplicationTask task1 = new ReplicationTask(cmd); + cmd = createCommand(2); + cmd.setDeadline(clock.millis() + 20000); + ReplicationTask task2 = new ReplicationTask(cmd); + cmd = createCommand(3); + // No deadline set + ReplicationTask task3 = new ReplicationTask(cmd); + // no deadline set + + clock.fastForward(15000); + + supervisor.addTask(task1); + supervisor.addTask(task2); + supervisor.addTask(task3); + + 
Assert.assertEquals(3, supervisor.getReplicationRequestCount()); + Assert.assertEquals(2, supervisor.getReplicationSuccessCount()); + Assert.assertEquals(0, supervisor.getReplicationFailureCount()); + Assert.assertEquals(0, supervisor.getInFlightReplications()); + Assert.assertEquals(0, supervisor.getQueueSize()); + Assert.assertEquals(1, supervisor.getReplicationTimeoutCount()); + Assert.assertEquals(2, set.containerCount()); + + } + + @Test + public void taskWithObsoleteTermIsDropped() { + final long newTerm = 2; + ReplicationSupervisor supervisor = + supervisorWithReplicator(FakeReplicator::new); + + context.setTermOfLeaderSCM(newTerm); + supervisor.addTask(createTask(1L)); + + Assert.assertEquals(1, supervisor.getReplicationRequestCount()); + Assert.assertEquals(0, supervisor.getReplicationSuccessCount()); + } + private ReplicationSupervisor supervisorWithReplicator( Function replicatorFactory) { return supervisorWith(replicatorFactory, newDirectExecutorService()); @@ -265,11 +340,24 @@ private ReplicationSupervisor supervisorWith( Function replicatorFactory, ExecutorService executor) { ReplicationSupervisor supervisor = - new ReplicationSupervisor(set, null, mutableReplicator, executor); + new ReplicationSupervisor(set, context, mutableReplicator, executor, + clock); replicatorRef.set(replicatorFactory.apply(supervisor)); return supervisor; } + private static ReplicationTask createTask(long containerId) { + ReplicateContainerCommand cmd = createCommand(containerId); + return new ReplicationTask(cmd); + } + + private static ReplicateContainerCommand createCommand(long containerId) { + ReplicateContainerCommand cmd = + new ReplicateContainerCommand(containerId, emptyList()); + cmd.setTerm(CURRENT_TERM); + return cmd; + } + /** * A fake replicator that simulates successful download of containers. 
*/ diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestSimpleContainerDownloader.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestSimpleContainerDownloader.java index bc83bf9bcb3c..42d44a7c2399 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestSimpleContainerDownloader.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestSimpleContainerDownloader.java @@ -18,11 +18,14 @@ package org.apache.hadoop.ozone.container.replication; +import java.io.File; +import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -31,8 +34,14 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; +import org.apache.hadoop.ozone.container.common.volume.StorageVolume; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; /** @@ -40,6 +49,9 @@ */ public class TestSimpleContainerDownloader { + @Rule + public final TemporaryFolder tempDir = new TemporaryFolder(); + private static final String SUCCESS_PATH = "downloaded"; @Test @@ -53,7 +65,8 @@ public void testGetContainerDataFromReplicasHappyPath() throws Exception { //WHEN final Path result = - downloader.getContainerDataFromReplicas(1L, datanodes); + downloader.getContainerDataFromReplicas(1L, datanodes, + 
tempDir.newFolder().toPath()); //THEN Assertions.assertEquals(datanodes.get(0).getUuidString(), @@ -72,7 +85,8 @@ public void testGetContainerDataFromReplicasDirectFailure() //WHEN final Path result = - downloader.getContainerDataFromReplicas(1L, datanodes); + downloader.getContainerDataFromReplicas(1L, datanodes, + tempDir.newFolder().toPath()); //THEN //first datanode is failed, second worked @@ -91,7 +105,8 @@ public void testGetContainerDataFromReplicasAsyncFailure() throws Exception { //WHEN final Path result = - downloader.getContainerDataFromReplicas(1L, datanodes); + downloader.getContainerDataFromReplicas(1L, datanodes, + tempDir.newFolder().toPath()); //THEN //first datanode is failed, second worked @@ -105,7 +120,7 @@ public void testGetContainerDataFromReplicasAsyncFailure() throws Exception { @Test @Timeout(10) public void testRandomSelection() - throws ExecutionException, InterruptedException { + throws ExecutionException, InterruptedException, IOException { //GIVEN final List datanodes = createDatanodes(); @@ -115,7 +130,7 @@ public void testRandomSelection() @Override protected CompletableFuture downloadContainer( - long containerId, DatanodeDetails datanode + long containerId, DatanodeDetails datanode, Path downloadPath ) { //download is always successful. return CompletableFuture @@ -126,7 +141,8 @@ protected CompletableFuture downloadContainer( //WHEN executed, THEN at least once the second datanode should be //returned. 
for (int i = 0; i < 10000; i++) { - Path path = downloader.getContainerDataFromReplicas(1L, datanodes); + Path path = downloader.getContainerDataFromReplicas(1L, datanodes, + tempDir.newFolder().toPath()); if (path.toString().equals(datanodes.get(1).getUuidString())) { return; } @@ -167,8 +183,7 @@ protected List shuffleDatanodes( @Override protected CompletableFuture downloadContainer( - long containerId, - DatanodeDetails datanode + long containerId, DatanodeDetails datanode, Path downloadPath ) { if (datanodes.contains(datanode)) { @@ -197,4 +212,21 @@ private List createDatanodes() { datanodes.add(MockDatanodeDetails.randomDatanodeDetails()); return datanodes; } + + private VolumeSet getVolumeSet(DatanodeDetails datanodeDetails, + OzoneConfiguration conf) throws IOException { + String clusterId = UUID.randomUUID().toString(); + int volumeNum = 3; + File[] hddsVolumeDirs = new File[volumeNum]; + StringBuilder hddsDirs = new StringBuilder(); + for (int i = 0; i < volumeNum; i++) { + hddsVolumeDirs[i] = tempDir.newFolder(); + hddsDirs.append(hddsVolumeDirs[i]).append(","); + } + conf.set(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, hddsDirs.toString()); + VolumeSet hddsVolumeSet = new MutableVolumeSet( + datanodeDetails.getUuidString(), clusterId, conf, null, + StorageVolume.VolumeType.DATA_VOLUME, null); + return hddsVolumeSet; + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToSchemaV3.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToSchemaV3.java index 517842f2d35d..7af39e5e7f19 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToSchemaV3.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToSchemaV3.java @@ -105,6 +105,10 @@ public TestDatanodeUpgradeToSchemaV3(Boolean enable) { conf = new 
OzoneConfiguration(); conf.setBoolean(DatanodeConfiguration.CONTAINER_SCHEMA_V3_ENABLED, this.schemaV3Enabled); + conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_ENABLED, + true); + conf.setBoolean( + OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT, true); } @Before diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java index 94a83ccc49fc..1e01d0808043 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java @@ -33,6 +33,7 @@ import org.apache.hadoop.ozone.container.common.DatanodeLayoutStorage; import org.apache.hadoop.ozone.container.common.SCMTestUtils; import org.apache.hadoop.ozone.container.common.ScmTestMock; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine; import org.apache.hadoop.ozone.container.common.states.endpoint.VersionEndpointTask; @@ -655,7 +656,7 @@ public File exportContainer(long containerId) throws Exception { File destination = tempFolder.newFile(); try (FileOutputStream fos = new FileOutputStream(destination)) { - replicationSource.copyData(containerId, fos); + replicationSource.copyData(containerId, fos, "NO_COMPRESSION"); } return destination; } @@ -666,15 +667,17 @@ public File exportContainer(long containerId) throws Exception { */ public void importContainer(long containerID, File source) throws Exception { DownloadAndImportReplicator replicator = - new DownloadAndImportReplicator(dsm.getContainer().getContainerSet(), + new 
DownloadAndImportReplicator(dsm.getConf(), + dsm.getContainer().getContainerSet(), dsm.getContainer().getController(), new SimpleContainerDownloader(conf, null), - new TarContainerPacker()); + new TarContainerPacker(), dsm.getContainer().getVolumeSet()); - File tempFile = tempFolder.newFile(); + File tempFile = tempFolder.newFile( + ContainerUtils.getContainerTarGzName(containerID)); Files.copy(source.toPath(), tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - replicator.importContainer(containerID, tempFile.toPath()); + replicator.importContainer(containerID, tempFile.toPath(), null); } public void dispatchRequest( diff --git a/hadoop-hdds/docs/content/concept/Recon.md b/hadoop-hdds/docs/content/concept/Recon.md index b5d47313f66b..5f7e84d3d3f9 100644 --- a/hadoop-hdds/docs/content/concept/Recon.md +++ b/hadoop-hdds/docs/content/concept/Recon.md @@ -152,6 +152,7 @@ ozone.recon.db.dir | none | Directory where the Recon Server stores its metadata ozone.recon.om.db.dir | none | Directory where the Recon Server stores its OM snapshot DB. ozone.recon.om.snapshot
.task.interval.delay | 10m | Interval in MINUTES by Recon to request OM DB Snapshot / delta updates. ozone.recon.task
.missingcontainer.interval | 300s | Time interval of the periodic check for Unhealthy Containers in the cluster. +ozone.recon.task
.safemode.wait.threshold | 300s | Time interval of the periodic check for Recon to exit out of safe or warmup mode. ozone.recon.sql.db.jooq.dialect | DERBY | Please refer to [SQL Dialect](https://www.jooq.org/javadoc/latest/org.jooq/org/jooq/SQLDialect.html) to specify a different dialect. ozone.recon.sql.db.jdbc.url | jdbc:derby:${ozone.recon.db.dir}
/ozone_recon_derby.db | Recon SQL database jdbc url. ozone.recon.sql.db.username | none | Recon SQL database username. diff --git a/hadoop-hdds/docs/content/design/trash.md b/hadoop-hdds/docs/content/design/trash.md index b936aaecfb45..bcf81111e73c 100644 --- a/hadoop-hdds/docs/content/design/trash.md +++ b/hadoop-hdds/docs/content/design/trash.md @@ -23,8 +23,3 @@ author: Matthew Sharp The design doc is uploaded to the JIRA: https://issues.apache.org/jira/secure/attachment/12985273/Ozone_Trash_Feature.docx - -## Special note - -Trash is disabled for both o3fs and ofs even if `fs.trash.interval` is set -on purpose. (HDDS-3982) diff --git a/hadoop-hdds/docs/content/feature/Streaming-Write-Pipeline.md b/hadoop-hdds/docs/content/feature/Streaming-Write-Pipeline.md new file mode 100644 index 000000000000..5f55afebc3c8 --- /dev/null +++ b/hadoop-hdds/docs/content/feature/Streaming-Write-Pipeline.md @@ -0,0 +1,136 @@ +--- +title: "Streaming Write Pipeline" +weight: 1 +menu: + main: + parent: Features +summary: A new write pipeline using Ratis Streaming. +--- + + +This document discusses the new Streaming Write Pipeline feature in Ozone. +It is implemented with the Ratis Streaming API. +Note that the existing Ozone Write Pipeline is implemented with the Ratis Async API. +We refer to the new Streaming Write Pipeline as Write Pipeline V2 +and the existing Async Write Pipeline as Write Pipeline V1. + +The Streaming Write Pipeline V2 increases the performance +by providing better network topology awareness +and removing the performance bottlenecks in V1. +The V2 implementation also avoids unnecessary buffer copying +(by Netty zero copy) +and has a better utilization of the CPUs and the disks in each datanode. + +## Configuration Properties + +Set the following properties in the Ozone configuration file `ozone-site.xml`. + +- To enable the Streaming Write Pipeline feature, set the following property to true. 
+```XML + + dfs.container.ratis.datastream.enabled + false + OZONE, CONTAINER, RATIS, DATASTREAM + It specifies whether to enable data stream of container. + +``` +- Datanodes listen to the following port for the streaming traffic. +```XML + + dfs.container.ratis.datastream.port + 9855 + OZONE, CONTAINER, RATIS, DATASTREAM + The datastream port number of container. + +``` +- To use Streaming in FileSystem API, set the following property to true. +```XML + + ozone.fs.datastream.enabled + false + OZONE, DATANODE + + To enable/disable filesystem write via ratis streaming. + + +``` + +## Client APIs + +### OzoneDataStreamOutput + +The new `OzoneDataStreamOutput` class is very similar to the existing `OzoneOutputStream` class, +except that `OzoneDataStreamOutput` uses `ByteBuffer` as a parameter in the `write` methods +while `OzoneOutputStream` uses `byte[]`. +The reason of using a `ByteBuffer`, instead of a `byte[]`, +is to support zero buffer copying. +A typical `write` method is shown below: + +- OzoneDataStreamOutput +```java + public void write(ByteBuffer b, int off, int len) throws IOException; +``` + +- OzoneOutputStream +```java + public void write(byte[] b, int off, int len) throws IOException; +``` +### OzoneBucket + +The following new methods are added to `OzoneBucket` +for creating keys using the Streaming Write Pipeline. + +- createStreamKey +```java + public OzoneDataStreamOutput createStreamKey(String key, long size) + throws IOException; +``` + +```java + public OzoneDataStreamOutput createStreamKey(String key, long size, + ReplicationConfig replicationConfig, Map keyMetadata) + throws IOException; +``` +- createMultipartStreamKey +```java + public OzoneDataStreamOutput createMultipartStreamKey(String key, long size, + int partNumber, String uploadID) throws IOException; +``` + +Note that the methods above have the same parameter list +as the existing `createKey` and `createMultipartKey` methods. 
+ +Below is an example to create a key from a local file using a memory-mapped buffer. +```java + // Create a memory-mapped buffer from a local file: + final FileChannel channel = ... // local file channel + final long length = ... // length of the data + final ByteBuffer mapped = channel.map(FileChannel.MapMode.READ_ONLY, 0, length); + + // Create an OzoneDataStreamOutput + final OzoneBucket bucket = ... // an Ozone bucket + final String key = ... // the key name + final OzoneDataStreamOutput out = bucket.createStreamKey(key, length); + + // Write the memory-mapped buffer to the key output + out.write(mapped); + + // close + out.close(); // In practice, use try-with-resource to close it. + channel.close(); // In practice, use try-with-resource to close it. +``` \ No newline at end of file diff --git a/hadoop-hdds/docs/content/interface/Ofs.md b/hadoop-hdds/docs/content/interface/Ofs.md index 7a3f892281e2..0ad5339411b1 100644 --- a/hadoop-hdds/docs/content/interface/Ofs.md +++ b/hadoop-hdds/docs/content/interface/Ofs.md @@ -117,7 +117,7 @@ For more usage, see: https://issues.apache.org/jira/secure/attachment/12987636/D OFS doesn't allow creating keys(files) directly under root or volumes. Users will receive an error message when they try to do that: -``` +```bash $ ozone fs -touch /volume1/key1 touch: Cannot create file under root or volume. ``` @@ -128,17 +128,17 @@ With OFS, fs.defaultFS (in core-site.xml) no longer needs to have a specific volume and bucket in its path like o3fs did. Simply put the OM host or service ID (in case of HA): -``` +```xml -fs.defaultFS -ofs://omservice + fs.defaultFS + ofs://omservice ``` The client would then be able to access every volume and bucket on the cluster without specifying the hostname or service ID. -``` +```bash $ ozone fs -mkdir -p /volume1/bucket1 ``` @@ -148,14 +148,14 @@ Admins can create and delete volumes and buckets easily with Hadoop FS shell. 
Volumes and buckets are treated similar to directories so they will be created if they don't exist with `-p`: -``` +```bash $ ozone fs -mkdir -p ofs://omservice/volume1/bucket1/dir1/ ``` Note that the supported volume and bucket name character set rule still applies. For instance, bucket and volume names don't take underscore(`_`): -``` +```bash $ ozone fs -mkdir -p /volume_1 mkdir: Bucket or Volume name has an unsupported character : _ ``` @@ -170,7 +170,7 @@ Important: To use it, first, an **admin** needs to create the volume tmp (the volume name is hardcoded for now) and set its ACL to world ALL access. Namely: -``` +```bash $ ozone sh volume create tmp $ ozone sh volume setacl tmp -al world::a ``` @@ -180,7 +180,7 @@ These commands only needs to be done **once per cluster**. Then, **each user** needs to mkdir first to initialize their own temp bucket once. -``` +```bash $ ozone fs -mkdir /tmp 2020-06-04 00:00:00,050 [main] INFO rpc.RpcClient: Creating Bucket: tmp/0238 ... ``` @@ -188,7 +188,7 @@ $ ozone fs -mkdir /tmp After that they can write to it just like they would do to a regular directory. e.g.: -``` +```bash $ ozone fs -touch /tmp/key1 ``` @@ -198,12 +198,12 @@ In order to enable trash in Ozone, Please add these configs to core-site.xml {{< highlight xml >}} -fs.trash.interval -10 + fs.trash.interval + 10 -fs.trash.classname -org.apache.hadoop.ozone.om.TrashPolicyOzone + fs.trash.classname + org.apache.hadoop.ozone.om.TrashPolicyOzone {{< /highlight >}} @@ -212,7 +212,7 @@ When keys are deleted with trash enabled, they are moved to a trash directory under each bucket, because keys aren't allowed to be moved(renamed) between buckets in Ozone. 
-``` +```bash $ ozone fs -rm /volume1/bucket1/key1 2020-06-04 00:00:00,100 [main] INFO fs.TrashPolicyDefault: Moved: 'ofs://id1/volume1/bucket1/key1' to trash at: ofs://id1/volume1/bucket1/.Trash/hadoop/Current/volume1/bucket1/key1 ``` @@ -230,7 +230,7 @@ This is very similar to how the HDFS encryption zone handles trash location. OFS supports recursive volume, bucket and key listing. -i.e. `ozone fs -ls -R ofs://omservice/`` will recursively list all volumes, +i.e. `ozone fs -ls -R ofs://omservice/` will recursively list all volumes, buckets and keys the user has LIST permission to if ACL is enabled. If ACL is disabled, the command would just list literally everything on that cluster. diff --git a/hadoop-hdds/docs/content/interface/S3.zh.md b/hadoop-hdds/docs/content/interface/S3.zh.md index a73e07485826..8e574e5f31c4 100644 --- a/hadoop-hdds/docs/content/interface/S3.zh.md +++ b/hadoop-hdds/docs/content/interface/S3.zh.md @@ -89,7 +89,7 @@ HEAD 对象 | 已实现 | 如果不启用安全机制,你可以*使用***任何** AWS_ACCESS_KEY_ID 和 AWS_SECRET_ACCESS_KEY 来访问 Ozone 的 S3 服务。 -在启用了安全机制的情况下,你可以通过 `ozone s3 gesecret` 命令获取 key 和 secret(需要进行 Kerberos 认证)。 +在启用了安全机制的情况下,你可以通过 `ozone s3 getsecret` 命令获取 key 和 secret(需要进行 Kerberos 认证)。 ```bash kinit -kt /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM diff --git a/hadoop-hdds/docs/content/security/SecuringTDE.md b/hadoop-hdds/docs/content/security/SecuringTDE.md index 8ddedc4390f6..3b75bee1bfd5 100644 --- a/hadoop-hdds/docs/content/security/SecuringTDE.md +++ b/hadoop-hdds/docs/content/security/SecuringTDE.md @@ -121,7 +121,7 @@ logins using configured The below two configurations must be added to the kms-site.xml to allow the S3Gateway principal to act as a proxy for other users. 
In this example, "ozone.s3g.kerberos.principal" is assumed to be "s3g" -``` +```xml hadoop.kms.proxyuser.s3g.users user1,user2,user3 diff --git a/hadoop-hdds/docs/content/security/SecurityAcls.md b/hadoop-hdds/docs/content/security/SecurityAcls.md index da4b28af8537..0bf32f5f5a16 100644 --- a/hadoop-hdds/docs/content/security/SecurityAcls.md +++ b/hadoop-hdds/docs/content/security/SecurityAcls.md @@ -26,8 +26,13 @@ icon: transfer --> Ozone supports a set of native ACLs. These ACLs can be used independently -of ozone ACL plugin such as Ranger. If Apache Ranger plugin for Ozone is -enabled, then ACL will be checked with Ranger. +of ozone ACL plugin such as Ranger. +Add the following properties to the ozone-site.xml to enable native ACLs. + +Property|Value +--------|------------------------------------------------------------ +ozone.acl.enabled | true +ozone.acl.authorizer.class| org.apache.ranger.authorization.ozone.authorizer.OzoneNativeAuthorizer Ozone ACLs are a super set of Posix and S3 ACLs. diff --git a/hadoop-hdds/docs/content/security/SecurityAcls.zh.md b/hadoop-hdds/docs/content/security/SecurityAcls.zh.md index e0b0e88911a1..0d2661ceb9f1 100644 --- a/hadoop-hdds/docs/content/security/SecurityAcls.zh.md +++ b/hadoop-hdds/docs/content/security/SecurityAcls.zh.md @@ -25,7 +25,12 @@ icon: transfer limitations under the License. 
--> -Ozone 既支持原生的 ACL,也支持类似 Ranger 这样的 ACL 插件,如果启用了 Ranger 插件,则以 Ranger 中的 ACL 为准。 +Ozone 既支持类似 Ranger 这样的 ACL 插件,也支持原生的 ACL。如果需要启用原生的 ACL,在 ozone-site.xml 中添加下面的参数: + +Property|Value +--------|------------------------------------------------------------ +ozone.acl.enabled | true +ozone.acl.authorizer.class| org.apache.ranger.authorization.ozone.authorizer.OzoneNativeAuthorizer Ozone 的 ACL 是 Posix ACL 和 S3 ACL 的超集。 diff --git a/hadoop-hdds/docs/content/security/SecurityWithRanger.md b/hadoop-hdds/docs/content/security/SecurityWithRanger.md index 9428f93ec06f..779183f8284a 100644 --- a/hadoop-hdds/docs/content/security/SecurityWithRanger.md +++ b/hadoop-hdds/docs/content/security/SecurityWithRanger.md @@ -47,7 +47,7 @@ ozone.acl.enabled | true ozone.acl.authorizer.class| org.apache.ranger.authorization.ozone.authorizer.RangerOzoneAuthorizer To use the RangerOzoneAuthorizer, you also need to add the following environment variables to ozone-env.sh: -``` +```bash export OZONE_CLASSPATH="${OZONE_HOME}/share/ozone/lib/libext/*" ``` * The location of the ranger-ozone-plugin jars depends on where the Ranger Plugin is installed. 
diff --git a/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md b/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md index 9fd0d033ecf6..ecd1f38bd58b 100644 --- a/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md +++ b/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md @@ -38,7 +38,7 @@ ozone.acl.enabled | true ozone.acl.authorizer.class| org.apache.ranger.authorization.ozone.authorizer.RangerOzoneAuthorizer 为了使用 RangerOzoneAuthorizer,还需要在 ozone-env.sh 中增加下面环境变量: -``` +```bash export OZONE_CLASSPATH="${OZONE_HOME}/share/ozone/lib/libext/*" ``` * ranger-ozone-plugin jars 具体路径取决于 Ranger Ozone plugin 安装配置。 diff --git a/hadoop-hdds/docs/pom.xml b/hadoop-hdds/docs/pom.xml index 9a6a33066d10..7af99668c794 100644 --- a/hadoop-hdds/docs/pom.xml +++ b/hadoop-hdds/docs/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-docs - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone/HDDS Documentation Apache Ozone/HDDS Documentation jar diff --git a/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/header.html b/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/header.html index 8f475b61f8d1..eec45e7cc35b 100644 --- a/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/header.html +++ b/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/header.html @@ -31,4 +31,26 @@ + + + + diff --git a/hadoop-hdds/erasurecode/pom.xml b/hadoop-hdds/erasurecode/pom.xml index ca2e3cb87c7d..df282a0a55a6 100644 --- a/hadoop-hdds/erasurecode/pom.xml +++ b/hadoop-hdds/erasurecode/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-erasurecode - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Earsurecode utils Apache Ozone HDDS Erasurecode diff --git a/hadoop-hdds/framework/dev-support/findbugsExcludeFile.xml b/hadoop-hdds/framework/dev-support/findbugsExcludeFile.xml index 
6251188ecc17..a111802321e3 100644 --- a/hadoop-hdds/framework/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdds/framework/dev-support/findbugsExcludeFile.xml @@ -21,10 +21,6 @@ - - - - diff --git a/hadoop-hdds/framework/pom.xml b/hadoop-hdds/framework/pom.xml index 270d68fddde1..49cd738f54eb 100644 --- a/hadoop-hdds/framework/pom.xml +++ b/hadoop-hdds/framework/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-server-framework - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Server Framework Apache Ozone HDDS Server Framework diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/conf/DatanodeRatisServerConfig.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/conf/DatanodeRatisServerConfig.java index 25ed4776b7d8..058932e76902 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/conf/DatanodeRatisServerConfig.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/conf/DatanodeRatisServerConfig.java @@ -23,6 +23,7 @@ import java.time.Duration; import static org.apache.hadoop.hdds.conf.ConfigTag.DATANODE; +import static org.apache.hadoop.hdds.conf.ConfigTag.DATASTREAM; import static org.apache.hadoop.hdds.conf.ConfigTag.OZONE; import static org.apache.hadoop.hdds.conf.ConfigTag.PERFORMANCE; import static org.apache.hadoop.hdds.conf.ConfigTag.RATIS; @@ -123,6 +124,40 @@ public void setLeaderNumPendingRequests(int leaderNumPendingRequests) { this.leaderNumPendingRequests = leaderNumPendingRequests; } + @Config(key = "datastream.request.threads", + defaultValue = "20", + type = ConfigType.INT, + tags = {OZONE, DATANODE, RATIS, DATASTREAM}, + description = "Maximum number of threads in the thread pool for " + + "datastream request." 
+ ) + private int streamRequestThreads; + + public int getStreamRequestThreads() { + return streamRequestThreads; + } + + public void setStreamRequestThreads(int streamRequestThreads) { + this.streamRequestThreads = streamRequestThreads; + } + + @Config(key = "datastream.client.pool.size", + defaultValue = "10", + type = ConfigType.INT, + tags = {OZONE, DATANODE, RATIS, DATASTREAM}, + description = "Maximum number of client proxy in NettyServerStreamRpc " + + "for datastream write." + ) + private int clientPoolSize; + + public int getClientPoolSize() { + return clientPoolSize; + } + + public void setClientPoolSize(int clientPoolSize) { + this.clientPoolSize = clientPoolSize; + } + @Config(key = "delete.ratis.log.directory", defaultValue = "true", type = ConfigType.BOOLEAN, diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 488d970cf2bb..c973eb980930 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -1018,6 +1018,19 @@ public long getContainerCount() throws IOException { return response.getContainerCount(); } + @Override + public long getContainerCount(HddsProtos.LifeCycleState state) + throws IOException { + GetContainerCountRequestProto request = + GetContainerCountRequestProto.newBuilder().build(); + + GetContainerCountResponseProto response = + submitRequest(Type.GetClosedContainerCount, + builder -> builder.setGetContainerCountRequest(request)) + .getGetContainerCountResponse(); + return response.getContainerCount(); + } + @Override public Object getUnderlyingProxyObject() { return rpcProxy; 
@@ -1027,4 +1040,11 @@ public Object getUnderlyingProxyObject() { public void close() { RPC.stopProxy(rpcProxy); } + + @Override + public List getListOfContainers( + long startContainerID, int count, HddsProtos.LifeCycleState state) + throws IOException { + return listContainer(startContainerID, count, state); + } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/KeyStoresFactory.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/KeyStoresFactory.java index fb5ac50da4cf..8ad7625b69d9 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/KeyStoresFactory.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/KeyStoresFactory.java @@ -17,9 +17,8 @@ */ package org.apache.hadoop.hdds.security.ssl; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.security.ssl.SSLFactory; +import org.apache.hadoop.hdds.annotation.InterfaceAudience; +import org.apache.hadoop.hdds.annotation.InterfaceStability; import javax.net.ssl.KeyManager; import javax.net.ssl.TrustManager; @@ -34,6 +33,11 @@ @InterfaceStability.Evolving public interface KeyStoresFactory { + /** + * Factory mode. + */ + enum Mode { CLIENT, SERVER } + /** * Initializes the keystores of the factory. * @@ -45,8 +49,8 @@ public interface KeyStoresFactory { * @throws GeneralSecurityException thrown if the keystores could not be * initialized due to an security error. */ - void init(SSLFactory.Mode mode, boolean requireClientAuth) - throws IOException, GeneralSecurityException; + void init(Mode mode, boolean requireClientAuth) throws IOException, + GeneralSecurityException; /** * Releases any resources being used. 
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/MonitoringTimerTask.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/MonitoringTimerTask.java index ca3143795c81..392b74195803 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/MonitoringTimerTask.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/MonitoringTimerTask.java @@ -17,10 +17,9 @@ */ package org.apache.hadoop.hdds.security.ssl; -import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; -import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,7 +32,6 @@ * can optionally also be specified in the constructor, otherwise any * exception occurring during process will be logged using this class' logger. 
*/ -@InterfaceAudience.Private public class MonitoringTimerTask extends TimerTask { static final Logger LOG = LoggerFactory.getLogger(MonitoringTimerTask.class); @@ -66,15 +64,13 @@ public MonitoringTimerTask(CertificateClient caClient, @Override public void run() { - if (caClient.isCertificateRenewed()) { - try { - onReload.accept(caClient); - } catch (Throwable t) { - if (onReloadFailure != null) { - onReloadFailure.accept(t); - } else { - LOG.error(PROCESS_ERROR_MESSAGE, t); - } + try { + onReload.accept(caClient); + } catch (Throwable t) { + if (onReloadFailure != null) { + onReloadFailure.accept(t); + } else { + LOG.error(PROCESS_ERROR_MESSAGE, t); } } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/PemFileBasedKeyStoresFactory.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/PemFileBasedKeyStoresFactory.java index d63bdcd0239e..1a8b33f403b4 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/PemFileBasedKeyStoresFactory.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/PemFileBasedKeyStoresFactory.java @@ -18,11 +18,10 @@ package org.apache.hadoop.hdds.security.ssl; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdds.annotation.InterfaceAudience; +import org.apache.hadoop.hdds.annotation.InterfaceStability; import org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; -import org.apache.hadoop.security.ssl.SSLFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,7 +77,7 @@ public PemFileBasedKeyStoresFactory(SecurityConfig securityConfig, * to reload truststore. 
* @param mode client or server */ - private void createTrustManagers(SSLFactory.Mode mode) throws + private void createTrustManagers(Mode mode) throws GeneralSecurityException, IOException { long truststoreReloadInterval = secConfig.getSslTruststoreReloadInterval(); LOG.info(mode.toString() + " TrustStore reloading at " + @@ -105,7 +104,7 @@ private void createTrustManagers(SSLFactory.Mode mode) throws * to reload keystores. * @param mode client or server */ - private void createKeyManagers(SSLFactory.Mode mode) throws + private void createKeyManagers(Mode mode) throws GeneralSecurityException, IOException { long keystoreReloadInterval = secConfig.getSslKeystoreReloadInterval(); LOG.info(mode.toString() + " KeyStore reloading at " + @@ -139,20 +138,20 @@ private void createKeyManagers(SSLFactory.Mode mode) throws * @throws GeneralSecurityException thrown if the keystores could not be * initialized due to a security error. */ - public synchronized void init(SSLFactory.Mode mode, boolean requireClientAuth) + public synchronized void init(Mode mode, boolean requireClientAuth) throws IOException, GeneralSecurityException { monitoringTimer = new Timer(caClient.getComponentName() + "-" + SSL_MONITORING_THREAD_NAME, true); // key manager - if (requireClientAuth || mode == SSLFactory.Mode.SERVER) { + if (requireClientAuth || mode == Mode.SERVER) { createKeyManagers(mode); } else { KeyStore keystore = KeyStore.getInstance(DEFAULT_KEYSTORE_TYPE); keystore.load(null, null); KeyManagerFactory keyMgrFactory = KeyManagerFactory - .getInstance(SSLFactory.SSLCERTIFICATE); + .getInstance(KeyManagerFactory.getDefaultAlgorithm()); keyMgrFactory.init(keystore, null); keyManagers = keyMgrFactory.getKeyManagers(); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java index 6d84ebbd985f..ee2805cb1cff 100644 --- 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java @@ -17,10 +17,9 @@ */ package org.apache.hadoop.hdds.security.ssl; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdds.annotation.InterfaceAudience; +import org.apache.hadoop.hdds.annotation.InterfaceStability; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; -import org.apache.hadoop.security.ssl.SSLFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -163,7 +162,7 @@ private X509ExtendedKeyManager loadKeyManager(CertificateClient caClient) privateKey, EMPTY_PASSWORD, new Certificate[]{cert}); KeyManagerFactory keyMgrFactory = KeyManagerFactory.getInstance( - SSLFactory.SSLCERTIFICATE); + KeyManagerFactory.getDefaultAlgorithm()); keyMgrFactory.init(keystore, EMPTY_PASSWORD); for (KeyManager candidate: keyMgrFactory.getKeyManagers()) { if (candidate instanceof X509ExtendedKeyManager) { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java index e209bdc56fbb..5252c278ddb7 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java @@ -17,10 +17,9 @@ */ package org.apache.hadoop.hdds.security.ssl; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdds.annotation.InterfaceAudience; +import org.apache.hadoop.hdds.annotation.InterfaceStability; import 
org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; -import org.apache.hadoop.security.ssl.SSLFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -137,8 +136,8 @@ X509TrustManager loadTrustManager(CertificateClient caClient) ks.load(null, null); ks.setCertificateEntry(certId, cert); - TrustManagerFactory trustManagerFactory = - TrustManagerFactory.getInstance(SSLFactory.SSLCERTIFICATE); + TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance( + TrustManagerFactory.getDefaultAlgorithm()); trustManagerFactory.init(ks); TrustManager[] trustManagers = trustManagerFactory.getTrustManagers(); for (TrustManager trustManager1 : trustManagers) { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java index 454ac6c2f4f4..5abd72cabc0c 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java @@ -49,6 +49,8 @@ import java.security.KeyPair; import java.security.NoSuchAlgorithmException; import java.security.NoSuchProviderException; +import java.security.PrivateKey; +import java.security.PublicKey; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.security.spec.InvalidKeySpecException; @@ -252,7 +254,6 @@ public Future requestCertificate( LOG.error("Certificate storage failed, retrying one more time.", e); xcert = signAndStoreCertificate(beginDate, endDate, csr, role); } - xcertHolder.complete(xcert); break; default: @@ -474,19 +475,7 @@ Consumer processVerificationStatus( break; case INITIALIZE: if (type == CAType.SELF_SIGNED_CA) { - consumer = (arg) -> { - try { - 
generateSelfSignedCA(arg); - } catch (NoSuchProviderException | NoSuchAlgorithmException - | IOException e) { - LOG.error("Unable to initialize CertificateServer.", e); - } - VerificationStatus newStatus = verifySelfSignedCA(arg); - if (newStatus != VerificationStatus.SUCCESS) { - LOG.error("Unable to initialize CertificateServer, failed in " + - "verification."); - } - }; + consumer = this::initRootCa; } else if (type == CAType.INTERMEDIARY_CA) { // For sub CA certificates are generated during bootstrap/init. If // both keys/certs are missing, init/bootstrap is missed to be @@ -506,6 +495,29 @@ Consumer processVerificationStatus( return consumer; } + private void initRootCa(SecurityConfig securityConfig) { + if (isExternalCaSpecified(securityConfig)) { + initWithExternalRootCa(securityConfig); + } else { + try { + generateSelfSignedCA(securityConfig); + } catch (NoSuchProviderException | NoSuchAlgorithmException + | IOException e) { + LOG.error("Unable to initialize CertificateServer.", e); + } + } + VerificationStatus newStatus = verifySelfSignedCA(securityConfig); + if (newStatus != VerificationStatus.SUCCESS) { + LOG.error("Unable to initialize CertificateServer, failed in " + + "verification."); + } + } + + private boolean isExternalCaSpecified(SecurityConfig conf) { + return !conf.getExternalRootCaCert().isEmpty() && + !conf.getExternalRootCaPrivateKeyPath().isEmpty(); + } + /** * Generates a KeyPair for the Certificate. * @@ -529,12 +541,13 @@ private KeyPair generateKeys(SecurityConfig securityConfig) * Generates a self-signed Root Certificate for CA. * * @param securityConfig - SecurityConfig - * @param key - KeyPair. + * @param key - KeyPair. * @throws IOException - on Error. * @throws SCMSecurityException - on Error. 
*/ - private void generateRootCertificate(SecurityConfig securityConfig, - KeyPair key) throws IOException, SCMSecurityException { + private void generateRootCertificate( + SecurityConfig securityConfig, KeyPair key) + throws IOException, SCMSecurityException { Preconditions.checkNotNull(this.config); LocalDateTime beginDate = LocalDateTime.of(LocalDate.now(), LocalTime.MIDNIGHT); @@ -563,7 +576,7 @@ private void generateRootCertificate(SecurityConfig securityConfig, } catch (IOException e) { throw new org.apache.hadoop.hdds.security.x509 .exceptions.CertificateException( - "Error while adding ip to CA self signed certificate", e, + "Error while adding ip to CA self signed certificate", e, CSR_ERROR); } X509CertificateHolder selfSignedCertificate = builder.build(); @@ -573,6 +586,65 @@ private void generateRootCertificate(SecurityConfig securityConfig, certCodec.writeCertificate(selfSignedCertificate); } + private void initWithExternalRootCa(SecurityConfig conf) { + String externalRootCaLocation = conf.getExternalRootCaCert(); + Path extCertPath = Paths.get(externalRootCaLocation); + Path extPrivateKeyPath = Paths.get(conf.getExternalRootCaPrivateKeyPath()); + String externalPublicKeyLocation = conf.getExternalRootCaPublicKeyPath(); + + KeyCodec keyCodec = new KeyCodec(config, componentName); + CertificateCodec certificateCodec = + new CertificateCodec(config, componentName); + try { + Path extCertParent = extCertPath.getParent(); + Path extCertName = extCertPath.getFileName(); + if (extCertParent == null || extCertName == null) { + throw new IOException("External cert path is not correct: " + + extCertPath); + } + X509CertificateHolder certHolder = certificateCodec.readCertificate( + extCertParent, extCertName.toString()); + Path extPrivateKeyParent = extPrivateKeyPath.getParent(); + Path extPrivateKeyFileName = extPrivateKeyPath.getFileName(); + if (extPrivateKeyParent == null || extPrivateKeyFileName == null) { + throw new IOException("External private key path 
is not correct: " + + extPrivateKeyPath); + } + PrivateKey privateKey = keyCodec.readPrivateKey(extPrivateKeyParent, + extPrivateKeyFileName.toString()); + PublicKey publicKey; + publicKey = readPublicKeyWithExternalData( + externalPublicKeyLocation, keyCodec, certHolder); + keyCodec.writeKey(new KeyPair(publicKey, privateKey)); + certificateCodec.writeCertificate(certHolder); + } catch (IOException | CertificateException | NoSuchAlgorithmException | + InvalidKeySpecException e) { + LOG.error("External root CA certificate initialization failed", e); + } + } + + private PublicKey readPublicKeyWithExternalData( + String externalPublicKeyLocation, KeyCodec keyCodec, + X509CertificateHolder certHolder) + throws CertificateException, NoSuchAlgorithmException, + InvalidKeySpecException, IOException { + PublicKey publicKey; + if (externalPublicKeyLocation.isEmpty()) { + publicKey = CertificateCodec.getX509Certificate(certHolder) + .getPublicKey(); + } else { + Path publicKeyPath = Paths.get(externalPublicKeyLocation); + Path publicKeyPathFileName = publicKeyPath.getFileName(); + Path publicKeyParent = publicKeyPath.getParent(); + if (publicKeyPathFileName == null || publicKeyParent == null) { + throw new IOException("Public key path incorrect: " + publicKeyParent); + } + publicKey = keyCodec.readPublicKey( + publicKeyParent, publicKeyPathFileName.toString()); + } + return publicKey; + } + /** * This represents the verification status of the CA. Based on this enum * appropriate action is taken in the Init. 
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClient.java index e2863b94265b..641b26edb1a2 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CertificateClient.java @@ -20,12 +20,17 @@ package org.apache.hadoop.hdds.security.x509.certificate.client; import org.apache.hadoop.hdds.security.OzoneSecurityException; +import org.apache.hadoop.hdds.security.ssl.KeyStoresFactory; import org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.crl.CRLInfo; import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; +import org.bouncycastle.pkcs.PKCS10CertificationRequest; +import java.io.Closeable; import java.io.IOException; import java.io.InputStream; +import java.nio.file.Path; +import java.security.KeyPair; import java.security.PrivateKey; import java.security.PublicKey; import java.security.cert.CertStore; @@ -39,7 +44,7 @@ * Certificate client provides and interface to certificate operations that * needs to be performed by all clients in the Ozone eco-system. */ -public interface CertificateClient { +public interface CertificateClient extends Closeable { /** * Returns the private key of the specified component if it exists on the @@ -75,15 +80,6 @@ X509Certificate getCertificate(String certSerialId) */ X509Certificate getCertificate(); - /** - * Returns whether certificate of the specified component is renewed. - * - * @return true if it's renewed recently. - */ - default boolean isCertificateRenewed() { - return false; - } - /** * Return the latest CA certificate known to the client. 
* @return latest ca certificate known to the client. @@ -97,6 +93,12 @@ default boolean isCertificateRenewed() { */ boolean verifyCertificate(X509Certificate certificate); + /** + * Set the serial ID of default certificate for the specified component. + * @param certSerialId - certificate ID. + * */ + void setCertificateId(String certSerialId); + /** * Creates digital signature over the data stream using the components private * key. @@ -139,7 +141,35 @@ boolean verifySignature(byte[] data, byte[] signature, * * @return CertificateSignRequest.Builder */ - CertificateSignRequest.Builder getCSRBuilder() throws CertificateException; + CertificateSignRequest.Builder getCSRBuilder(KeyPair keyPair) + throws IOException; + + /** + * Returns a CSR builder that can be used to create a Certificate sigining + * request. + * + * @return CertificateSignRequest.Builder + */ + CertificateSignRequest.Builder getCSRBuilder() + throws CertificateException; + + /** + * Send request to SCM to sign the certificate and save certificates returned + * by SCM to PEM files on disk. + * + * @return the serial ID of the new certificate + */ + String signAndStoreCertificate(PKCS10CertificationRequest request, + Path certPath) throws CertificateException; + + /** + * Send request to SCM to sign the certificate and save certificates returned + * by SCM to PEM files on disk. + * + * @return the serial ID of the new certificate + */ + String signAndStoreCertificate(PKCS10CertificationRequest request) + throws CertificateException; /** * Get the certificate of well-known entity from SCM. @@ -323,4 +353,13 @@ default void assertValidKeysAndCertificate() throws OzoneSecurityException { */ boolean processCrl(CRLInfo crl); + /** + * Return the store factory for key manager and trust manager for server. + */ + KeyStoresFactory getServerKeyStoresFactory() throws CertificateException; + + /** + * Return the store factory for key manager and trust manager for client. 
+ */ + KeyStoresFactory getClientKeyStoresFactory() throws CertificateException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CommonCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CommonCertificateClient.java index bb122955a526..32b2cbb32c01 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CommonCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/CommonCertificateClient.java @@ -22,6 +22,8 @@ import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; import org.slf4j.Logger; +import java.util.function.Consumer; + import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.FAILURE; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.GETCERT; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.RECOVER; @@ -31,13 +33,15 @@ /** * Common Certificate client. 
*/ -public class CommonCertificateClient extends DefaultCertificateClient { +public abstract class CommonCertificateClient extends DefaultCertificateClient { private final Logger log; public CommonCertificateClient(SecurityConfig securityConfig, Logger log, - String certSerialId, String component) { - super(securityConfig, log, certSerialId, component); + String certSerialId, String component, + Consumer saveCertIdCallback, Runnable shutdownCallback) { + super(securityConfig, log, certSerialId, component, saveCertIdCallback, + shutdownCallback); this.log = log; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java index 40c5b0a7317a..613cca435546 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java @@ -19,12 +19,26 @@ package org.apache.hadoop.hdds.security.x509.certificate.client; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos; +import org.apache.hadoop.hdds.security.x509.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; +import org.apache.hadoop.security.UserGroupInformation; +import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.hdds.security.x509.SecurityConfig; +import java.io.IOException; +import java.net.InetAddress; +import java.nio.file.Path; +import java.security.KeyPair; +import 
java.util.function.Consumer; + +import static org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec.getX509Certificate; +import static org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest.getEncodedString; +import static org.apache.hadoop.hdds.security.x509.exceptions.CertificateException.ErrorCode.CSR_ERROR; /** * Certificate client for DataNodes. @@ -35,14 +49,27 @@ public class DNCertificateClient extends DefaultCertificateClient { LoggerFactory.getLogger(DNCertificateClient.class); public static final String COMPONENT_NAME = "dn"; + private final DatanodeDetails dn; public DNCertificateClient(SecurityConfig securityConfig, - String certSerialId) { - super(securityConfig, LOG, certSerialId, COMPONENT_NAME); + DatanodeDetails datanodeDetails, String certSerialId, + Consumer saveCertId, Runnable shutdown) { + super(securityConfig, LOG, certSerialId, COMPONENT_NAME, + saveCertId, shutdown); + this.dn = datanodeDetails; } - public DNCertificateClient(SecurityConfig securityConfig) { - super(securityConfig, LOG, null, COMPONENT_NAME); + /** + * Returns a CSR builder that can be used to creates a Certificate signing + * request. + * The default flag is added to allow basic SSL handshake. 
+ * + * @return CertificateSignRequest.Builder + */ + @Override + public CertificateSignRequest.Builder getCSRBuilder() + throws CertificateException { + return getCSRBuilder(new KeyPair(getPublicKey(), getPrivateKey())); } /** @@ -53,11 +80,67 @@ public DNCertificateClient(SecurityConfig securityConfig) { * @return CertificateSignRequest.Builder */ @Override - public CertificateSignRequest.Builder getCSRBuilder() + public CertificateSignRequest.Builder getCSRBuilder(KeyPair keyPair) throws CertificateException { - return super.getCSRBuilder() + CertificateSignRequest.Builder builder = super.getCSRBuilder() .setDigitalEncryption(true) .setDigitalSignature(true); + + try { + String hostname = InetAddress.getLocalHost().getCanonicalHostName(); + String subject = UserGroupInformation.getCurrentUser() + .getShortUserName() + "@" + hostname; + builder.setCA(false) + .setKey(keyPair) + .setConfiguration(getConfig()) + .setSubject(subject); + + LOG.info("Created csr for DN-> subject:{}", subject); + return builder; + } catch (Exception e) { + LOG.error("Failed to get hostname or current user", e); + throw new CertificateException("Failed to get hostname or current user", + e, CSR_ERROR); + } + } + + @Override + public String signAndStoreCertificate(PKCS10CertificationRequest csr, + Path certPath) throws CertificateException { + try { + // TODO: For SCM CA we should fetch certificate from multiple SCMs. + SCMSecurityProtocolProtos.SCMGetCertResponseProto response = + getScmSecureClient().getDataNodeCertificateChain( + dn.getProtoBufMessage(), getEncodedString(csr)); + + // Persist certificates. 
+ if (response.hasX509CACertificate()) { + String pemEncodedCert = response.getX509Certificate(); + CertificateCodec certCodec = new CertificateCodec( + getSecurityConfig(), certPath); + // Certs will be added to cert map after reloadAllCertificate called + storeCertificate(pemEncodedCert, true, false, false, certCodec, false); + storeCertificate(response.getX509CACertificate(), true, true, + false, certCodec, false); + + // Store Root CA certificate. + if (response.hasX509RootCACertificate()) { + storeCertificate(response.getX509RootCACertificate(), true, false, + true, certCodec, false); + } + // Return the default certificate ID + String dnCertSerialId = getX509Certificate(pemEncodedCert). + getSerialNumber().toString(); + return dnCertSerialId; + } else { + throw new CertificateException("Unable to retrieve datanode " + + "certificate chain."); + } + } catch (IOException | java.security.cert.CertificateException e) { + LOG.error("Error while signing and storing SCM signed certificate.", e); + throw new CertificateException( + "Error while signing and storing SCM signed certificate.", e); + } } @Override diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java index a55db9a427c4..8647c324f54d 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java @@ -26,6 +26,8 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; import java.security.InvalidKeyException; import java.security.KeyPair; import java.security.NoSuchAlgorithmException; @@ -38,19 
+40,30 @@ import java.security.cert.CertStore; import java.security.cert.X509Certificate; import java.security.spec.InvalidKeySpecException; +import java.time.Duration; import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; import java.util.ArrayList; +import java.util.Date; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Random; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Consumer; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.SCMSecurityProtocol; +import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; +import org.apache.hadoop.hdds.security.ssl.KeyStoresFactory; import org.apache.hadoop.hdds.security.x509.crl.CRLInfo; import org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; @@ -58,6 +71,7 @@ import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; import org.apache.hadoop.hdds.security.x509.keys.HDDSKeyGenerator; import org.apache.hadoop.hdds.security.x509.keys.KeyCodec; +import org.apache.hadoop.hdds.security.x509.keys.SecurityUtil; import org.apache.hadoop.ozone.OzoneSecurityUtil; import com.google.common.base.Preconditions; @@ -66,6 +80,8 @@ import org.apache.commons.lang3.math.NumberUtils; import org.apache.commons.validator.routines.DomainValidator; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX; +import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.FAILURE; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.GETCERT; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.REINIT; @@ -75,10 +91,12 @@ import static org.apache.hadoop.hdds.security.x509.exceptions.CertificateException.ErrorCode.CRYPTO_SIGNATURE_VERIFICATION_ERROR; import static org.apache.hadoop.hdds.security.x509.exceptions.CertificateException.ErrorCode.CRYPTO_SIGN_ERROR; import static org.apache.hadoop.hdds.security.x509.exceptions.CertificateException.ErrorCode.CSR_ERROR; -import static org.apache.hadoop.hdds.utils.HddsServerUtil.getScmSecurityClient; +import static org.apache.hadoop.hdds.security.x509.exceptions.CertificateException.ErrorCode.RENEW_ERROR; +import static org.apache.hadoop.hdds.security.x509.exceptions.CertificateException.ErrorCode.ROLLBACK_ERROR; import static org.apache.hadoop.hdds.utils.HddsServerUtil.getScmSecurityClientWithMaxRetry; import org.bouncycastle.cert.X509CertificateHolder; +import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; /** @@ -108,10 +126,26 @@ public abstract class DefaultCertificateClient implements CertificateClient { private long localCrlId; private String component; private List pemEncodedCACerts = null; - private final Lock lock; + private KeyStoresFactory serverKeyStoresFactory; + private KeyStoresFactory clientKeyStoresFactory; + + // Lock to protect the certificate renew process, to make sure there is only + // one renew process is ongoing at one time. + // Certificate renew steps: + // 1. generate new keys and sign new certificate, persist all data to disk + // 2. switch on disk new keys and certificate with current ones + // 3. save new certificate ID into service VERSION file + // 4. 
refresh in memory certificate ID and reload all new certificates + private Lock renewLock = new ReentrantLock(); + + private ScheduledExecutorService executorService; + private Consumer certIdSaveCallback; + private Runnable shutdownCallback; + private SCMSecurityProtocolClientSideTranslatorPB scmSecurityProtocolClient; DefaultCertificateClient(SecurityConfig securityConfig, Logger log, - String certSerialId, String component) { + String certSerialId, String component, + Consumer saveCertId, Runnable shutdown) { Objects.requireNonNull(securityConfig); this.securityConfig = securityConfig; keyCodec = new KeyCodec(securityConfig, component); @@ -119,15 +153,24 @@ public abstract class DefaultCertificateClient implements CertificateClient { this.certificateMap = new ConcurrentHashMap<>(); this.certSerialId = certSerialId; this.component = component; - lock = new ReentrantLock(); + this.certIdSaveCallback = saveCertId; + this.shutdownCallback = shutdown; loadAllCertificates(); } + public synchronized void setCertificateId(String certId) { + Preconditions.checkArgument(certSerialId == null, + "certSerialId should only be set once if not renew"); + this.certSerialId = certId; + // reload all new certs + loadAllCertificates(); + } + /** * Load all certificates from configured location. * */ - private void loadAllCertificates() { + private synchronized void loadAllCertificates() { // See if certs directory exists in file system. 
Path certPath = securityConfig.getCertificateLocation(component); if (Files.exists(certPath) && Files.isDirectory(certPath)) { @@ -172,7 +215,7 @@ private void loadAllCertificates() { latestRootCaCertSerialId = tmpRootCaCertSerailId; } } - getLogger().info("Added certificate from file:{}.", + getLogger().info("Added certificate {} from file:{}.", cert, file.getAbsolutePath()); } else { getLogger().error("Error reading certificate from file:{}", @@ -190,6 +233,15 @@ private void loadAllCertificates() { if (latestRootCaCertSerialId != -1) { rootCaCertId = Long.toString(latestRootCaCertSerialId); } + + if (x509Certificate != null) { + if (executorService == null) { + startCertificateMonitor(); + } + } else { + getLogger().warn("CertificateLifetimeMonitor is not started this " + + "time because certificate is empty."); + } } } } @@ -201,7 +253,7 @@ private void loadAllCertificates() { * @return private key or Null if there is no data. */ @Override - public PrivateKey getPrivateKey() { + public synchronized PrivateKey getPrivateKey() { if (privateKey != null) { return privateKey; } @@ -249,7 +301,7 @@ public PublicKey getPublicKey() { * @return certificate or Null if there is no data. */ @Override - public X509Certificate getCertificate() { + public synchronized X509Certificate getCertificate() { if (x509Certificate != null) { return x509Certificate; } @@ -272,7 +324,7 @@ public X509Certificate getCertificate() { * @return latest ca certificate known to the client. */ @Override - public X509Certificate getCACertificate() { + public synchronized X509Certificate getCACertificate() { if (caCertId != null) { return certificateMap.get(caCertId); } @@ -287,7 +339,7 @@ public X509Certificate getCACertificate() { * @return certificate or Null if there is no data. */ @Override - public X509Certificate getCertificate(String certId) + public synchronized X509Certificate getCertificate(String certId) throws CertificateException { // Check if it is in cache. 
if (certificateMap.containsKey(certId)) { @@ -300,9 +352,7 @@ public X509Certificate getCertificate(String certId) @Override public List getCrls(List crlIds) throws IOException { try { - SCMSecurityProtocol scmSecurityProtocolClient = getScmSecurityClient( - securityConfig.getConfiguration()); - return scmSecurityProtocolClient.getCrls(crlIds); + return getScmSecureClient().getCrls(crlIds); } catch (Exception e) { getLogger().error("Error while getting CRL with " + "CRL ids:{} from scm.", crlIds, e); @@ -314,9 +364,7 @@ public List getCrls(List crlIds) throws IOException { @Override public long getLatestCrlId() throws IOException { try { - SCMSecurityProtocol scmSecurityProtocolClient = getScmSecurityClient( - securityConfig.getConfiguration()); - return scmSecurityProtocolClient.getLatestCrlId(); + return getScmSecureClient().getLatestCrlId(); } catch (Exception e) { getLogger().error("Error while getting latest CRL id from scm.", e); throw new CertificateException("Error while getting latest CRL id from" + @@ -335,11 +383,7 @@ private X509Certificate getCertificateFromScm(String certId) getLogger().info("Getting certificate with certSerialId:{}.", certId); try { - SCMSecurityProtocol scmSecurityProtocolClient = - getScmSecurityClientWithMaxRetry( - (OzoneConfiguration) securityConfig.getConfiguration()); - String pemEncodedCert = - scmSecurityProtocolClient.getCertificate(certId); + String pemEncodedCert = getScmSecureClient().getCertificate(certId); this.storeCertificate(pemEncodedCert, true); return CertificateCodec.getX509Certificate(pemEncodedCert); } catch (Exception e) { @@ -579,22 +623,33 @@ public void storeCertificate(String pemEncodedCert, boolean force, boolean caCert) throws CertificateException { CertificateCodec certificateCodec = new CertificateCodec(securityConfig, component); - try { - Path basePath = securityConfig.getCertificateLocation(component); + storeCertificate(pemEncodedCert, force, caCert, false, + certificateCodec, true); + } + public 
synchronized void storeCertificate(String pemEncodedCert, + boolean force, boolean isCaCert, boolean isRootCaCert, + CertificateCodec codec, boolean addToCertMap) + throws CertificateException { + try { X509Certificate cert = CertificateCodec.getX509Certificate(pemEncodedCert); String certName = String.format(CERT_FILE_NAME_FORMAT, cert.getSerialNumber().toString()); - if (caCert) { + if (isCaCert) { certName = CA_CERT_PREFIX + certName; caCertId = cert.getSerialNumber().toString(); + } else if (isRootCaCert) { + certName = ROOT_CA_CERT_PREFIX + certName; + rootCaCertId = cert.getSerialNumber().toString(); } - certificateCodec.writeCertificate(basePath, certName, + codec.writeCertificate(codec.getLocation(), certName, pemEncodedCert, force); - certificateMap.putIfAbsent(cert.getSerialNumber().toString(), cert); + if (addToCertMap) { + certificateMap.putIfAbsent(cert.getSerialNumber().toString(), cert); + } } catch (IOException | java.security.cert.CertificateException e) { throw new CertificateException("Error while storing certificate.", e, CERTIFICATE_ERROR); @@ -608,7 +663,7 @@ public void storeCertificate(String pemEncodedCert, boolean force, * @throws CertificateException - on Error. */ @Override - public synchronized void storeTrustChain(CertStore ks) + public void storeTrustChain(CertStore ks) throws CertificateException { throw new UnsupportedOperationException("Operation not supported."); } @@ -621,7 +676,7 @@ public synchronized void storeTrustChain(CertStore ks) * @throws CertificateException - on Error. 
*/ @Override - public synchronized void storeTrustChain(List certificates) + public void storeTrustChain(List certificates) throws CertificateException { throw new UnsupportedOperationException("Operation not supported."); } @@ -895,18 +950,18 @@ protected void bootstrapClientKeys() throws CertificateException { "for certificate storage.", BOOTSTRAP_ERROR); } } - KeyPair keyPair = createKeyPair(); + KeyPair keyPair = createKeyPair(keyCodec); privateKey = keyPair.getPrivate(); publicKey = keyPair.getPublic(); } - protected KeyPair createKeyPair() throws CertificateException { + protected KeyPair createKeyPair(KeyCodec codec) throws CertificateException { HDDSKeyGenerator keyGenerator = new HDDSKeyGenerator(securityConfig); KeyPair keyPair = null; try { keyPair = keyGenerator.generateKey(); - keyCodec.writePublicKey(keyPair.getPublic()); - keyCodec.writePrivateKey(keyPair.getPrivate()); + codec.writePublicKey(keyPair.getPublic()); + codec.writePrivateKey(keyPair.getPrivate()); } catch (NoSuchProviderException | NoSuchAlgorithmException | IOException e) { getLogger().error("Error while bootstrapping certificate client.", e); @@ -925,7 +980,7 @@ public String getComponentName() { } @Override - public X509Certificate getRootCACertificate() { + public synchronized X509Certificate getRootCACertificate() { if (rootCaCertId != null) { return certificateMap.get(rootCaCertId); } @@ -937,65 +992,32 @@ public void storeRootCACertificate(String pemEncodedCert, boolean force) throws CertificateException { CertificateCodec certificateCodec = new CertificateCodec(securityConfig, component); - try { - Path basePath = securityConfig.getCertificateLocation(component); - - X509Certificate cert = - CertificateCodec.getX509Certificate(pemEncodedCert); - String certName = String.format(CERT_FILE_NAME_FORMAT, - cert.getSerialNumber().toString()); - - certName = ROOT_CA_CERT_PREFIX + certName; - rootCaCertId = cert.getSerialNumber().toString(); - - 
certificateCodec.writeCertificate(basePath, certName, - pemEncodedCert, force); - certificateMap.putIfAbsent(cert.getSerialNumber().toString(), cert); - } catch (IOException | java.security.cert.CertificateException e) { - throw new CertificateException("Error while storing Root CA " + - "certificate.", e, CERTIFICATE_ERROR); - } + storeCertificate(pemEncodedCert, force, false, true, + certificateCodec, true); } @Override - public List getCAList() { - lock.lock(); - try { - return pemEncodedCACerts; - } finally { - lock.unlock(); - } + public synchronized List getCAList() { + return pemEncodedCACerts; } @Override - public List listCA() throws IOException { - lock.lock(); - try { - if (pemEncodedCACerts == null) { - updateCAList(); - } - return pemEncodedCACerts; - } finally { - lock.unlock(); + public synchronized List listCA() throws IOException { + if (pemEncodedCACerts == null) { + updateCAList(); } + return pemEncodedCACerts; } @Override - public List updateCAList() throws IOException { - lock.lock(); + public synchronized List updateCAList() throws IOException { try { - SCMSecurityProtocol scmSecurityProtocolClient = - getScmSecurityClientWithMaxRetry( - (OzoneConfiguration) securityConfig.getConfiguration()); - pemEncodedCACerts = - scmSecurityProtocolClient.listCACertificate(); + pemEncodedCACerts = getScmSecureClient().listCACertificate(); return pemEncodedCACerts; } catch (Exception e) { getLogger().error("Error during updating CA list", e); throw new CertificateException("Error during updating CA list", e, CERTIFICATE_ERROR); - } finally { - lock.unlock(); } } @@ -1009,42 +1031,37 @@ public boolean processCrl(CRLInfo crl) { return reinitCert; } - - private boolean removeCertificates(List certIds) { - lock.lock(); + private synchronized boolean removeCertificates(List certIds) { boolean reInitCert = false; - try { - // For now, remove self cert and ca cert is not implemented - // both requires a restart of the service. 
- if ((certSerialId != null && certIds.contains(certSerialId)) || - (caCertId != null && certIds.contains(caCertId)) || - (rootCaCertId != null && certIds.contains(rootCaCertId))) { - reInitCert = true; - } - Path basePath = securityConfig.getCertificateLocation(component); - for (String certId : certIds) { - if (certificateMap.containsKey(certId)) { - // remove on disk - String certName = String.format(CERT_FILE_NAME_FORMAT, certId); - - if (certId.equals(caCertId)) { - certName = CA_CERT_PREFIX + certName; - } + // For now, remove self cert and ca cert is not implemented + // both requires a restart of the service. + if ((certSerialId != null && certIds.contains(certSerialId)) || + (caCertId != null && certIds.contains(caCertId)) || + (rootCaCertId != null && certIds.contains(rootCaCertId))) { + reInitCert = true; + } - if (certId.equals(rootCaCertId)) { - certName = ROOT_CA_CERT_PREFIX + certName; - } + Path basePath = securityConfig.getCertificateLocation(component); + for (String certId : certIds) { + if (certificateMap.containsKey(certId)) { + // remove on disk + String certName = String.format(CERT_FILE_NAME_FORMAT, certId); - FileUtils.deleteQuietly(basePath.resolve(certName).toFile()); - // remove in memory - certificateMap.remove(certId); + if (certId.equals(caCertId)) { + certName = CA_CERT_PREFIX + certName; + } - // TODO: reset certSerialId, caCertId or rootCaCertId + if (certId.equals(rootCaCertId)) { + certName = ROOT_CA_CERT_PREFIX + certName; } + + FileUtils.deleteQuietly(basePath.resolve(certName).toFile()); + // remove in memory + certificateMap.remove(certId); + + // TODO: reset certSerialId, caCertId or rootCaCertId } - } finally { - lock.unlock(); } return reInitCert; } @@ -1060,4 +1077,367 @@ public long getLocalCrlId() { public void setLocalCrlId(long crlId) { this.localCrlId = crlId; } + + @Override + public synchronized KeyStoresFactory getServerKeyStoresFactory() + throws CertificateException { + if (serverKeyStoresFactory == null) { + 
serverKeyStoresFactory = SecurityUtil.getServerKeyStoresFactory( + securityConfig, this, true); + } + return serverKeyStoresFactory; + } + + @Override + public KeyStoresFactory getClientKeyStoresFactory() + throws CertificateException { + if (clientKeyStoresFactory == null) { + clientKeyStoresFactory = SecurityUtil.getClientKeyStoresFactory( + securityConfig, this, true); + } + return clientKeyStoresFactory; + } + + @Override + public synchronized void close() throws IOException { + if (executorService != null) { + executorService.shutdown(); + } + + if (serverKeyStoresFactory != null) { + serverKeyStoresFactory.destroy(); + } + + if (clientKeyStoresFactory != null) { + clientKeyStoresFactory.destroy(); + } + } + + /** + * Check how much time before certificate will enter expiry grace period. + * @return Duration, time before certificate enters the grace + * period defined by "hdds.x509.renew.grace.duration" + */ + public Duration timeBeforeExpiryGracePeriod(X509Certificate certificate) { + Duration gracePeriod = securityConfig.getRenewalGracePeriod(); + Date expireDate = certificate.getNotAfter(); + LocalDateTime gracePeriodStart = expireDate.toInstant() + .atZone(ZoneId.systemDefault()).toLocalDateTime().minus(gracePeriod); + LocalDateTime currentTime = LocalDateTime.now(); + if (gracePeriodStart.isBefore(currentTime)) { + // Cert is already in grace period time. + return Duration.ZERO; + } else { + return Duration.between(currentTime, gracePeriodStart); + } + } + + /** + * Renew keys and certificate. Save the keys are certificate to disk in new + * directories, swap the current key directory and certs directory with the + * new directories. + * @param force, check certificate expiry time again if force is false. 
+ * @return String, new certificate ID + * */ + public String renewAndStoreKeyAndCertificate(boolean force) + throws CertificateException { + if (!force) { + synchronized (this) { + Preconditions.checkArgument( + timeBeforeExpiryGracePeriod(x509Certificate).isZero()); + } + } + + String newKeyPath = securityConfig.getKeyLocation(component) + .toString() + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX; + String newCertPath = securityConfig.getCertificateLocation(component) + .toString() + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX; + File newKeyDir = new File(newKeyPath); + File newCertDir = new File(newCertPath); + try { + FileUtils.deleteDirectory(newKeyDir); + FileUtils.deleteDirectory(newCertDir); + Files.createDirectories(newKeyDir.toPath()); + Files.createDirectories(newCertDir.toPath()); + } catch (IOException e) { + throw new CertificateException("Error while deleting/creating " + + newKeyPath + " or " + newCertPath + " directories to cleanup " + + " certificate storage. ", e, RENEW_ERROR); + } + + // Generate key + KeyCodec newKeyCodec = new KeyCodec(securityConfig, newKeyDir.toPath()); + KeyPair newKeyPair; + try { + newKeyPair = createKeyPair(newKeyCodec); + } catch (CertificateException e) { + throw new CertificateException("Error while creating new key pair.", + e, RENEW_ERROR); + } + + // Get certificate signed + String newCertSerialId; + try { + CertificateSignRequest.Builder csrBuilder = getCSRBuilder(newKeyPair); + newCertSerialId = signAndStoreCertificate(csrBuilder.build(), + Paths.get(newCertPath)); + } catch (Exception e) { + throw new CertificateException("Error while signing and storing new" + + " certificates.", e, RENEW_ERROR); + } + + // switch Key and Certs directory on disk + File currentKeyDir = new File( + securityConfig.getKeyLocation(component).toString()); + File currentCertDir = new File( + securityConfig.getCertificateLocation(component).toString()); + File backupKeyDir = new File( + securityConfig.getKeyLocation(component).toString() + + 
HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX); + File backupCertDir = new File( + securityConfig.getCertificateLocation(component).toString() + + HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX); + + try { + Files.move(currentKeyDir.toPath(), backupKeyDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + // Cannot move current key dir to the backup dir + throw new CertificateException("Failed to move " + + currentKeyDir.getAbsolutePath() + + " to " + backupKeyDir.getAbsolutePath() + " during " + + "certificate renew.", RENEW_ERROR); + } + + try { + Files.move(currentCertDir.toPath(), backupCertDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + // Cannot move current cert dir to the backup dir + rollbackBackupDir(currentKeyDir, currentCertDir, backupKeyDir, + backupCertDir); + throw new CertificateException("Failed to move " + + currentCertDir.getAbsolutePath() + + " to " + backupCertDir.getAbsolutePath() + " during " + + "certificate renew.", RENEW_ERROR); + } + + try { + Files.move(newKeyDir.toPath(), currentKeyDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + // Cannot move new dir as the current dir + String msg = "Failed to move " + newKeyDir.getAbsolutePath() + + " to " + currentKeyDir.getAbsolutePath() + + " during certificate renew."; + // rollback + rollbackBackupDir(currentKeyDir, currentCertDir, backupKeyDir, + backupCertDir); + throw new CertificateException(msg, RENEW_ERROR); + } + + try { + Files.move(newCertDir.toPath(), currentCertDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + // Cannot move new dir as the current dir + String msg = "Failed to move " + newCertDir.getAbsolutePath() + + " to " + currentCertDir.getAbsolutePath() + + " during certificate renew."; + // delete currentKeyDir which is moved from new key 
directory + try { + FileUtils.deleteDirectory(new File(currentKeyDir.toString())); + } catch (IOException e1) { + getLogger().error("Failed to delete current KeyDir {} which is moved " + + " from the newly generated KeyDir {}", currentKeyDir, newKeyDir, e); + throw new CertificateException(msg, RENEW_ERROR); + } + // rollback + rollbackBackupDir(currentKeyDir, currentCertDir, backupKeyDir, + backupCertDir); + throw new CertificateException(msg, RENEW_ERROR); + } + + getLogger().info("Successful renew key and certificate." + + " New certificate {}.", newCertSerialId); + return newCertSerialId; + } + + private void rollbackBackupDir(File currentKeyDir, File currentCertDir, + File backupKeyDir, File backupCertDir) throws CertificateException { + // move backup dir back as current dir + try { + Files.move(backupKeyDir.toPath(), currentKeyDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + String msg = "Failed to move " + backupKeyDir.getAbsolutePath() + + " back to " + currentKeyDir.getAbsolutePath() + + " during rollback."; + // Need a manual recover process. + throw new CertificateException(msg, ROLLBACK_ERROR); + } + + try { + Files.move(backupCertDir.toPath(), currentCertDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + String msg = "Failed to move " + backupCertDir.getAbsolutePath() + + " back to " + currentCertDir.getAbsolutePath() + + " during rollback."; + // Need a manual recover process. + throw new CertificateException(msg, ROLLBACK_ERROR); + } + + Preconditions.checkArgument(currentCertDir.exists()); + Preconditions.checkArgument(currentKeyDir.exists()); + } + + /** + * Delete old backup key and cert directory. 
+ */ + public void cleanBackupDir() { + File backupKeyDir = new File( + securityConfig.getKeyLocation(component).toString() + + HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX); + File backupCertDir = new File( + securityConfig.getCertificateLocation(component).toString() + + HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX); + if (backupKeyDir.exists()) { + try { + FileUtils.deleteDirectory(backupKeyDir); + } catch (IOException e) { + getLogger().error("Error while deleting {} directories for " + + "certificate storage cleanup.", backupKeyDir, e); + } + } + if (backupCertDir.exists()) { + try { + FileUtils.deleteDirectory(backupCertDir); + } catch (IOException e) { + getLogger().error("Error while deleting {} directories for " + + "certificate storage cleanup.", backupCertDir, e); + } + } + } + + synchronized void reloadKeyAndCertificate(String newCertId) { + // reset current value + privateKey = null; + publicKey = null; + x509Certificate = null; + certSerialId = null; + caCertId = null; + rootCaCertId = null; + + setCertificateId(newCertId); + getLogger().info("Reset and reload key and all certificates."); + } + + public SecurityConfig getSecurityConfig() { + return securityConfig; + } + + public OzoneConfiguration getConfig() { + return (OzoneConfiguration)securityConfig.getConfiguration(); + } + + @Override + public abstract String signAndStoreCertificate( + PKCS10CertificationRequest request, Path certPath) + throws CertificateException; + + public String signAndStoreCertificate(PKCS10CertificationRequest request) + throws CertificateException { + return signAndStoreCertificate(request, + getSecurityConfig().getCertificateLocation(getComponentName())); + } + + @Override + public abstract CertificateSignRequest.Builder getCSRBuilder(KeyPair keyPair) + throws CertificateException; + + public SCMSecurityProtocolClientSideTranslatorPB getScmSecureClient() + throws IOException { + if (scmSecurityProtocolClient == null) { + scmSecurityProtocolClient = + getScmSecurityClientWithMaxRetry( 
+ (OzoneConfiguration) securityConfig.getConfiguration()); + } + return scmSecurityProtocolClient; + } + + @VisibleForTesting + public void setSecureScmClient( + SCMSecurityProtocolClientSideTranslatorPB client) { + scmSecurityProtocolClient = client; + } + + public synchronized void startCertificateMonitor() { + Preconditions.checkNotNull(getCertificate(), + "Component certificate should not be empty"); + // Schedule task to refresh certificate before it expires + Duration gracePeriod = securityConfig.getRenewalGracePeriod(); + long timeBeforeGracePeriod = + timeBeforeExpiryGracePeriod(x509Certificate).toMillis(); + // At least three chances to renew the certificate before it expires + long interval = + Math.min(gracePeriod.toMillis() / 3, TimeUnit.DAYS.toMillis(1)); + + if (executorService == null) { + executorService = Executors.newScheduledThreadPool(1, + new ThreadFactoryBuilder().setNameFormat("CertificateLifetimeMonitor") + .setDaemon(true).build()); + } + this.executorService.scheduleAtFixedRate(new CertificateLifetimeMonitor(), + timeBeforeGracePeriod, interval, TimeUnit.MILLISECONDS); + getLogger().info("CertificateLifetimeMonitor is started with first delay" + + " {} ms and interval {} ms.", timeBeforeGracePeriod, interval); + } + + /** + * Task to monitor certificate lifetime and renew the certificate if needed. + */ + public class CertificateLifetimeMonitor implements Runnable { + @Override + public void run() { + + renewLock.lock(); + try { + Duration timeLeft = timeBeforeExpiryGracePeriod(getCertificate()); + if (timeLeft.isZero()) { + String newCertId; + try { + getLogger().info("Current certificate has entered the expiry" + + " grace period {}. 
Starting renew key and certs.", + timeLeft, securityConfig.getRenewalGracePeriod()); + newCertId = renewAndStoreKeyAndCertificate(false); + } catch (CertificateException e) { + if (e.errorCode() == + CertificateException.ErrorCode.ROLLBACK_ERROR) { + if (shutdownCallback != null) { + getLogger().error("Failed to rollback key and cert after an " + + " unsuccessful renew try.", e); + shutdownCallback.run(); + } + } + getLogger().error("Failed to renew and store key and cert." + + " Keep using existing certificates.", e); + return; + } + + // Persist new cert serial id in component VERSION file + if (certIdSaveCallback != null) { + certIdSaveCallback.accept(newCertId); + } + + // reset and reload all certs + reloadKeyAndCertificate(newCertId); + // cleanup backup directory + cleanBackupDir(); + } + } finally { + renewLock.unlock(); + } + } + } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/OMCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/OMCertificateClient.java deleted file mode 100644 index d6c535a98bd0..000000000000 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/OMCertificateClient.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package org.apache.hadoop.hdds.security.x509.certificate.client; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.hdds.security.x509.SecurityConfig; - -/** - * Certificate client for OzoneManager. - */ -public class OMCertificateClient extends CommonCertificateClient { - - private static final Logger LOG = - LoggerFactory.getLogger(OMCertificateClient.class); - - public static final String COMPONENT_NAME = "om"; - - public OMCertificateClient(SecurityConfig securityConfig, - String certSerialId, String localCrlId) { - super(securityConfig, LOG, certSerialId, COMPONENT_NAME); - this.setLocalCrlId(localCrlId != null ? 
- Long.parseLong(localCrlId) : 0); - } - - public OMCertificateClient(SecurityConfig securityConfig, - String certSerialId) { - this(securityConfig, certSerialId, null); - } - - public OMCertificateClient(SecurityConfig securityConfig) { - this(securityConfig, null, null); - } - - @Override - public Logger getLogger() { - return LOG; - } -} diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/ReconCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/ReconCertificateClient.java index afbcbf643559..3140444c42f4 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/ReconCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/ReconCertificateClient.java @@ -17,10 +17,27 @@ */ package org.apache.hadoop.hdds.security.x509.certificate.client; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos; import org.apache.hadoop.hdds.security.x509.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; +import org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest; +import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; +import org.apache.hadoop.security.UserGroupInformation; +import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.net.InetAddress; +import java.nio.file.Path; +import java.security.KeyPair; +import java.util.function.Consumer; + +import static org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec.getX509Certificate; +import static org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest.getEncodedString; +import static 
org.apache.hadoop.hdds.security.x509.exceptions.CertificateException.ErrorCode.CSR_ERROR; + /** * Certificate client for Recon. */ @@ -29,10 +46,93 @@ public class ReconCertificateClient extends CommonCertificateClient { LoggerFactory.getLogger(ReconCertificateClient.class); public static final String COMPONENT_NAME = "recon"; + private final String clusterID; + private final String reconID; + + public ReconCertificateClient(SecurityConfig securityConfig, + String certSerialId, String clusterId, String reconId, + Consumer saveCertIdCallback, Runnable shutdownCallback) { + super(securityConfig, LOG, certSerialId, COMPONENT_NAME, + saveCertIdCallback, shutdownCallback); + this.clusterID = clusterId; + this.reconID = reconId; + } public ReconCertificateClient(SecurityConfig securityConfig, - String certSerialId) { - super(securityConfig, LOG, certSerialId, COMPONENT_NAME); + String certSerialId, String clusterId, String reconId) { + super(securityConfig, LOG, certSerialId, COMPONENT_NAME, null, null); + this.clusterID = clusterId; + this.reconID = reconId; + } + + @Override + public CertificateSignRequest.Builder getCSRBuilder() + throws CertificateException { + return getCSRBuilder(new KeyPair(getPublicKey(), getPrivateKey())); + } + + @Override + public CertificateSignRequest.Builder getCSRBuilder(KeyPair keyPair) + throws CertificateException { + LOG.info("Creating CSR for Recon."); + try { + CertificateSignRequest.Builder builder = super.getCSRBuilder(); + String hostname = InetAddress.getLocalHost().getCanonicalHostName(); + String subject = UserGroupInformation.getCurrentUser() + .getShortUserName() + "@" + hostname; + + builder.setCA(false) + .setKey(keyPair) + .setConfiguration(getConfig()) + .setSubject(subject); + + return builder; + } catch (Exception e) { + LOG.error("Failed to get hostname or current user", e); + throw new CertificateException("Failed to get hostname or current user", + e, CSR_ERROR); + } + } + + @Override + public String 
signAndStoreCertificate(PKCS10CertificationRequest csr, + Path certPath) throws CertificateException { + try { + SCMSecurityProtocolProtos.SCMGetCertResponseProto response; + HddsProtos.NodeDetailsProto.Builder reconDetailsProtoBuilder = + HddsProtos.NodeDetailsProto.newBuilder() + .setHostName(InetAddress.getLocalHost().getHostName()) + .setClusterId(clusterID) + .setUuid(reconID) + .setNodeType(HddsProtos.NodeType.RECON); + // TODO: For SCM CA we should fetch certificate from multiple SCMs. + response = getScmSecureClient().getCertificateChain( + reconDetailsProtoBuilder.build(), getEncodedString(csr)); + + // Persist certificates. + if (response.hasX509CACertificate()) { + String pemEncodedCert = response.getX509Certificate(); + CertificateCodec certCodec = new CertificateCodec( + getSecurityConfig(), certPath); + storeCertificate(pemEncodedCert, true, false, false, certCodec, false); + storeCertificate(response.getX509CACertificate(), true, true, + false, certCodec, false); + + // Store Root CA certificate. 
+ if (response.hasX509RootCACertificate()) { + storeCertificate(response.getX509RootCACertificate(), + true, false, true, certCodec, false); + } + return getX509Certificate(pemEncodedCert).getSerialNumber().toString(); + } else { + throw new CertificateException("Unable to retrieve recon certificate " + + "chain"); + } + } catch (IOException | java.security.cert.CertificateException e) { + LOG.error("Error while signing and storing SCM signed certificate.", e); + throw new CertificateException( + "Error while signing and storing SCM signed certificate.", e); + } } @Override diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java index 91acc1e767a0..242ffaca8e85 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java @@ -22,10 +22,13 @@ import org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; import org.apache.hadoop.ozone.OzoneConsts; +import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.nio.file.Path; import java.nio.file.Paths; +import java.security.KeyPair; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.FAILURE; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.GETCERT; @@ -48,16 +51,16 @@ public class SCMCertificateClient extends DefaultCertificateClient { public SCMCertificateClient(SecurityConfig securityConfig, String certSerialId) { - super(securityConfig, LOG, certSerialId, 
COMPONENT_NAME); + super(securityConfig, LOG, certSerialId, COMPONENT_NAME, null, null); } public SCMCertificateClient(SecurityConfig securityConfig) { - super(securityConfig, LOG, null, COMPONENT_NAME); + super(securityConfig, LOG, null, COMPONENT_NAME, null, null); } public SCMCertificateClient(SecurityConfig securityConfig, String certSerialId, String component) { - super(securityConfig, LOG, certSerialId, component); + super(securityConfig, LOG, certSerialId, component, null, null); } @Override @@ -140,4 +143,16 @@ public CertificateSignRequest.Builder getCSRBuilder() public Logger getLogger() { return LOG; } + + @Override + public String signAndStoreCertificate(PKCS10CertificationRequest request, + Path certPath) throws CertificateException { + return null; + } + + @Override + public CertificateSignRequest.Builder getCSRBuilder(KeyPair keyPair) + throws CertificateException { + return null; + } } \ No newline at end of file diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/exceptions/CertificateException.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/exceptions/CertificateException.java index b3121283b18e..89fde76cb38f 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/exceptions/CertificateException.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/exceptions/CertificateException.java @@ -74,6 +74,10 @@ public CertificateException(Throwable cause) { super(cause); } + public ErrorCode errorCode() { + return errorCode; + } + /** * Error codes to make it easy to decode these exceptions. 
*/ @@ -84,6 +88,8 @@ public enum ErrorCode { BOOTSTRAP_ERROR, CSR_ERROR, CRYPTO_SIGNATURE_VERIFICATION_ERROR, - CERTIFICATE_NOT_FOUND_ERROR + CERTIFICATE_NOT_FOUND_ERROR, + RENEW_ERROR, + ROLLBACK_ERROR } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java index e57510c9ffa1..ab175d7838b9 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java @@ -89,6 +89,18 @@ public KeyCodec(SecurityConfig config, String component) { this.location = securityConfig.getKeyLocation(component); } + /** + * Creates a KeyCodec with component name. + * + * @param config - Security Config. + * @param keyDir - path to save the key materials. + */ + public KeyCodec(SecurityConfig config, Path keyDir) { + this.securityConfig = config; + isPosixFileSystem = KeyCodec::isPosix; + this.location = keyDir; + } + /** * Checks if File System supports posix style security permissions. 
* @@ -323,9 +335,9 @@ private synchronized void writeKey(Path basePath, KeyPair keyPair, checkPreconditions(basePath); File privateKeyFile = - Paths.get(location.toString(), privateKeyFileName).toFile(); + Paths.get(basePath.toString(), privateKeyFileName).toFile(); File publicKeyFile = - Paths.get(location.toString(), publicKeyFileName).toFile(); + Paths.get(basePath.toString(), publicKeyFileName).toFile(); checkKeyFile(privateKeyFile, force, publicKeyFile); try (PemWriter privateKeyWriter = new PemWriter(new diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/SecurityUtil.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/SecurityUtil.java index 6147d3a99012..bef416e8ca16 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/SecurityUtil.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/SecurityUtil.java @@ -18,6 +18,8 @@ */ package org.apache.hadoop.hdds.security.x509.keys; +import java.io.IOException; +import java.security.GeneralSecurityException; import java.security.KeyFactory; import java.security.NoSuchAlgorithmException; import java.security.NoSuchProviderException; @@ -26,7 +28,11 @@ import java.security.spec.InvalidKeySpecException; import java.security.spec.PKCS8EncodedKeySpec; import java.security.spec.X509EncodedKeySpec; + +import org.apache.hadoop.hdds.security.ssl.KeyStoresFactory; +import org.apache.hadoop.hdds.security.ssl.PemFileBasedKeyStoresFactory; import org.apache.hadoop.hdds.security.x509.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; import org.bouncycastle.asn1.ASN1ObjectIdentifier; import org.bouncycastle.asn1.ASN1Sequence; @@ -135,4 +141,32 @@ public static PublicKey getPublicKey(byte[] encodedKey, return key; } + public static KeyStoresFactory 
getServerKeyStoresFactory( + SecurityConfig securityConfig, CertificateClient client, + boolean requireClientAuth) throws CertificateException { + PemFileBasedKeyStoresFactory factory = + new PemFileBasedKeyStoresFactory(securityConfig, client); + try { + factory.init(KeyStoresFactory.Mode.SERVER, requireClientAuth); + } catch (IOException | GeneralSecurityException e) { + throw new CertificateException("Failed to init keyStoresFactory", e, + CertificateException.ErrorCode.KEYSTORE_ERROR); + } + return factory; + } + + public static KeyStoresFactory getClientKeyStoresFactory( + SecurityConfig securityConfig, CertificateClient client, + boolean requireClientAuth) throws CertificateException { + PemFileBasedKeyStoresFactory factory = + new PemFileBasedKeyStoresFactory(securityConfig, client); + + try { + factory.init(KeyStoresFactory.Mode.CLIENT, requireClientAuth); + } catch (IOException | GeneralSecurityException e) { + throw new CertificateException("Failed to init keyStoresFactory", e, + CertificateException.ErrorCode.KEYSTORE_ERROR); + } + return factory; + } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/FixedThreadPoolWithAffinityExecutor.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/FixedThreadPoolWithAffinityExecutor.java index 4949195cddb1..f53bce533dff 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/FixedThreadPoolWithAffinityExecutor.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/FixedThreadPoolWithAffinityExecutor.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.server.events; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.hdds.utils.MetricsUtil; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -126,10 +127,8 @@ public 
FixedThreadPoolWithAffinityExecutor( ++i; } - DefaultMetricsSystem.instance() - .register(EVENT_QUEUE + name, - "Event Executor metrics ", - this); + MetricsUtil.registerDynamic(this, EVENT_QUEUE + name, + "Event Executor metrics ", "EventQueue"); } public void setQueueWaitThreshold(long queueWaitThreshold) { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/SingleThreadExecutor.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/SingleThreadExecutor.java index 09be9cfd5fdd..bc8f7425b656 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/SingleThreadExecutor.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/SingleThreadExecutor.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdds.server.events; +import org.apache.hadoop.hdds.utils.MetricsUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,7 +26,6 @@ import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; -import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterLong; /** @@ -64,8 +64,8 @@ public class SingleThreadExecutor

implements EventExecutor

{ */ public SingleThreadExecutor(String name) { this.name = name; - DefaultMetricsSystem.instance() - .register(EVENT_QUEUE + name, "Event Executor metrics ", this); + MetricsUtil.registerDynamic(this, EVENT_QUEUE + name, + "Event Executor metrics ", "EventQueue"); executor = Executors.newSingleThreadExecutor( runnable -> { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/BaseHttpServer.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/BaseHttpServer.java index 4315dcdd54d2..144cec862099 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/BaseHttpServer.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/BaseHttpServer.java @@ -148,7 +148,7 @@ public BaseHttpServer(MutableConfigurationSource conf, String name) conf.getBoolean(HddsConfigKeys.HDDS_PROFILER_ENABLED, false); if (prometheusSupport) { - prometheusMetricsSink = new PrometheusMetricsSink(); + prometheusMetricsSink = new PrometheusMetricsSink(name); httpServer.getWebAppContext().getServletContext() .setAttribute(PROMETHEUS_SINK, prometheusMetricsSink); HddsPrometheusConfig prometheusConfig = diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/PrometheusMetricsSink.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/PrometheusMetricsSink.java index 3a6a079a056d..a0bec87ea9e9 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/PrometheusMetricsSink.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/PrometheusMetricsSink.java @@ -17,16 +17,14 @@ */ package org.apache.hadoop.hdds.server.http; -import static org.apache.hadoop.hdds.utils.RocksDBStoreMBean.ROCKSDB_CONTEXT_PREFIX; - import java.io.IOException; import java.io.Writer; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.TreeMap; -import java.util.regex.Pattern; import 
org.apache.commons.configuration2.SubsetConfiguration; -import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hdds.utils.PrometheusMetricsSinkUtil; import org.apache.hadoop.metrics2.AbstractMetric; import org.apache.hadoop.metrics2.MetricType; import org.apache.hadoop.metrics2.MetricsRecord; @@ -43,96 +41,90 @@ public class PrometheusMetricsSink implements MetricsSink { /** * Cached output lines for each metrics. */ - private final Map> metricLines = + private Map> metricLines = Collections.synchronizedSortedMap(new TreeMap<>()); + private Map> nextMetricLines = + Collections.synchronizedSortedMap(new TreeMap<>()); + private final String servername; - private static final Pattern SPLIT_PATTERN = - Pattern.compile("(? Collections.synchronizedSortedMap(new TreeMap<>())) - .put(prometheusMetricKeyAsString, String.valueOf(metrics.value())); + synchronized (this) { + nextMetricLines.computeIfAbsent(metricKey, + any -> Collections.synchronizedSortedMap(new TreeMap<>())) + .put(prometheusMetricKeyAsString, String.valueOf(metric.value())); + } } } } private String getPrometheusMetricKeyAsString(MetricsRecord metricsRecord, - String key) { + String key, String username) { StringBuilder prometheusMetricKey = new StringBuilder(); prometheusMetricKey.append(key) .append("{"); String sep = ""; + List metricsTags = + PrometheusMetricsSinkUtil.addTags(key, username, servername, + metricsRecord.tags()); + //add tags - for (MetricsTag tag : metricsRecord.tags()) { + for (MetricsTag tag : metricsTags) { String tagName = tag.name().toLowerCase(); //ignore specific tag which includes sub-hierarchy - if (!tagName.equals("numopenconnectionsperuser")) { - prometheusMetricKey.append(sep) - .append(tagName) - .append("=\"") - .append(tag.value()) - .append("\""); - sep = ","; + if (tagName.equals("numopenconnectionsperuser")) { + continue; } + + prometheusMetricKey.append(sep) + .append(tagName) + .append("=\"") + .append(tag.value()) + .append("\""); + sep = ","; } 
prometheusMetricKey.append("}"); return prometheusMetricKey.toString(); } - /** - * Convert CamelCase based names to lower-case names where the separator - * is the underscore, to follow prometheus naming conventions. - */ - public String prometheusName(String recordName, - String metricName) { - - // RocksDB metric names already have underscores as delimiters, - // but record name is from DB file name and '.' (as in 'om.db') is invalid - if (StringUtils.isNotEmpty(recordName) && - recordName.startsWith(ROCKSDB_CONTEXT_PREFIX)) { - return normalizeName(recordName) + "_" + metricName.toLowerCase(); - } - - String baseName = StringUtils.capitalize(recordName) - + StringUtils.capitalize(metricName); - return normalizeName(baseName); - } - - public static String normalizeName(String baseName) { - String[] parts = SPLIT_PATTERN.split(baseName); - String result = String.join("_", parts).toLowerCase(); - return REPLACE_PATTERN.matcher(result).replaceAll("_"); - } - @Override public void flush() { - + synchronized (this) { + metricLines = nextMetricLines; + nextMetricLines = Collections + .synchronizedSortedMap(new TreeMap<>()); + } } @Override @@ -140,7 +132,8 @@ public void init(SubsetConfiguration subsetConfiguration) { } - public void writeMetrics(Writer writer) throws IOException { + public synchronized void writeMetrics(Writer writer) + throws IOException { for (Map.Entry> metricsEntry : metricLines.entrySet()) { writer.write(metricsEntry.getKey() + "\n"); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisDropwizardExports.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisDropwizardExports.java index 50d840a7ad9a..834c7d80a5f1 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisDropwizardExports.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisDropwizardExports.java @@ -22,7 +22,10 @@ import 
io.prometheus.client.CollectorRegistry; import io.prometheus.client.dropwizard.DropwizardExports; import io.prometheus.client.dropwizard.samplebuilder.DefaultSampleBuilder; +import java.util.ArrayList; +import java.util.List; import java.util.function.BooleanSupplier; +import java.util.function.Consumer; import org.apache.ratis.metrics.MetricRegistries; import org.apache.ratis.metrics.MetricsReporting; import org.apache.ratis.metrics.RatisMetricRegistry; @@ -43,21 +46,29 @@ public RatisDropwizardExports(MetricRegistry registry) { super(registry, new RatisNameRewriteSampleBuilder()); } - public static void registerRatisMetricReporters( + public static List registerRatisMetricReporters( Map ratisMetricsMap, BooleanSupplier checkStopped) { //All the Ratis metrics (registered from now) will be published via JMX and //via the prometheus exporter (used by the /prom servlet - MetricRegistries.global() - .addReporterRegistration(MetricsReporting.jmxReporter(), - MetricsReporting.stopJmxReporter()); - MetricRegistries.global().addReporterRegistration( - r1 -> registerDropwizard(r1, ratisMetricsMap, checkStopped), - r2 -> deregisterDropwizard(r2, ratisMetricsMap)); + List ratisReporterList = new ArrayList<>(); + ratisReporterList.add(new MetricReporter(MetricsReporting.jmxReporter(), + MetricsReporting.stopJmxReporter())); + Consumer reporter + = r1 -> registerDropwizard(r1, ratisMetricsMap, checkStopped); + Consumer stopper + = r2 -> deregisterDropwizard(r2, ratisMetricsMap); + ratisReporterList.add(new MetricReporter(reporter, stopper)); + + for (MetricReporter metricReporter : ratisReporterList) { + metricReporter.addToGlobalRegistration(); + } + return ratisReporterList; } - public static void clear(Map - ratisMetricsMap) { + public static void clear( + Map ratisMetricsMap, + List ratisReporterList) { ratisMetricsMap.entrySet().stream().forEach(e -> { // remove and deregister from registry only one registered // as unregistered element if performed unregister again will 
@@ -67,6 +78,13 @@ public static void clear(Map CollectorRegistry.defaultRegistry.unregister(c); } }); + + if (null != ratisReporterList) { + for (MetricReporter metricReporter : ratisReporterList) { + metricReporter.removeFromGlobalRegistration(); + } + } + MetricRegistries.global().clear(); } @@ -94,4 +112,27 @@ private static void deregisterDropwizard(RatisMetricRegistry registry, CollectorRegistry.defaultRegistry.unregister(c); } } + + /** + * class for keeping track of reporters and add/remove to registry. + * + */ + public static class MetricReporter { + private final Consumer reporter; + private final Consumer stopper; + + MetricReporter(Consumer reporter, + Consumer stopper) { + this.reporter = reporter; + this.stopper = stopper; + } + + void addToGlobalRegistration() { + MetricRegistries.global().addReporterRegistration(reporter, stopper); + } + + void removeFromGlobalRegistration() { + MetricRegistries.global().removeReporterRegistration(reporter, stopper); + } + } } \ No newline at end of file diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DecayRpcSchedulerUtil.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DecayRpcSchedulerUtil.java new file mode 100644 index 000000000000..cfdf704016ce --- /dev/null +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DecayRpcSchedulerUtil.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.hdds.utils; + +import com.google.common.base.Strings; +import java.util.Optional; +import org.apache.hadoop.metrics2.MetricsInfo; +import org.apache.hadoop.metrics2.MetricsTag; + +/** + * Helper functions for DecayRpcScheduler + * metrics for Prometheus. + */ +public final class DecayRpcSchedulerUtil { + + private DecayRpcSchedulerUtil() { + } + + /** + * For Decay_Rpc_Scheduler, the metric name is in format + * "Caller().Volume" + * or + * "Caller().Priority" + * Split it and return the metric. + *

+ * If the recordName doesn't belong to Decay_Rpc_Scheduler, + * then return the metricName as it is without making + * any changes to it. + * + * @param recordName + * @param metricName "Caller(xyz).Volume" or "Caller(xyz).Priority" + * @return "Volume" or "Priority" or metricName(unchanged) + */ + public static String splitMetricNameIfNeeded(String recordName, + String metricName) { + if (recordName.toLowerCase().contains("decayrpcscheduler") && + metricName.toLowerCase().contains("caller(")) { + // names will contain ["Caller(xyz)", "Volume" / "Priority"] + String[] names = metricName.split("[.]"); + + // "Volume" or "Priority" + return names[1]; + } + return metricName; + } + + /** + * For Decay_Rpc_Scheduler, split the metric name + * and then get the part that is in the format "Caller()" + * and split it to return the username. + * + * @param recordName + * @param metricName + * @return caller username or null if not present + */ + public static String checkMetricNameForUsername(String recordName, + String metricName) { + if (recordName.toLowerCase().contains("decayrpcscheduler") && + metricName.toLowerCase().contains("caller(")) { + // names will contain ["Caller(xyz)", "Volume" / "Priority"] + String[] names = metricName.split("[.]"); + + // Caller(xyz) + String caller = names[0]; + + // subStrings will contain ["Caller", "xyz"] + String[] subStrings = caller.split("[()]"); + + String username = subStrings[1]; + + return username; + } + return null; + } + + + /** + * Create a username metrics tag. + * @param username caller username + * @return empty optional if no metrics tag was created, otherwise + * optional of metrics tag. 
+ */ + public static Optional createUsernameTag(String username) { + if (Strings.isNullOrEmpty(username)) { + return Optional.empty(); + } + + final String name = "username"; + final String description = "caller username"; + final MetricsInfo metricsInfo = new MetricsInfo() { + @Override + public String name() { + return name; + } + + @Override + public String description() { + return description; + } + }; + MetricsTag metricsTag = new MetricsTag(metricsInfo, username); + return Optional.of(metricsTag); + } + +} diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/MetricsUtil.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/MetricsUtil.java new file mode 100644 index 000000000000..0850bea1eaa6 --- /dev/null +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/MetricsUtil.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.utils; + +import java.lang.annotation.Annotation; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.Map; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Metrics util for metrics. + */ +public final class MetricsUtil { + private static final String ANNOTATIONS = "annotations"; + private static final String ANNOTATION_DATA = "annotationData"; + private static final Class ANNOTATION_TO_ALTER + = Metrics.class; + + private static final Logger LOG = + LoggerFactory.getLogger(MetricsUtil.class); + + private MetricsUtil() { + } + + /** + * register metric with changing class annotation for metrics. 
+ * + * @param source source to register + * @param name name of metric + * @param desc description of metric + * @param context context of metric + * @param source type + */ + public static void registerDynamic( + T source, String name, String desc, String context) { + updateAnnotation(source.getClass(), name, desc, context); + DefaultMetricsSystem.instance().register(name, desc, source); + } + + private static void updateAnnotation( + Class clz, String name, String desc, String context) { + try { + Annotation annotationValue = new Metrics() { + + @Override + public Class annotationType() { + return ANNOTATION_TO_ALTER; + } + + @Override + public String name() { + return name; + } + + @Override + public String about() { + return desc; + } + + @Override + public String context() { + return context; + } + }; + + Method method = clz.getClass().getDeclaredMethod( + ANNOTATION_DATA, null); + method.setAccessible(true); + Object annotationData = method.invoke(clz); + Field annotations = annotationData.getClass() + .getDeclaredField(ANNOTATIONS); + annotations.setAccessible(true); + Map, Annotation> map = + (Map, Annotation>) annotations + .get(annotationData); + map.put(ANNOTATION_TO_ALTER, annotationValue); + } catch (Exception e) { + LOG.error("Update Metrics annotation failed. ", e); + } + } +} diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/PrometheusMetricsSinkUtil.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/PrometheusMetricsSinkUtil.java new file mode 100644 index 000000000000..44ccb5397be3 --- /dev/null +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/PrometheusMetricsSinkUtil.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.utils; + +import static org.apache.hadoop.hdds.utils.RocksDBStoreMBean.ROCKSDB_CONTEXT_PREFIX; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Stream; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.metrics2.MetricsTag; + +/** + * Util class for + * {@link org.apache.hadoop.hdds.server.http.PrometheusMetricsSink}. + */ +public final class PrometheusMetricsSinkUtil { + private static final Pattern SPLIT_PATTERN = + Pattern.compile("(? addTags(String key, String username, + String servername, Collection unmodifiableTags) { + List metricTags = new ArrayList<>(unmodifiableTags); + + Stream.of(DecayRpcSchedulerUtil.createUsernameTag(username), + UgiMetricsUtil.createServernameTag(key, servername)) + .forEach( + metricsTag -> metricsTag.ifPresent(mt -> addTag(mt, metricTags))); + + return metricTags; + } + + /** + * Adds metric tag to a metrics tags. + * @param metricsTag metrics tag to be added + * @param metricsTags metrics tags where metrics tag needs to be added + */ + private static void addTag(MetricsTag metricsTag, + List metricsTags) { + metricsTags.add(metricsTag); + } + + /** + * Convert CamelCase based names to lower-case names where the separator + * is the underscore, to follow prometheus naming conventions. + */ + public static String prometheusName(String recordName, + String metricName) { + + // RocksDB metric names already have underscores as delimiters, + // but record name is from DB file name and '.' 
(as in 'om.db') is invalid + if (StringUtils.isNotEmpty(recordName) && + recordName.startsWith(ROCKSDB_CONTEXT_PREFIX)) { + return normalizeName(recordName) + "_" + metricName.toLowerCase(); + } + + String baseName = StringUtils.capitalize(recordName) + + StringUtils.capitalize(metricName); + return normalizeName(baseName); + } + + /** + * Normalizes metrics tag key name. + * @param baseName + * @return normalized name. + */ + private static String normalizeName(String baseName) { + String[] parts = SPLIT_PATTERN.split(baseName); + String result = String.join("_", parts).toLowerCase(); + return REPLACE_PATTERN.matcher(result).replaceAll("_"); + } + + public static String getMetricName(String recordName, String metricName) { + return DecayRpcSchedulerUtil.splitMetricNameIfNeeded(recordName, + metricName); + } + + public static String getUsername(String recordName, String metricName) { + return DecayRpcSchedulerUtil.checkMetricNameForUsername(recordName, + metricName); + } +} diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/UgiMetricsUtil.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/UgiMetricsUtil.java new file mode 100644 index 000000000000..60edf3c982cd --- /dev/null +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/UgiMetricsUtil.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.utils; + +import java.util.Optional; +import org.apache.hadoop.metrics2.MetricsInfo; +import org.apache.hadoop.metrics2.MetricsTag; + +/** + * Util class for UGI metrics. + */ +public final class UgiMetricsUtil { + + private static final String UGI_METRICS = "ugi_metrics"; + + /** + * Never constructed. + */ + private UgiMetricsUtil() { + } + + /** + * Creates servername metrics tag. + * + * @param key metrics entry key + * @param servername server name + * @return empty optional if no metrics tag was created, otherwise + * optional of metrics tag. + */ + public static Optional createServernameTag(String key, + String servername) { + if (!key.contains(UGI_METRICS)) { + return Optional.empty(); + } + + final String name = "servername"; + final String description = "name of the server"; + final MetricsInfo metricsInfo = new MetricsInfo() { + @Override + public String name() { + return name; + } + + @Override + public String description() { + return description; + } + }; + MetricsTag metricsTag = new MetricsTag(metricsInfo, servername); + return Optional.of(metricsTag); + } + +} diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java index 59952dc8ec6e..83e9d3b51527 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java @@ -68,7 +68,7 @@ public final class RocksDatabase { static final Logger LOG = LoggerFactory.getLogger(RocksDatabase.class); - 
static final String ESTIMATE_NUM_KEYS = "rocksdb.estimate-num-keys"; + public static final String ESTIMATE_NUM_KEYS = "rocksdb.estimate-num-keys"; static IOException toIOException(Object name, String op, RocksDBException e) { diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/ssl/TestPemFileBasedKeyStoresFactory.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/ssl/TestPemFileBasedKeyStoresFactory.java index adf66d94de8e..236439a37694 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/ssl/TestPemFileBasedKeyStoresFactory.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/ssl/TestPemFileBasedKeyStoresFactory.java @@ -33,7 +33,6 @@ import org.apache.hadoop.hdds.security.x509.CertificateClientTest; import org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.ozone.container.ContainerTestHelper; -import org.apache.hadoop.security.ssl.SSLFactory; import org.apache.ratis.thirdparty.io.grpc.ManagedChannel; import org.apache.ratis.thirdparty.io.grpc.Server; import org.apache.ratis.thirdparty.io.grpc.netty.GrpcSslContexts; @@ -84,7 +83,7 @@ private void clientMode(boolean clientAuth) throws Exception { KeyStoresFactory keyStoresFactory = new PemFileBasedKeyStoresFactory( secConf, caClient); try { - keyStoresFactory.init(SSLFactory.Mode.CLIENT, clientAuth); + keyStoresFactory.init(KeyStoresFactory.Mode.CLIENT, clientAuth); if (clientAuth) { Assert.assertTrue(keyStoresFactory.getKeyManagers()[0] instanceof ReloadingX509KeyManager); @@ -103,7 +102,7 @@ private void serverMode(boolean clientAuth) throws Exception { KeyStoresFactory keyStoresFactory = new PemFileBasedKeyStoresFactory( secConf, caClient); try { - keyStoresFactory.init(SSLFactory.Mode.SERVER, clientAuth); + keyStoresFactory.init(KeyStoresFactory.Mode.SERVER, clientAuth); Assert.assertTrue(keyStoresFactory.getKeyManagers()[0] instanceof ReloadingX509KeyManager); 
Assert.assertTrue(keyStoresFactory.getTrustManagers()[0] @@ -122,13 +121,13 @@ public void testConnectionWithCertReload() throws Exception { try { // create server serverFactory = new PemFileBasedKeyStoresFactory(secConf, caClient); - serverFactory.init(SSLFactory.Mode.SERVER, true); + serverFactory.init(KeyStoresFactory.Mode.SERVER, true); server = setupServer(serverFactory); server.start(); // create client clientFactory = new PemFileBasedKeyStoresFactory(secConf, caClient); - clientFactory.init(SSLFactory.Mode.CLIENT, true); + clientFactory.init(KeyStoresFactory.Mode.CLIENT, true); channel = setupClient(clientFactory, server.getPort()); XceiverClientProtocolServiceStub asyncStub = XceiverClientProtocolServiceGrpc.newStub(channel); diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/token/TestOzoneBlockTokenSecretManager.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/token/TestOzoneBlockTokenSecretManager.java index 63a34ef2369d..ad435a0f30b5 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/token/TestOzoneBlockTokenSecretManager.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/token/TestOzoneBlockTokenSecretManager.java @@ -36,7 +36,7 @@ import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; -import org.apache.hadoop.hdds.security.x509.certificate.client.OMCertificateClient; +import org.apache.hadoop.hdds.security.x509.certificate.client.DefaultCertificateClient; import org.apache.hadoop.security.ssl.KeyStoreTestUtil; import org.apache.hadoop.security.token.Token; import org.apache.ozone.test.GenericTestUtils; @@ -106,7 +106,7 @@ public void setUp() throws Exception { omCertSerialId = x509Certificate.getSerialNumber().toString(); secretManager = new OzoneBlockTokenSecretManager(securityConfig, 
TimeUnit.HOURS.toMillis(1), omCertSerialId); - client = Mockito.mock(OMCertificateClient.class); + client = Mockito.mock(DefaultCertificateClient.class); when(client.getCertificate()).thenReturn(x509Certificate); when(client.getCertificate(anyString())). thenReturn(x509Certificate); diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/CertificateClientTest.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/CertificateClientTest.java index 5003bf11c5c5..1c5e1118e0bb 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/CertificateClientTest.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/CertificateClientTest.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.file.Path; import java.security.KeyPair; import java.security.PrivateKey; import java.security.PublicKey; @@ -27,12 +28,14 @@ import java.util.List; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.security.ssl.KeyStoresFactory; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.crl.CRLInfo; import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; import org.apache.hadoop.security.ssl.KeyStoreTestUtil; +import org.bouncycastle.pkcs.PKCS10CertificationRequest; /** * Test implementation for CertificateClient. 
To be used only for test @@ -42,7 +45,6 @@ public class CertificateClientTest implements CertificateClient { private KeyPair keyPair; private X509Certificate x509Certificate; - private boolean isKeyRenewed; private SecurityConfig secConfig; public CertificateClientTest(OzoneConfiguration conf) @@ -90,6 +92,10 @@ public boolean verifyCertificate(X509Certificate certificate) { return true; } + @Override + public void setCertificateId(String certSerialId) { + } + @Override public byte[] signDataStream(InputStream stream) throws CertificateException { @@ -113,11 +119,29 @@ public boolean verifySignature(byte[] data, byte[] signature, return true; } + @Override + public CertificateSignRequest.Builder getCSRBuilder(KeyPair key) + throws IOException { + return null; + } + @Override public CertificateSignRequest.Builder getCSRBuilder() { return new CertificateSignRequest.Builder(); } + @Override + public String signAndStoreCertificate(PKCS10CertificationRequest request, + Path certPath) throws CertificateException { + return null; + } + + @Override + public String signAndStoreCertificate(PKCS10CertificationRequest request) + throws CertificateException { + return null; + } + @Override public X509Certificate queryCertificate(String query) { return null; @@ -218,8 +242,15 @@ public boolean processCrl(CRLInfo crl) { } @Override - public boolean isCertificateRenewed() { - return isKeyRenewed; + public KeyStoresFactory getServerKeyStoresFactory() + throws CertificateException { + return null; + } + + @Override + public KeyStoresFactory getClientKeyStoresFactory() + throws CertificateException { + return null; } public void renewKey() throws Exception { @@ -229,6 +260,9 @@ public void renewKey() throws Exception { keyPair = newKeyPair; x509Certificate = newCert; - isKeyRenewed = true; + } + + @Override + public void close() throws IOException { } } diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultCAServer.java 
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultCAServer.java index 7c3e035f1c67..b049a6a07655 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultCAServer.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/TestDefaultCAServer.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hdds.security.x509.certificate.authority; import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.validator.routines.DomainValidator; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.security.exception.SCMSecurityException; @@ -29,7 +30,11 @@ import org.apache.hadoop.hdds.security.x509.certificate.client.SCMCertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest; +import org.apache.hadoop.hdds.security.x509.certificates.utils.SelfSignedCertificate; import org.apache.hadoop.hdds.security.x509.keys.HDDSKeyGenerator; +import org.apache.hadoop.hdds.security.x509.keys.KeyCodec; +import org.apache.hadoop.ozone.OzoneSecurityUtil; +import org.apache.hadoop.security.ssl.KeyStoreTestUtil; import org.apache.ozone.test.LambdaTestUtils; import org.bouncycastle.asn1.x509.CRLReason; @@ -50,12 +55,14 @@ import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.time.LocalDate; +import java.time.LocalDateTime; import java.time.ZoneId; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.List; import java.util.Optional; +import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.function.Consumer; @@ -65,6 +72,7 @@ import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType.SCM; import static org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateServer.CAType.INTERMEDIARY_CA; import static org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateServer.CAType.SELF_SIGNED_CA; +import static org.apache.hadoop.hdds.security.x509.exceptions.CertificateException.ErrorCode.CSR_ERROR; import static org.apache.hadoop.ozone.OzoneConsts.SCM_CA_CERT_STORAGE_DIR; import static org.apache.hadoop.ozone.OzoneConsts.SCM_CA_PATH; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -77,11 +85,12 @@ * Tests the Default CA Server. */ public class TestDefaultCAServer { - private static OzoneConfiguration conf = new OzoneConfiguration(); + private OzoneConfiguration conf; private MockCAStore caStore; @BeforeEach public void init(@TempDir Path tempDir) throws IOException { + conf = new OzoneConfiguration(); conf.set(OZONE_METADATA_DIRS, tempDir.toString()); caStore = new MockCAStore(); } @@ -339,6 +348,57 @@ public void testIntermediaryCAWithEmpty() { () -> scmCA.init(new SecurityConfig(conf), INTERMEDIARY_CA)); } + @Test + public void testExternalRootCA(@TempDir Path tempDir) throws Exception { + //Given an external certificate + String externalCaCertFileName = "CaCert.pem"; + setExternalPathsInConfig(tempDir, externalCaCertFileName); + + SecurityConfig securityConfig = new SecurityConfig(conf); + SCMCertificateClient scmCertificateClient = + new SCMCertificateClient(new SecurityConfig(conf)); + + KeyPair keyPair = KeyStoreTestUtil.generateKeyPair("RSA"); + KeyCodec keyPEMWriter = new KeyCodec(securityConfig, + scmCertificateClient.getComponentName()); + + keyPEMWriter.writeKey(tempDir, keyPair, true); + X509CertificateHolder externalCert = generateExternalCert(keyPair); + + CertificateCodec certificateCodec = new CertificateCodec(securityConfig, + scmCertificateClient.getComponentName()); + + certificateCodec.writeCertificate(tempDir, externalCaCertFileName, 
+ CertificateCodec.getPEMEncodedString(externalCert), true); + + CertificateServer testCA = new DefaultCAServer("testCA", + RandomStringUtils.randomAlphabetic(4), + RandomStringUtils.randomAlphabetic(4), caStore, + new DefaultProfile(), + Paths.get(SCM_CA_CERT_STORAGE_DIR, SCM_CA_PATH).toString()); + //When initializing a CA server with external cert + testCA.init(securityConfig, SELF_SIGNED_CA); + //Then the external cert is set as CA cert for the server. + assertEquals(externalCert, testCA.getCACertificate()); + } + + private void setExternalPathsInConfig(Path tempDir, + String externalCaCertFileName) { + String externalCaCertPart = Paths.get(tempDir.toString(), + externalCaCertFileName).toString(); + String privateKeyPath = Paths.get(tempDir.toString(), + HddsConfigKeys.HDDS_PRIVATE_KEY_FILE_NAME_DEFAULT).toString(); + String publicKeyPath = Paths.get(tempDir.toString(), + HddsConfigKeys.HDDS_PUBLIC_KEY_FILE_NAME_DEFAULT).toString(); + + conf.set(HddsConfigKeys.HDDS_X509_ROOTCA_CERTIFICATE_FILE, + externalCaCertPart); + conf.set(HddsConfigKeys.HDDS_X509_ROOTCA_PRIVATE_KEY_FILE, + privateKeyPath); + conf.set(HddsConfigKeys.HDDS_X509_ROOTCA_PUBLIC_KEY_FILE, + publicKeyPath); + } + @Test public void testIntermediaryCA() throws Exception { @@ -414,4 +474,41 @@ clusterId, scmId, caStore, new DefaultProfile(), } + private X509CertificateHolder generateExternalCert(KeyPair keyPair) + throws Exception { + LocalDateTime notBefore = LocalDateTime.now(); + LocalDateTime notAfter = notBefore.plusYears(1); + String clusterID = UUID.randomUUID().toString(); + String scmID = UUID.randomUUID().toString(); + String subject = "testRootCert"; + + SelfSignedCertificate.Builder builder = + SelfSignedCertificate.newBuilder() + .setBeginDate(notBefore) + .setEndDate(notAfter) + .setClusterID(clusterID) + .setScmID(scmID) + .setSubject(subject) + .setKey(keyPair) + .setConfiguration(conf) + .makeCA(); + + try { + DomainValidator validator = DomainValidator.getInstance(); + // Add all 
valid ips. + OzoneSecurityUtil.getValidInetsForCurrentHost().forEach( + ip -> { + builder.addIpAddress(ip.getHostAddress()); + if (validator.isValid(ip.getCanonicalHostName())) { + builder.addDnsName(ip.getCanonicalHostName()); + } + }); + } catch (IOException e) { + throw new org.apache.hadoop.hdds.security.x509 + .exceptions.CertificateException( + "Error while adding ip to CA self signed certificate", e, + CSR_ERROR); + } + return builder.build(); + } } diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java index 394f4b0a3b75..0e7beea5eb20 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java @@ -18,15 +18,23 @@ */ package org.apache.hadoop.hdds.security.x509.certificate.client; +import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos; +import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; +import org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; import org.apache.hadoop.hdds.security.x509.keys.KeyCodec; import org.bouncycastle.cert.X509CertificateHolder; +import org.bouncycastle.pkcs.PKCS10CertificationRequest; +import org.junit.Assert; import org.junit.jupiter.api.AfterEach; import 
org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -66,6 +74,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyObject; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.mock; @@ -79,16 +88,11 @@ public class TestDefaultCertificateClient { private String certSerialId; private X509Certificate x509Certificate; - private OMCertificateClient omCertClient; private DNCertificateClient dnCertClient; private HDDSKeyGenerator keyGenerator; - private Path omMetaDirPath; private Path dnMetaDirPath; - private SecurityConfig omSecurityConfig; private SecurityConfig dnSecurityConfig; private static final String DN_COMPONENT = DNCertificateClient.COMPONENT_NAME; - private static final String OM_COMPONENT = OMCertificateClient.COMPONENT_NAME; - private KeyCodec omKeyCodec; private KeyCodec dnKeyCodec; @BeforeEach @@ -96,25 +100,16 @@ public void setUp() throws Exception { OzoneConfiguration config = new OzoneConfiguration(); config.setStrings(OZONE_SCM_NAMES, "localhost"); config.setInt(IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 2); - final String omPath = GenericTestUtils - .getTempPath(UUID.randomUUID().toString()); final String dnPath = GenericTestUtils .getTempPath(UUID.randomUUID().toString()); - omMetaDirPath = Paths.get(omPath, "test"); dnMetaDirPath = Paths.get(dnPath, "test"); - - config.set(HDDS_METADATA_DIR_NAME, omMetaDirPath.toString()); - omSecurityConfig = new SecurityConfig(config); config.set(HDDS_METADATA_DIR_NAME, dnMetaDirPath.toString()); dnSecurityConfig = new SecurityConfig(config); - - keyGenerator = new HDDSKeyGenerator(omSecurityConfig); - omKeyCodec = new 
KeyCodec(omSecurityConfig, OM_COMPONENT); + keyGenerator = new HDDSKeyGenerator(dnSecurityConfig); dnKeyCodec = new KeyCodec(dnSecurityConfig, DN_COMPONENT); - Files.createDirectories(omSecurityConfig.getKeyLocation(OM_COMPONENT)); Files.createDirectories(dnSecurityConfig.getKeyLocation(DN_COMPONENT)); x509Certificate = generateX509Cert(null); certSerialId = x509Certificate.getSerialNumber().toString(); @@ -122,15 +117,14 @@ public void setUp() throws Exception { } private void getCertClient() { - omCertClient = new OMCertificateClient(omSecurityConfig, certSerialId); - dnCertClient = new DNCertificateClient(dnSecurityConfig, certSerialId); + dnCertClient = new DNCertificateClient(dnSecurityConfig, + MockDatanodeDetails.randomDatanodeDetails(), certSerialId, null, + () -> System.exit(1)); } @AfterEach public void tearDown() { - omCertClient = null; dnCertClient = null; - FileUtils.deleteQuietly(omMetaDirPath.toFile()); FileUtils.deleteQuietly(dnMetaDirPath.toFile()); } @@ -141,13 +135,13 @@ public void tearDown() { @Test public void testKeyOperations() throws Exception { cleanupOldKeyPair(); - PrivateKey pvtKey = omCertClient.getPrivateKey(); - PublicKey publicKey = omCertClient.getPublicKey(); + PrivateKey pvtKey = dnCertClient.getPrivateKey(); + PublicKey publicKey = dnCertClient.getPublicKey(); assertNull(publicKey); assertNull(pvtKey); KeyPair keyPair = generateKeyPairFiles(); - pvtKey = omCertClient.getPrivateKey(); + pvtKey = dnCertClient.getPrivateKey(); assertNotNull(pvtKey); assertEquals(pvtKey, keyPair.getPrivate()); @@ -159,21 +153,12 @@ public void testKeyOperations() throws Exception { private KeyPair generateKeyPairFiles() throws Exception { cleanupOldKeyPair(); KeyPair keyPair = keyGenerator.generateKey(); - omKeyCodec.writePrivateKey(keyPair.getPrivate()); - omKeyCodec.writePublicKey(keyPair.getPublic()); - dnKeyCodec.writePrivateKey(keyPair.getPrivate()); dnKeyCodec.writePublicKey(keyPair.getPublic()); return keyPair; } private void 
cleanupOldKeyPair() { - FileUtils.deleteQuietly(Paths.get( - omSecurityConfig.getKeyLocation(OM_COMPONENT).toString(), - omSecurityConfig.getPrivateKeyFileName()).toFile()); - FileUtils.deleteQuietly(Paths.get( - omSecurityConfig.getKeyLocation(OM_COMPONENT).toString(), - omSecurityConfig.getPublicKeyFileName()).toFile()); FileUtils.deleteQuietly(Paths.get( dnSecurityConfig.getKeyLocation(DN_COMPONENT).toString(), dnSecurityConfig.getPrivateKeyFileName()).toFile()); @@ -187,12 +172,12 @@ private void cleanupOldKeyPair() { */ @Test public void testCertificateOps() throws Exception { - X509Certificate cert = omCertClient.getCertificate(); + X509Certificate cert = dnCertClient.getCertificate(); assertNull(cert); - omCertClient.storeCertificate(getPEMEncodedString(x509Certificate), + dnCertClient.storeCertificate(getPEMEncodedString(x509Certificate), true); - cert = omCertClient.getCertificate( + cert = dnCertClient.getCertificate( x509Certificate.getSerialNumber().toString()); assertNotNull(cert); assertTrue(cert.getEncoded().length > 0); @@ -206,26 +191,26 @@ private X509Certificate generateX509Cert(KeyPair keyPair) throws Exception { keyPair = generateKeyPairFiles(); } return KeyStoreTestUtil.generateCertificate("CN=Test", keyPair, 30, - omSecurityConfig.getSignatureAlgo()); + dnSecurityConfig.getSignatureAlgo()); } @Test public void testSignDataStream() throws Exception { String data = RandomStringUtils.random(100); FileUtils.deleteQuietly(Paths.get( - omSecurityConfig.getKeyLocation(OM_COMPONENT).toString(), - omSecurityConfig.getPrivateKeyFileName()).toFile()); + dnSecurityConfig.getKeyLocation(DN_COMPONENT).toString(), + dnSecurityConfig.getPrivateKeyFileName()).toFile()); FileUtils.deleteQuietly(Paths.get( - omSecurityConfig.getKeyLocation(OM_COMPONENT).toString(), - omSecurityConfig.getPublicKeyFileName()).toFile()); + dnSecurityConfig.getKeyLocation(DN_COMPONENT).toString(), + dnSecurityConfig.getPublicKeyFileName()).toFile()); // Expect error when there is 
no private key to sign. LambdaTestUtils.intercept(IOException.class, "Error while " + "signing the stream", - () -> omCertClient.signDataStream(IOUtils.toInputStream(data, UTF_8))); + () -> dnCertClient.signDataStream(IOUtils.toInputStream(data, UTF_8))); generateKeyPairFiles(); - byte[] sign = omCertClient.signDataStream(IOUtils.toInputStream(data, + byte[] sign = dnCertClient.signDataStream(IOUtils.toInputStream(data, UTF_8)); validateHash(sign, data.getBytes(UTF_8)); } @@ -236,9 +221,9 @@ public void testSignDataStream() throws Exception { private void validateHash(byte[] hash, byte[] data) throws Exception { Signature rsaSignature = - Signature.getInstance(omSecurityConfig.getSignatureAlgo(), - omSecurityConfig.getProvider()); - rsaSignature.initVerify(omCertClient.getPublicKey()); + Signature.getInstance(dnSecurityConfig.getSignatureAlgo(), + dnSecurityConfig.getProvider()); + rsaSignature.initVerify(dnCertClient.getPublicKey()); rsaSignature.update(data); assertTrue(rsaSignature.verify(hash)); } @@ -249,20 +234,20 @@ private void validateHash(byte[] hash, byte[] data) @Test public void verifySignatureStream() throws Exception { String data = RandomStringUtils.random(500); - byte[] sign = omCertClient.signDataStream(IOUtils.toInputStream(data, + byte[] sign = dnCertClient.signDataStream(IOUtils.toInputStream(data, UTF_8)); // Positive tests. - assertTrue(omCertClient.verifySignature(data.getBytes(UTF_8), sign, + assertTrue(dnCertClient.verifySignature(data.getBytes(UTF_8), sign, x509Certificate)); - assertTrue(omCertClient.verifySignature( + assertTrue(dnCertClient.verifySignature( IOUtils.toInputStream(data, UTF_8), sign, x509Certificate)); // Negative tests. 
- assertFalse(omCertClient.verifySignature(data.getBytes(UTF_8), + assertFalse(dnCertClient.verifySignature(data.getBytes(UTF_8), "abc".getBytes(UTF_8), x509Certificate)); - assertFalse(omCertClient.verifySignature(IOUtils.toInputStream(data, + assertFalse(dnCertClient.verifySignature(IOUtils.toInputStream(data, UTF_8), "abc".getBytes(UTF_8), x509Certificate)); } @@ -273,19 +258,19 @@ public void verifySignatureStream() throws Exception { @Test public void verifySignatureDataArray() throws Exception { String data = RandomStringUtils.random(500); - byte[] sign = omCertClient.signData(data.getBytes(UTF_8)); + byte[] sign = dnCertClient.signData(data.getBytes(UTF_8)); // Positive tests. - assertTrue(omCertClient.verifySignature(data.getBytes(UTF_8), sign, + assertTrue(dnCertClient.verifySignature(data.getBytes(UTF_8), sign, x509Certificate)); - assertTrue(omCertClient.verifySignature( + assertTrue(dnCertClient.verifySignature( IOUtils.toInputStream(data, UTF_8), sign, x509Certificate)); // Negative tests. - assertFalse(omCertClient.verifySignature(data.getBytes(UTF_8), + assertFalse(dnCertClient.verifySignature(data.getBytes(UTF_8), "abc".getBytes(UTF_8), x509Certificate)); - assertFalse(omCertClient.verifySignature(IOUtils.toInputStream(data, + assertFalse(dnCertClient.verifySignature(IOUtils.toInputStream(data, UTF_8), "abc".getBytes(UTF_8), x509Certificate)); } @@ -294,7 +279,7 @@ public void verifySignatureDataArray() throws Exception { public void queryCertificate() throws Exception { LambdaTestUtils.intercept(UnsupportedOperationException.class, "Operation not supported", - () -> omCertClient.queryCertificate("")); + () -> dnCertClient.queryCertificate("")); } @Test @@ -329,7 +314,8 @@ public void testCertificateLoadingOnInit() throws Exception { getPEMEncodedString(cert3), true); // Re instantiate DN client which will load certificates from filesystem. 
- dnCertClient = new DNCertificateClient(dnSecurityConfig, certSerialId); + dnCertClient = new DNCertificateClient(dnSecurityConfig, null, + certSerialId, null, null); assertNotNull(dnCertClient.getCertificate(cert1.getSerialNumber() .toString())); @@ -361,66 +347,34 @@ public void testStoreCertificate() throws Exception { @Test public void testInitCertAndKeypairValidationFailures() throws Exception { - GenericTestUtils.LogCapturer dnClientLog = GenericTestUtils.LogCapturer .captureLogs(dnCertClient.getLogger()); - GenericTestUtils.LogCapturer omClientLog = GenericTestUtils.LogCapturer - .captureLogs(omCertClient.getLogger()); KeyPair keyPair = keyGenerator.generateKey(); - KeyPair keyPair2 = keyGenerator.generateKey(); + KeyPair keyPair1 = keyGenerator.generateKey(); dnClientLog.clearOutput(); - omClientLog.clearOutput(); // Case 1. Expect failure when keypair validation fails. - FileUtils.deleteQuietly(Paths.get( - omSecurityConfig.getKeyLocation(OM_COMPONENT).toString(), - omSecurityConfig.getPrivateKeyFileName()).toFile()); - FileUtils.deleteQuietly(Paths.get( - omSecurityConfig.getKeyLocation(OM_COMPONENT).toString(), - omSecurityConfig.getPublicKeyFileName()).toFile()); - - FileUtils.deleteQuietly(Paths.get( dnSecurityConfig.getKeyLocation(DN_COMPONENT).toString(), dnSecurityConfig.getPrivateKeyFileName()).toFile()); FileUtils.deleteQuietly(Paths.get( dnSecurityConfig.getKeyLocation(DN_COMPONENT).toString(), dnSecurityConfig.getPublicKeyFileName()).toFile()); - - omKeyCodec.writePrivateKey(keyPair.getPrivate()); - omKeyCodec.writePublicKey(keyPair2.getPublic()); - dnKeyCodec.writePrivateKey(keyPair.getPrivate()); - dnKeyCodec.writePublicKey(keyPair2.getPublic()); - + dnKeyCodec.writePublicKey(keyPair1.getPublic()); // Check for DN. assertEquals(FAILURE, dnCertClient.init()); assertTrue(dnClientLog.getOutput().contains("Keypair validation failed")); dnClientLog.clearOutput(); - omClientLog.clearOutput(); - - // Check for OM. 
- assertEquals(FAILURE, omCertClient.init()); - assertTrue(omClientLog.getOutput().contains("Keypair validation failed")); - dnClientLog.clearOutput(); - omClientLog.clearOutput(); // Case 2. Expect failure when certificate is generated from different // private key and keypair validation fails. getCertClient(); - FileUtils.deleteQuietly(Paths.get( - omSecurityConfig.getKeyLocation(OM_COMPONENT).toString(), - omSecurityConfig.getCertificateFileName()).toFile()); FileUtils.deleteQuietly(Paths.get( dnSecurityConfig.getKeyLocation(DN_COMPONENT).toString(), dnSecurityConfig.getCertificateFileName()).toFile()); - CertificateCodec omCertCodec = new CertificateCodec(omSecurityConfig, - OM_COMPONENT); - omCertCodec.writeCertificate(new X509CertificateHolder( - x509Certificate.getEncoded())); - CertificateCodec dnCertCodec = new CertificateCodec(dnSecurityConfig, DN_COMPONENT); dnCertCodec.writeCertificate(new X509CertificateHolder( @@ -429,26 +383,15 @@ public void testInitCertAndKeypairValidationFailures() throws Exception { assertEquals(FAILURE, dnCertClient.init()); assertTrue(dnClientLog.getOutput().contains("Keypair validation failed")); dnClientLog.clearOutput(); - omClientLog.clearOutput(); - - // Check for OM. - assertEquals(FAILURE, omCertClient.init()); - assertTrue(omClientLog.getOutput().contains("Keypair validation failed")); - dnClientLog.clearOutput(); - omClientLog.clearOutput(); // Case 3. Expect failure when certificate is generated from different // private key and certificate validation fails. // Re-write the correct public key. - FileUtils.deleteQuietly(Paths.get( - omSecurityConfig.getKeyLocation(OM_COMPONENT).toString(), - omSecurityConfig.getPublicKeyFileName()).toFile()); FileUtils.deleteQuietly(Paths.get( dnSecurityConfig.getKeyLocation(DN_COMPONENT).toString(), dnSecurityConfig.getPublicKeyFileName()).toFile()); getCertClient(); - omKeyCodec.writePublicKey(keyPair.getPublic()); dnKeyCodec.writePublicKey(keyPair.getPublic()); // Check for DN. 
@@ -456,20 +399,9 @@ public void testInitCertAndKeypairValidationFailures() throws Exception { assertTrue(dnClientLog.getOutput() .contains("Stored certificate is generated with different")); dnClientLog.clearOutput(); - omClientLog.clearOutput(); - - //Check for OM. - assertEquals(FAILURE, omCertClient.init()); - assertTrue(omClientLog.getOutput() - .contains("Stored certificate is generated with different")); - dnClientLog.clearOutput(); - omClientLog.clearOutput(); // Case 4. Failure when public key recovery fails. getCertClient(); - FileUtils.deleteQuietly(Paths.get( - omSecurityConfig.getKeyLocation(OM_COMPONENT).toString(), - omSecurityConfig.getPublicKeyFileName()).toFile()); FileUtils.deleteQuietly(Paths.get( dnSecurityConfig.getKeyLocation(DN_COMPONENT).toString(), dnSecurityConfig.getPublicKeyFileName()).toFile()); @@ -477,12 +409,6 @@ public void testInitCertAndKeypairValidationFailures() throws Exception { // Check for DN. assertEquals(FAILURE, dnCertClient.init()); assertTrue(dnClientLog.getOutput().contains("Can't recover public key")); - - // Check for OM. 
- assertEquals(FAILURE, omCertClient.init()); - assertTrue(omClientLog.getOutput().contains("Can't recover public key")); - dnClientLog.clearOutput(); - omClientLog.clearOutput(); } @Test @@ -505,7 +431,8 @@ public void testCertificateExpirationHandlingInInit() throws Exception { when(mockCert.getNotAfter()).thenReturn(expiration); DefaultCertificateClient client = - new DefaultCertificateClient(config, mockLogger, certId, compName) { + new DefaultCertificateClient(config, mockLogger, certId, compName, + null, null) { @Override public PrivateKey getPrivateKey() { return mock(PrivateKey.class); @@ -520,10 +447,113 @@ public PublicKey getPublicKey() { public X509Certificate getCertificate() { return mockCert; } + + @Override + public String signAndStoreCertificate( + PKCS10CertificationRequest request, Path certPath) + throws CertificateException { + return null; + } + + @Override + public CertificateSignRequest.Builder getCSRBuilder(KeyPair keyPair) + throws CertificateException { + return null; + } }; InitResponse resp = client.init(); verify(mockLogger, atLeastOnce()).info(anyString()); assertEquals(resp, REINIT); } + + @Test + public void testTimeBeforeExpiryGracePeriod() throws Exception { + KeyPair keyPair = keyGenerator.generateKey(); + Duration gracePeriod = dnSecurityConfig.getRenewalGracePeriod(); + + X509Certificate cert = KeyStoreTestUtil.generateCertificate("CN=Test", + keyPair, (int)(gracePeriod.toDays()), + dnSecurityConfig.getSignatureAlgo()); + dnCertClient.storeCertificate(getPEMEncodedString(cert), true); + Duration duration = dnCertClient.timeBeforeExpiryGracePeriod(cert); + Assert.assertTrue(duration.isZero()); + + cert = KeyStoreTestUtil.generateCertificate("CN=Test", + keyPair, (int)(gracePeriod.toDays() + 1), + dnSecurityConfig.getSignatureAlgo()); + dnCertClient.storeCertificate(getPEMEncodedString(cert), true); + duration = dnCertClient.timeBeforeExpiryGracePeriod(cert); + Assert.assertTrue(duration.toMillis() < Duration.ofDays(1).toMillis() 
&& + duration.toMillis() > Duration.ofHours(23).plusMinutes(59).toMillis()); + } + + @Test + public void testRenewAndStoreKeyAndCertificate() throws Exception { + // save the certificate on dn + CertificateCodec certCodec = new CertificateCodec(dnSecurityConfig, + dnSecurityConfig.getCertificateLocation(DN_COMPONENT)); + certCodec.writeCertificate( + new X509CertificateHolder(x509Certificate.getEncoded())); + + SCMSecurityProtocolClientSideTranslatorPB scmClient = + mock(SCMSecurityProtocolClientSideTranslatorPB.class); + X509Certificate newCert = generateX509Cert(null); + dnCertClient.setSecureScmClient(scmClient); + String pemCert = CertificateCodec.getPEMEncodedString(newCert); + SCMSecurityProtocolProtos.SCMGetCertResponseProto responseProto = + SCMSecurityProtocolProtos.SCMGetCertResponseProto + .newBuilder().setResponseCode(SCMSecurityProtocolProtos + .SCMGetCertResponseProto.ResponseCode.success) + .setX509Certificate(pemCert) + .setX509CACertificate(pemCert) + .build(); + when(scmClient.getDataNodeCertificateChain(anyObject(), anyString())) + .thenReturn(responseProto); + + String certID = dnCertClient.getCertificate().getSerialNumber().toString(); + // a success renew + String newCertId = dnCertClient.renewAndStoreKeyAndCertificate(true); + Assert.assertFalse(certID.equals(newCertId)); + Assert.assertTrue(dnCertClient.getCertificate().getSerialNumber() + .toString().equals(certID)); + + File newKeyDir = new File(dnSecurityConfig.getKeyLocation( + dnCertClient.getComponentName()).toString() + + HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX); + File newCertDir = new File(dnSecurityConfig.getCertificateLocation( + dnCertClient.getComponentName()).toString() + + HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX); + File backupKeyDir = new File(dnSecurityConfig.getKeyLocation( + dnCertClient.getComponentName()).toString() + + HddsConfigKeys.HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX); + File backupCertDir = new File(dnSecurityConfig.getCertificateLocation( + 
dnCertClient.getComponentName()).toString() + + HddsConfigKeys.HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX); + + // backup directories exist + Assert.assertTrue(backupKeyDir.exists()); + Assert.assertTrue(backupCertDir.exists()); + // new directories should not exist + Assert.assertFalse(newKeyDir.exists()); + Assert.assertFalse(newCertDir.exists()); + + // cleanup backup key and cert dir + dnCertClient.cleanBackupDir(); + + Files.createDirectories(newKeyDir.toPath()); + Files.createDirectories(newCertDir.toPath()); + KeyPair keyPair = KeyStoreTestUtil.generateKeyPair("RSA"); + KeyCodec newKeyCodec = new KeyCodec(dnSecurityConfig, newKeyDir.toPath()); + newKeyCodec.writeKey(keyPair); + + X509Certificate cert = KeyStoreTestUtil.generateCertificate( + "CN=OzoneMaster", keyPair, 30, "SHA256withRSA"); + certCodec = new CertificateCodec(dnSecurityConfig, + newCertDir.toPath()); + dnCertClient.storeCertificate(getPEMEncodedString(cert), true, false, false, + certCodec, false); + // a success renew after auto cleanup new key and cert dir + dnCertClient.renewAndStoreKeyAndCertificate(true); + } } \ No newline at end of file diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDnCertificateClientInit.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDnCertificateClientInit.java new file mode 100644 index 000000000000..9b31426fce69 --- /dev/null +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDnCertificateClientInit.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.hadoop.hdds.security.x509.certificate.client; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.security.x509.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; +import org.apache.hadoop.hdds.security.x509.keys.HDDSKeyGenerator; +import org.apache.hadoop.hdds.security.x509.keys.KeyCodec; +import org.apache.hadoop.ozone.OzoneSecurityUtil; +import org.apache.hadoop.security.ssl.KeyStoreTestUtil; +import org.apache.ozone.test.GenericTestUtils; +import org.bouncycastle.cert.X509CertificateHolder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.security.KeyPair; +import java.security.cert.X509Certificate; +import java.util.UUID; +import java.util.stream.Stream; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_METADATA_DIR_NAME; +import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse; +import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.FAILURE; +import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.GETCERT; +import static 
org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.SUCCESS; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +/** + * Test class for {@link DNCertificateClient}. + */ +public class TestDnCertificateClientInit { + + private KeyPair keyPair; + private String certSerialId = "3284792342234"; + private CertificateClient dnCertificateClient; + private HDDSKeyGenerator keyGenerator; + private Path metaDirPath; + private SecurityConfig securityConfig; + private KeyCodec dnKeyCodec; + private X509Certificate x509Certificate; + private static final String DN_COMPONENT = DNCertificateClient.COMPONENT_NAME; + + private static Stream parameters() { + return Stream.of( + arguments(false, false, false, GETCERT), + arguments(false, false, true, FAILURE), + arguments(false, true, false, FAILURE), + arguments(true, false, false, FAILURE), + arguments(false, true, true, FAILURE), + arguments(true, true, false, GETCERT), + arguments(true, false, true, SUCCESS), + arguments(true, true, true, SUCCESS) + ); + } + + @BeforeEach + public void setUp() throws Exception { + OzoneConfiguration config = new OzoneConfiguration(); + final String path = GenericTestUtils + .getTempPath(UUID.randomUUID().toString()); + metaDirPath = Paths.get(path, "test"); + config.set(HDDS_METADATA_DIR_NAME, metaDirPath.toString()); + securityConfig = new SecurityConfig(config); + keyGenerator = new HDDSKeyGenerator(securityConfig); + keyPair = keyGenerator.generateKey(); + x509Certificate = getX509Certificate(); + certSerialId = x509Certificate.getSerialNumber().toString(); + dnCertificateClient = + new DNCertificateClient(securityConfig, null, certSerialId, null, null); + dnKeyCodec = new KeyCodec(securityConfig, DN_COMPONENT); + + Files.createDirectories(securityConfig.getKeyLocation(DN_COMPONENT)); + } + + @AfterEach + public void 
tearDown() { + dnCertificateClient = null; + FileUtils.deleteQuietly(metaDirPath.toFile()); + } + + + @ParameterizedTest + @MethodSource("parameters") + public void testInitDatanode(boolean pvtKeyPresent, boolean pubKeyPresent, + boolean certPresent, InitResponse expectedResult) throws Exception { + if (pvtKeyPresent) { + dnKeyCodec.writePrivateKey(keyPair.getPrivate()); + } else { + FileUtils.deleteQuietly(Paths.get( + securityConfig.getKeyLocation(DN_COMPONENT).toString(), + securityConfig.getPrivateKeyFileName()).toFile()); + } + + if (pubKeyPresent) { + if (dnCertificateClient.getPublicKey() == null) { + dnKeyCodec.writePublicKey(keyPair.getPublic()); + } + } else { + FileUtils.deleteQuietly( + Paths.get(securityConfig.getKeyLocation(DN_COMPONENT).toString(), + securityConfig.getPublicKeyFileName()).toFile()); + } + + if (certPresent) { + CertificateCodec codec = new CertificateCodec(securityConfig, + DN_COMPONENT); + codec.writeCertificate(new X509CertificateHolder( + x509Certificate.getEncoded())); + } else { + FileUtils.deleteQuietly(Paths.get( + securityConfig.getKeyLocation(DN_COMPONENT).toString(), + securityConfig.getCertificateFileName()).toFile()); + } + InitResponse response = dnCertificateClient.init(); + + assertEquals(expectedResult, response); + + if (!response.equals(FAILURE)) { + assertTrue(OzoneSecurityUtil.checkIfFileExist( + securityConfig.getKeyLocation(DN_COMPONENT), + securityConfig.getPrivateKeyFileName())); + assertTrue(OzoneSecurityUtil.checkIfFileExist( + securityConfig.getKeyLocation(DN_COMPONENT), + securityConfig.getPublicKeyFileName())); + } + } + + private X509Certificate getX509Certificate() throws Exception { + return KeyStoreTestUtil.generateCertificate( + "CN=Test", keyPair, 365, securityConfig.getSignatureAlgo()); + } +} \ No newline at end of file diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestPrometheusMetricsSink.java 
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestPrometheusMetricsIntegration.java similarity index 55% rename from hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestPrometheusMetricsSink.java rename to hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestPrometheusMetricsIntegration.java index 5339a06bbb61..5243dfda51d9 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestPrometheusMetricsSink.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestPrometheusMetricsIntegration.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,8 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; +import java.util.concurrent.TimeoutException; + import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsSource; @@ -31,15 +33,16 @@ import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** - * Test prometheus Sink. + * Test prometheus Metrics. */ -public class TestPrometheusMetricsSink { +public class TestPrometheusMetricsIntegration { private MetricsSystem metrics; private PrometheusMetricsSink sink; @@ -76,7 +79,7 @@ public void init() { metrics = DefaultMetricsSystem.instance(); metrics.init("test"); - sink = new PrometheusMetricsSink(); + sink = new PrometheusMetricsSink("random"); metrics.register("Prometheus", "Prometheus", sink); } @@ -87,15 +90,15 @@ public void tearDown() { } @Test - public void testPublish() throws IOException { + public void testPublish() + throws InterruptedException, TimeoutException { //GIVEN TestMetrics testMetrics = metrics .register("TestMetrics", "Testing metrics", new TestMetrics()); testMetrics.numBucketCreateFails.incr(); - //WHEN - String writtenMetrics = publishMetricsAndGetOutput(); + String writtenMetrics = waitForMetricsToPublish("test_metrics_num"); //THEN Assertions.assertTrue( @@ -103,11 +106,14 @@ public void testPublish() throws IOException { "test_metrics_num_bucket_create_fails{context=\"dfs\""), 
"The expected metric line is missing from prometheus metrics output" ); + + metrics.unregisterSource("TestMetrics"); } @Test - public void testPublishWithSameName() throws IOException { - //GIVEN + public void testPublishWithSameName() + throws InterruptedException, TimeoutException { + // GIVEN metrics.register("FooBar", "fooBar", (MetricsSource) (collector, all) -> { collector.addRecord("RpcMetrics").add(new MetricsTag(PORT_INFO, "1234")) .addGauge(COUNTER_INFO, COUNTER_1).endRecord(); @@ -116,8 +122,7 @@ public void testPublishWithSameName() throws IOException { PORT_INFO, "2345")).addGauge(COUNTER_INFO, COUNTER_2).endRecord(); }); - // WHEN - String writtenMetrics = publishMetricsAndGetOutput(); + String writtenMetrics = waitForMetricsToPublish("rpc_metrics_counter"); // THEN Assertions.assertTrue( @@ -127,11 +132,14 @@ public void testPublishWithSameName() throws IOException { Assertions.assertTrue( writtenMetrics.contains("rpc_metrics_counter{port=\"1234\""), "The expected metric line is missing from prometheus metrics output"); + + metrics.unregisterSource("FooBar"); } @Test - public void testTypeWithSameNameButDifferentLabels() throws IOException { - //GIVEN + public void testTypeWithSameNameButDifferentLabels() + throws InterruptedException, TimeoutException { + // GIVEN metrics.register("SameName", "sameName", (MetricsSource) (collector, all) -> { collector.addRecord("SameName").add(new MetricsTag(PORT_INFO, "1234")) @@ -141,59 +149,63 @@ public void testTypeWithSameNameButDifferentLabels() throws IOException { }); // WHEN - String writtenMetrics = publishMetricsAndGetOutput(); + String writtenMetrics = waitForMetricsToPublish("same_name_counter"); // THEN Assertions.assertEquals(1, StringUtils.countMatches(writtenMetrics, "# TYPE same_name_counter")); - } - @Test - public void testNamingCamelCase() { - //THEN - Assertions.assertEquals("rpc_time_some_metrics", - sink.prometheusName("RpcTime", "SomeMetrics")); - - 
Assertions.assertEquals("om_rpc_time_om_info_keys", - sink.prometheusName("OMRpcTime", "OMInfoKeys")); - - Assertions.assertEquals("rpc_time_small", - sink.prometheusName("RpcTime", "small")); - } + // both metrics should be present + Assertions.assertTrue( + writtenMetrics.contains("same_name_counter{port=\"1234\""), + "The expected metric line is present in prometheus metrics output"); + Assertions.assertTrue( + writtenMetrics.contains("same_name_counter{port=\"2345\""), + "The expected metric line is present in prometheus metrics output"); - @Test - public void testNamingRocksDB() { - //RocksDB metrics are handled differently. - // THEN - Assertions.assertEquals("rocksdb_om_db_num_open_connections", - sink.prometheusName("Rocksdb_om.db", "num_open_connections")); + metrics.unregisterSource("SameName"); } + /** + * Make sure Prometheus metrics start fresh after each flush. + * Publish the metrics and flush them, + * then unregister one of them and register another. + * Publish and flush the metrics again + * and then check that the unregistered metric is not present. 
+ */ @Test - public void testNamingPipeline() { + public void testRemovingStaleMetricsOnFlush() + throws InterruptedException, TimeoutException { // GIVEN - String recordName = "SCMPipelineMetrics"; - String metricName = "NumBlocksAllocated-" - + "RATIS-THREE-47659e3d-40c9-43b3-9792-4982fc279aba"; + metrics.register("StaleMetric", "staleMetric", + (MetricsSource) (collector, all) -> + collector.addRecord("StaleMetric") + .add(new MetricsTag(PORT_INFO, "1234")) + .addGauge(COUNTER_INFO, COUNTER_1).endRecord()); - // THEN - Assertions.assertEquals( - "scm_pipeline_metrics_" - + "num_blocks_allocated_" - + "ratis_three_47659e3d_40c9_43b3_9792_4982fc279aba", - sink.prometheusName(recordName, metricName)); - } + waitForMetricsToPublish("stale_metric_counter"); - @Test - public void testNamingSpaces() { - //GIVEN - String recordName = "JvmMetrics"; - String metricName = "GcTimeMillisG1 Young Generation"; + // unregister the metric + metrics.unregisterSource("StaleMetric"); + + metrics.register("SomeMetric", "someMetric", + (MetricsSource) (collector, all) -> + collector.addRecord("SomeMetric") + .add(new MetricsTag(PORT_INFO, "4321")) + .addGauge(COUNTER_INFO, COUNTER_2).endRecord()); + + String writtenMetrics = waitForMetricsToPublish("some_metric_counter"); // THEN - Assertions.assertEquals( - "jvm_metrics_gc_time_millis_g1_young_generation", - sink.prometheusName(recordName, metricName)); + // The first metric shouldn't be present + Assertions.assertFalse( + writtenMetrics.contains("stale_metric_counter{port=\"1234\""), + "The expected metric line is present in prometheus metrics output"); + Assertions.assertTrue( + writtenMetrics.contains("some_metric_counter{port=\"4321\""), + "The expected metric line is present in prometheus metrics output"); + + metrics.unregisterSource("SomeMetric"); } private String publishMetricsAndGetOutput() throws IOException { @@ -204,10 +216,36 @@ private String publishMetricsAndGetOutput() throws IOException { sink.writeMetrics(writer); 
writer.flush(); - return stream.toString(UTF_8.name()); } + /** + * metrics.publishMetricsNow() might not finish in a reasonable + * amount of time leading to a full queue and any further attempt + * for publishing to fail. Wrapping the call with + * GenericTestUtils.waitFor() to retry until the queue has been + * cleared and publish is a success. + * + * @param registeredMetric to check if it's published + * @return all published metrics + */ + private String waitForMetricsToPublish(String registeredMetric) + throws InterruptedException, TimeoutException { + + final String[] writtenMetrics = new String[1]; + + GenericTestUtils.waitFor(() -> { + try { + writtenMetrics[0] = publishMetricsAndGetOutput(); + } catch (IOException e) { + throw new RuntimeException(e); + } + return writtenMetrics[0].contains(registeredMetric); + }, 1000, 120000); + + return writtenMetrics[0]; + } + /** * Example metric pojo. */ diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestDecayRpcSchedulerUtil.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestDecayRpcSchedulerUtil.java new file mode 100644 index 000000000000..f84e6ce06872 --- /dev/null +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestDecayRpcSchedulerUtil.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.hdds.utils; + +import java.util.Optional; +import org.apache.hadoop.metrics2.MetricsTag; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +/** + * Test class for DecayRpcSchedulerUtil. + */ +public class TestDecayRpcSchedulerUtil { + + private static final String USERNAME = "testUser"; + private static final String METRIC_NAME_VOLUME = "Volume"; + + private static final String RECORD_NAME = + "org.apache.hadoop.ipc.DecayRpcScheduler"; + private static final String METRIC_NAME = + "Caller(" + USERNAME + ")." + METRIC_NAME_VOLUME; + + private static final String RANDOM_RECORD_NAME = "JvmMetrics"; + private static final String RANDOM_METRIC_NAME = "ThreadsNew"; + + @Test + void testSplitMetricNameIfNeeded() { + // Split the metric name and return only the + // name of the metric type. + String splitName = DecayRpcSchedulerUtil + .splitMetricNameIfNeeded(RECORD_NAME, METRIC_NAME); + + assertEquals(METRIC_NAME_VOLUME, splitName); + + // This metric name should remain the same. + String unchangedName = DecayRpcSchedulerUtil + .splitMetricNameIfNeeded(RANDOM_RECORD_NAME, RANDOM_METRIC_NAME); + + assertEquals(RANDOM_METRIC_NAME, unchangedName); + } + + @Test + void testCheckMetricNameForUsername() { + // Get the username from the metric name. 
+ String decayRpcSchedulerUsername = DecayRpcSchedulerUtil + .checkMetricNameForUsername(RECORD_NAME, METRIC_NAME); + + assertEquals(USERNAME, decayRpcSchedulerUsername); + + // This metric doesn't contain a username in the metric name. + // DecayRpcSchedulerUtil.checkMetricNameForUsername() + // should return null. + String nullUsername = DecayRpcSchedulerUtil + .checkMetricNameForUsername(RANDOM_RECORD_NAME, RANDOM_METRIC_NAME); + + assertNull(nullUsername); + } + + @Test + void testCreateUsernameTagWithNullUsername() { + // GIVEN + final String username = null; + + // WHEN + Optional optionalMetricsTag = + DecayRpcSchedulerUtil.createUsernameTag(username); + + // THEN + Assertions.assertFalse(optionalMetricsTag.isPresent()); + } + + @Test + void testCreateUsernameTagWithNotNullUsername() { + // GIVEN + final String username = "username"; + + // WHEN + Optional optionalMetricsTag = + DecayRpcSchedulerUtil.createUsernameTag(username); + + // THEN + Assertions.assertTrue(optionalMetricsTag.isPresent()); + Assertions.assertEquals(username, optionalMetricsTag.get().value()); + Assertions.assertEquals(username, optionalMetricsTag.get().name()); + Assertions.assertEquals("caller username", + optionalMetricsTag.get().description()); + } +} diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestPrometheusMetricsSinkUtil.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestPrometheusMetricsSinkUtil.java new file mode 100644 index 000000000000..ef64bbb6e8e1 --- /dev/null +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestPrometheusMetricsSinkUtil.java @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.utils; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import org.apache.hadoop.metrics2.MetricsTag; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Class for unit tests for {@link PrometheusMetricsSinkUtil}. + */ +class TestPrometheusMetricsSinkUtil { + + private static final String USERNAME_TAG_NAME = "username"; + private static final String USERNAME_TAG_DESCRIPTION = "caller username"; + private static final String SERVERNAME_TAG_NAME = "servername"; + private static final String SERVERNAME_TAG_DESCRIPTION = "name of the server"; + + @Test + void testAddTagsAddUsernameTagWithNullUsername() { + // GIVEN + final String key = "key"; + final String username = null; + final String servername = null; + Collection unmodifiableMetricTags = + Collections.unmodifiableList(new ArrayList<>()); + + // WHEN + List metricsTags = PrometheusMetricsSinkUtil.addTags(key, + username, servername, unmodifiableMetricTags); + + // THEN + Assertions.assertFalse(metricsTags.stream() + .anyMatch(metricsTag -> metricsTag.name().equals(USERNAME_TAG_NAME) || + metricsTag.description().equals(USERNAME_TAG_DESCRIPTION))); + } + + @Test + void testAddTagsAddUsernameTagWithEmptyUsername() { + // GIVEN + final String key = "key"; + final String username = ""; + final String servername = null; + Collection unmodifiableMetricTags = + Collections.unmodifiableList(new ArrayList<>()); + + // WHEN + List metricsTags = PrometheusMetricsSinkUtil.addTags(key, + username, servername, unmodifiableMetricTags); + + // THEN + 
Assertions.assertFalse(metricsTags.stream() + .anyMatch(metricsTag -> metricsTag.name().equals(USERNAME_TAG_NAME) || + metricsTag.description().equals(USERNAME_TAG_DESCRIPTION))); + } + + @Test + void testAddTagsAddUsernameTagWithUsername() { + // GIVEN + final String key = "key"; + final String username = "username"; + final String servername = null; + Collection unmodifiableMetricTags = + Collections.unmodifiableList(new ArrayList<>()); + + // WHEN + List metricsTags = PrometheusMetricsSinkUtil.addTags(key, + username, servername, unmodifiableMetricTags); + + // THEN + Assertions.assertTrue(metricsTags.stream() + .anyMatch(metricsTag -> metricsTag.name().equals(USERNAME_TAG_NAME) && + metricsTag.description().equals(USERNAME_TAG_DESCRIPTION))); + } + + @Test + void testAddTagsAddServernameTagWithNoUgiMetricsKey() { + // GIVEN + final String key = "key"; + final String username = null; + final String servername = "SERVERNAME"; + Collection unmodifiableMetricTags = + Collections.unmodifiableList(new ArrayList<>()); + + // WHEN + List metricsTags = PrometheusMetricsSinkUtil.addTags(key, + username, servername, unmodifiableMetricTags); + + // THEN + Assertions.assertFalse(metricsTags.stream() + .anyMatch(metricsTag -> metricsTag.name().equals(SERVERNAME_TAG_NAME) || + metricsTag.description().equals(SERVERNAME_TAG_DESCRIPTION))); + } + + @Test + void testAddTagsAddServernameTagWithUgiMetricsKey() { + // GIVEN + final String key = "ugi_metrics"; + final String username = null; + final String servername = "SERVERNAME"; + Collection unmodifiableMetricTags = + Collections.unmodifiableList(new ArrayList<>()); + + // WHEN + List metricsTags = PrometheusMetricsSinkUtil.addTags(key, + username, servername, unmodifiableMetricTags); + + // THEN + Assertions.assertTrue(metricsTags.stream() + .anyMatch(metricsTag -> metricsTag.name().equals(SERVERNAME_TAG_NAME) && + metricsTag.description().equals(SERVERNAME_TAG_DESCRIPTION))); + } + + @Test + void testAddTags() { + // GIVEN + 
final String key = "ugi_metrics"; + final String username = "username"; + final String servername = "SERVERNAME"; + Collection unmodifiableMetricTags = + Collections.unmodifiableList(new ArrayList<>()); + + // WHEN + List metricsTags = PrometheusMetricsSinkUtil.addTags(key, + username, servername, unmodifiableMetricTags); + + // THEN + Assertions.assertTrue(metricsTags.stream() + .anyMatch(metricsTag -> metricsTag.name().equals(USERNAME_TAG_NAME))); + Assertions.assertTrue(metricsTags.stream() + .anyMatch(metricsTag -> metricsTag.name().equals(SERVERNAME_TAG_NAME))); + } + + @Test + void testNamingCamelCase() { + //THEN + Assertions.assertEquals("rpc_time_some_metrics", + PrometheusMetricsSinkUtil.prometheusName("RpcTime", "SomeMetrics")); + + Assertions.assertEquals("om_rpc_time_om_info_keys", + PrometheusMetricsSinkUtil.prometheusName("OMRpcTime", "OMInfoKeys")); + + Assertions.assertEquals("rpc_time_small", + PrometheusMetricsSinkUtil.prometheusName("RpcTime", "small")); + } + + @Test + void testNamingRocksDB() { + //RocksDB metrics are handled differently. 
+ // THEN + Assertions.assertEquals("rocksdb_om_db_num_open_connections", + PrometheusMetricsSinkUtil.prometheusName("Rocksdb_om.db", + "num_open_connections")); + } + + @Test + void testNamingPipeline() { + // GIVEN + String recordName = "SCMPipelineMetrics"; + String metricName = "NumBlocksAllocated-" + + "RATIS-THREE-47659e3d-40c9-43b3-9792-4982fc279aba"; + + // THEN + Assertions.assertEquals( + "scm_pipeline_metrics_" + "num_blocks_allocated_" + + "ratis_three_47659e3d_40c9_43b3_9792_4982fc279aba", + PrometheusMetricsSinkUtil.prometheusName(recordName, metricName)); + } + + @Test + void testNamingSpaces() { + //GIVEN + String recordName = "JvmMetrics"; + String metricName = "GcTimeMillisG1 Young Generation"; + + // THEN + Assertions.assertEquals( + "jvm_metrics_gc_time_millis_g1_young_generation", + PrometheusMetricsSinkUtil.prometheusName(recordName, metricName)); + } + + @Test + void testGetMetricName() { + // GIVEN + final String recordName = "record_name"; + final String metricName = "metric_name"; + + // WHEN + String newMetricName = PrometheusMetricsSinkUtil.getMetricName(recordName, + metricName); + + // THEN + Assertions.assertEquals(metricName, newMetricName); + } + + @Test + void testGetUsername() { + // GIVEN + final String recordName = "record_name"; + final String metricName = "metric_name"; + + // WHEN + String username = PrometheusMetricsSinkUtil.getUsername(recordName, + metricName); + + // THEN + Assertions.assertNull(username); + } + +} \ No newline at end of file diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestUgiMetricsUtil.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestUgiMetricsUtil.java new file mode 100644 index 000000000000..75ee8cd69463 --- /dev/null +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestUgiMetricsUtil.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.utils; + +import java.util.Optional; +import org.apache.hadoop.metrics2.MetricsTag; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Class for unit tests for {@link UgiMetricsUtil}. + */ +class TestUgiMetricsUtil { + + @Test + void testCreateServernameTagWithNonCompatibleKey() { + // GIVEN + final String key = "non_ugi"; + final String servername = "servername"; + + // WHEN + Optional optionalMetricsTag = + UgiMetricsUtil.createServernameTag(key, servername); + + // THEN + Assertions.assertFalse(optionalMetricsTag.isPresent()); + } + + @Test + void testCreateServernameTagWithCompatibleKey() { + // GIVEN + final String key = "ugi_metrics"; + final String servername = "servername"; + + // WHEN + Optional optionalMetricsTag = + UgiMetricsUtil.createServernameTag(key, servername); + + // THEN + Assertions.assertTrue(optionalMetricsTag.isPresent()); + Assertions.assertEquals(servername, optionalMetricsTag.get().value()); + Assertions.assertEquals(servername, optionalMetricsTag.get().name()); + Assertions.assertEquals("name of the server", + optionalMetricsTag.get().description()); + } + +} \ No newline at end of file diff --git a/hadoop-hdds/hadoop-dependency-client/pom.xml b/hadoop-hdds/hadoop-dependency-client/pom.xml index 5b7776b7ff9b..bf24441ca38a 100644 --- a/hadoop-hdds/hadoop-dependency-client/pom.xml +++ b/hadoop-hdds/hadoop-dependency-client/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-hadoop-dependency-client - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data 
Store Hadoop client dependencies Apache Ozone HDDS Hadoop Client dependencies diff --git a/hadoop-hdds/hadoop-dependency-server/pom.xml b/hadoop-hdds/hadoop-dependency-server/pom.xml index ab3ae72e82e8..5dd3ca38b935 100644 --- a/hadoop-hdds/hadoop-dependency-server/pom.xml +++ b/hadoop-hdds/hadoop-dependency-server/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-hadoop-dependency-server - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Hadoop server dependencies Apache Ozone HDDS Hadoop Server dependencies diff --git a/hadoop-hdds/hadoop-dependency-test/pom.xml b/hadoop-hdds/hadoop-dependency-test/pom.xml index 653bc33016dd..b379355d446d 100644 --- a/hadoop-hdds/hadoop-dependency-test/pom.xml +++ b/hadoop-hdds/hadoop-dependency-test/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-hadoop-dependency-test - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Hadoop test dependencies Apache Ozone HDDS Hadoop Test dependencies diff --git a/hadoop-hdds/interface-admin/pom.xml b/hadoop-hdds/interface-admin/pom.xml index 9d0331884ff2..bd013e04d0f8 100644 --- a/hadoop-hdds/interface-admin/pom.xml +++ b/hadoop-hdds/interface-admin/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-interface-admin - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Admin interface Apache Ozone HDDS Admin Interface diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index ccb5e2155e44..da7c6e15bd9b 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -174,6 +174,7 @@ enum Type { 
GetContainerReplicas = 34; GetReplicationManagerReport = 35; ResetDeletedBlockRetryCount = 36; + GetClosedContainerCount = 37; } /** diff --git a/hadoop-hdds/interface-admin/src/main/resources/proto.lock b/hadoop-hdds/interface-admin/src/main/resources/proto.lock index ec40a30649d4..d834dd4dffed 100644 --- a/hadoop-hdds/interface-admin/src/main/resources/proto.lock +++ b/hadoop-hdds/interface-admin/src/main/resources/proto.lock @@ -155,6 +155,22 @@ { "name": "QueryUpgradeFinalizationProgress", "integer": 32 + }, + { + "name": "GetContainerCount", + "integer": 33 + }, + { + "name": "GetContainerReplicas", + "integer": 34 + }, + { + "name": "GetReplicationManagerReport", + "integer": 35 + }, + { + "name": "ResetDeletedBlockRetryCount", + "integer": 36 } ] }, @@ -200,177 +216,236 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "version", - "type": "uint32" + "type": "uint32", + "optional": true }, { "id": 6, "name": "containerRequest", - "type": "ContainerRequestProto" + "type": "ContainerRequestProto", + "optional": true }, { "id": 7, "name": "getContainerRequest", - "type": "GetContainerRequestProto" + "type": "GetContainerRequestProto", + "optional": true }, { "id": 8, "name": "getContainerWithPipelineRequest", - "type": "GetContainerWithPipelineRequestProto" + "type": "GetContainerWithPipelineRequestProto", + "optional": true }, { "id": 9, "name": "scmListContainerRequest", - "type": "SCMListContainerRequestProto" + "type": "SCMListContainerRequestProto", + "optional": true }, { "id": 10, "name": "scmDeleteContainerRequest", - "type": "SCMDeleteContainerRequestProto" + "type": "SCMDeleteContainerRequestProto", + "optional": true }, { "id": 11, "name": "nodeQueryRequest", - "type": "NodeQueryRequestProto" + "type": "NodeQueryRequestProto", + "optional": true }, { "id": 12, "name": "scmCloseContainerRequest", - 
"type": "SCMCloseContainerRequestProto" + "type": "SCMCloseContainerRequestProto", + "optional": true }, { "id": 13, "name": "pipelineRequest", - "type": "PipelineRequestProto" + "type": "PipelineRequestProto", + "optional": true }, { "id": 14, "name": "listPipelineRequest", - "type": "ListPipelineRequestProto" + "type": "ListPipelineRequestProto", + "optional": true }, { "id": 15, "name": "activatePipelineRequest", - "type": "ActivatePipelineRequestProto" + "type": "ActivatePipelineRequestProto", + "optional": true }, { "id": 16, "name": "deactivatePipelineRequest", - "type": "DeactivatePipelineRequestProto" + "type": "DeactivatePipelineRequestProto", + "optional": true }, { "id": 17, "name": "closePipelineRequest", - "type": "ClosePipelineRequestProto" + "type": "ClosePipelineRequestProto", + "optional": true }, { "id": 18, "name": "getScmInfoRequest", - "type": "GetScmInfoRequestProto" + "type": "GetScmInfoRequestProto", + "optional": true }, { "id": 19, "name": "inSafeModeRequest", - "type": "InSafeModeRequestProto" + "type": "InSafeModeRequestProto", + "optional": true }, { "id": 20, "name": "forceExitSafeModeRequest", - "type": "ForceExitSafeModeRequestProto" + "type": "ForceExitSafeModeRequestProto", + "optional": true }, { "id": 21, "name": "startReplicationManagerRequest", - "type": "StartReplicationManagerRequestProto" + "type": "StartReplicationManagerRequestProto", + "optional": true }, { "id": 22, "name": "stopReplicationManagerRequest", - "type": "StopReplicationManagerRequestProto" + "type": "StopReplicationManagerRequestProto", + "optional": true }, { "id": 23, "name": "seplicationManagerStatusRequest", - "type": "ReplicationManagerStatusRequestProto" + "type": "ReplicationManagerStatusRequestProto", + "optional": true }, { "id": 24, "name": "getPipelineRequest", - "type": "GetPipelineRequestProto" + "type": "GetPipelineRequestProto", + "optional": true }, { "id": 25, "name": "getContainerWithPipelineBatchRequest", - "type": 
"GetContainerWithPipelineBatchRequestProto" + "type": "GetContainerWithPipelineBatchRequestProto", + "optional": true }, { "id": 26, "name": "getSafeModeRuleStatusesRequest", - "type": "GetSafeModeRuleStatusesRequestProto" + "type": "GetSafeModeRuleStatusesRequestProto", + "optional": true }, { "id": 27, "name": "decommissionNodesRequest", - "type": "DecommissionNodesRequestProto" + "type": "DecommissionNodesRequestProto", + "optional": true }, { "id": 28, "name": "recommissionNodesRequest", - "type": "RecommissionNodesRequestProto" + "type": "RecommissionNodesRequestProto", + "optional": true }, { "id": 29, "name": "startMaintenanceNodesRequest", - "type": "StartMaintenanceNodesRequestProto" + "type": "StartMaintenanceNodesRequestProto", + "optional": true }, { "id": 30, "name": "DatanodeUsageInfoRequest", - "type": "DatanodeUsageInfoRequestProto" + "type": "DatanodeUsageInfoRequestProto", + "optional": true }, { "id": 31, "name": "getExistContainerWithPipelinesInBatchRequest", - "type": "GetExistContainerWithPipelinesInBatchRequestProto" + "type": "GetExistContainerWithPipelinesInBatchRequestProto", + "optional": true }, { "id": 32, "name": "containerTokenRequest", - "type": "GetContainerTokenRequestProto" + "type": "GetContainerTokenRequestProto", + "optional": true }, { "id": 33, "name": "startContainerBalancerRequest", - "type": "StartContainerBalancerRequestProto" + "type": "StartContainerBalancerRequestProto", + "optional": true }, { "id": 34, "name": "stopContainerBalancerRequest", - "type": "StopContainerBalancerRequestProto" + "type": "StopContainerBalancerRequestProto", + "optional": true }, { "id": 35, "name": "containerBalancerStatusRequest", - "type": "ContainerBalancerStatusRequestProto" + "type": "ContainerBalancerStatusRequestProto", + "optional": true }, { "id": 36, "name": "finalizeScmUpgradeRequest", - "type": "FinalizeScmUpgradeRequestProto" + "type": "FinalizeScmUpgradeRequestProto", + "optional": true }, { "id": 37, "name": 
"queryUpgradeFinalizationProgressRequest", - "type": "QueryUpgradeFinalizationProgressRequestProto" + "type": "QueryUpgradeFinalizationProgressRequestProto", + "optional": true + }, + { + "id": 38, + "name": "getContainerCountRequest", + "type": "GetContainerCountRequestProto", + "optional": true + }, + { + "id": 39, + "name": "getContainerReplicasRequest", + "type": "GetContainerReplicasRequestProto", + "optional": true + }, + { + "id": 40, + "name": "replicationManagerReportRequest", + "type": "ReplicationManagerReportRequestProto", + "optional": true + }, + { + "id": 41, + "name": "resetDeletedBlockRetryCountRequest", + "type": "ResetDeletedBlockRetryCountRequestProto", + "optional": true } ] }, @@ -380,17 +455,20 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "success", "type": "bool", + "optional": true, "options": [ { "name": "default", @@ -401,172 +479,230 @@ { "id": 4, "name": "message", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "status", - "type": "Status" + "type": "Status", + "required": true }, { "id": 6, "name": "containerResponse", - "type": "ContainerResponseProto" + "type": "ContainerResponseProto", + "optional": true }, { "id": 7, "name": "getContainerResponse", - "type": "GetContainerResponseProto" + "type": "GetContainerResponseProto", + "optional": true }, { "id": 8, "name": "getContainerWithPipelineResponse", - "type": "GetContainerWithPipelineResponseProto" + "type": "GetContainerWithPipelineResponseProto", + "optional": true }, { "id": 9, "name": "scmListContainerResponse", - "type": "SCMListContainerResponseProto" + "type": "SCMListContainerResponseProto", + "optional": true }, { "id": 10, "name": "scmDeleteContainerResponse", - "type": "SCMDeleteContainerResponseProto" + "type": "SCMDeleteContainerResponseProto", + "optional": true }, { "id": 11, 
"name": "nodeQueryResponse", - "type": "NodeQueryResponseProto" + "type": "NodeQueryResponseProto", + "optional": true }, { "id": 12, "name": "scmCloseContainerResponse", - "type": "SCMCloseContainerResponseProto" + "type": "SCMCloseContainerResponseProto", + "optional": true }, { "id": 13, "name": "pipelineResponse", - "type": "PipelineResponseProto" + "type": "PipelineResponseProto", + "optional": true }, { "id": 14, "name": "listPipelineResponse", - "type": "ListPipelineResponseProto" + "type": "ListPipelineResponseProto", + "optional": true }, { "id": 15, "name": "activatePipelineResponse", - "type": "ActivatePipelineResponseProto" + "type": "ActivatePipelineResponseProto", + "optional": true }, { "id": 16, "name": "deactivatePipelineResponse", - "type": "DeactivatePipelineResponseProto" + "type": "DeactivatePipelineResponseProto", + "optional": true }, { "id": 17, "name": "closePipelineResponse", - "type": "ClosePipelineResponseProto" + "type": "ClosePipelineResponseProto", + "optional": true }, { "id": 18, "name": "getScmInfoResponse", - "type": "GetScmInfoResponseProto" + "type": "GetScmInfoResponseProto", + "optional": true }, { "id": 19, "name": "inSafeModeResponse", - "type": "InSafeModeResponseProto" + "type": "InSafeModeResponseProto", + "optional": true }, { "id": 20, "name": "forceExitSafeModeResponse", - "type": "ForceExitSafeModeResponseProto" + "type": "ForceExitSafeModeResponseProto", + "optional": true }, { "id": 21, "name": "startReplicationManagerResponse", - "type": "StartReplicationManagerResponseProto" + "type": "StartReplicationManagerResponseProto", + "optional": true }, { "id": 22, "name": "stopReplicationManagerResponse", - "type": "StopReplicationManagerResponseProto" + "type": "StopReplicationManagerResponseProto", + "optional": true }, { "id": 23, "name": "replicationManagerStatusResponse", - "type": "ReplicationManagerStatusResponseProto" + "type": "ReplicationManagerStatusResponseProto", + "optional": true }, { "id": 24, "name": 
"getPipelineResponse", - "type": "GetPipelineResponseProto" + "type": "GetPipelineResponseProto", + "optional": true }, { "id": 25, "name": "getContainerWithPipelineBatchResponse", - "type": "GetContainerWithPipelineBatchResponseProto" + "type": "GetContainerWithPipelineBatchResponseProto", + "optional": true }, { "id": 26, "name": "getSafeModeRuleStatusesResponse", - "type": "GetSafeModeRuleStatusesResponseProto" + "type": "GetSafeModeRuleStatusesResponseProto", + "optional": true }, { "id": 27, "name": "decommissionNodesResponse", - "type": "DecommissionNodesResponseProto" + "type": "DecommissionNodesResponseProto", + "optional": true }, { "id": 28, "name": "recommissionNodesResponse", - "type": "RecommissionNodesResponseProto" + "type": "RecommissionNodesResponseProto", + "optional": true }, { "id": 29, "name": "startMaintenanceNodesResponse", - "type": "StartMaintenanceNodesResponseProto" + "type": "StartMaintenanceNodesResponseProto", + "optional": true }, { "id": 30, "name": "DatanodeUsageInfoResponse", - "type": "DatanodeUsageInfoResponseProto" + "type": "DatanodeUsageInfoResponseProto", + "optional": true }, { "id": 31, "name": "getExistContainerWithPipelinesInBatchResponse", - "type": "GetExistContainerWithPipelinesInBatchResponseProto" + "type": "GetExistContainerWithPipelinesInBatchResponseProto", + "optional": true }, { "id": 32, "name": "containerTokenResponse", - "type": "GetContainerTokenResponseProto" + "type": "GetContainerTokenResponseProto", + "optional": true }, { "id": 33, "name": "startContainerBalancerResponse", - "type": "StartContainerBalancerResponseProto" + "type": "StartContainerBalancerResponseProto", + "optional": true }, { "id": 34, "name": "stopContainerBalancerResponse", - "type": "StopContainerBalancerResponseProto" + "type": "StopContainerBalancerResponseProto", + "optional": true }, { "id": 35, "name": "containerBalancerStatusResponse", - "type": "ContainerBalancerStatusResponseProto" + "type": 
"ContainerBalancerStatusResponseProto", + "optional": true }, { "id": 36, "name": "finalizeScmUpgradeResponse", - "type": "FinalizeScmUpgradeResponseProto" + "type": "FinalizeScmUpgradeResponseProto", + "optional": true }, { "id": 37, "name": "queryUpgradeFinalizationProgressResponse", - "type": "QueryUpgradeFinalizationProgressResponseProto" + "type": "QueryUpgradeFinalizationProgressResponseProto", + "optional": true + }, + { + "id": 38, + "name": "getContainerCountResponse", + "type": "GetContainerCountResponseProto", + "optional": true + }, + { + "id": 39, + "name": "getContainerReplicasResponse", + "type": "GetContainerReplicasResponseProto", + "optional": true + }, + { + "id": 40, + "name": "getReplicationManagerReportResponse", + "type": "ReplicationManagerReportResponseProto", + "optional": true + }, + { + "id": 41, + "name": "resetDeletedBlockRetryCountResponse", + "type": "ResetDeletedBlockRetryCountResponseProto", + "optional": true } ] }, @@ -576,22 +712,26 @@ { "id": 2, "name": "replicationFactor", - "type": "ReplicationFactor" + "type": "ReplicationFactor", + "required": true }, { "id": 3, "name": "replicationType", - "type": "ReplicationType" + "type": "ReplicationType", + "required": true }, { "id": 4, "name": "owner", - "type": "string" + "type": "string", + "required": true }, { "id": 5, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -601,17 +741,20 @@ { "id": 1, "name": "errorCode", - "type": "Error" + "type": "Error", + "required": true }, { "id": 2, "name": "containerWithPipeline", - "type": "ContainerWithPipeline" + "type": "ContainerWithPipeline", + "required": true }, { "id": 3, "name": "errorMessage", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -621,12 +764,14 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -636,7 +781,8 @@ { "id": 1, 
"name": "containerInfo", - "type": "ContainerInfoProto" + "type": "ContainerInfoProto", + "required": true } ] }, @@ -646,12 +792,14 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -661,7 +809,36 @@ { "id": 1, "name": "containerWithPipeline", - "type": "ContainerWithPipeline" + "type": "ContainerWithPipeline", + "required": true + } + ] + }, + { + "name": "GetContainerReplicasRequestProto", + "fields": [ + { + "id": 1, + "name": "containerID", + "type": "int64", + "required": true + }, + { + "id": 2, + "name": "traceID", + "type": "string", + "optional": true + } + ] + }, + { + "name": "GetContainerReplicasResponseProto", + "fields": [ + { + "id": 1, + "name": "containerReplica", + "type": "SCMContainerReplicaProto", + "is_repeated": true } ] }, @@ -677,7 +854,8 @@ { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -693,7 +871,8 @@ { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -706,17 +885,20 @@ { "id": 1, "name": "ruleName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "validate", - "type": "bool" + "type": "bool", + "required": true }, { "id": 3, "name": "statusText", - "type": "string" + "type": "string", + "required": true } ] }, @@ -759,22 +941,44 @@ { "id": 1, "name": "count", - "type": "uint32" + "type": "uint32", + "required": true }, { "id": 2, "name": "startContainerID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 3, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 4, "name": "state", - "type": "LifeCycleState" + "type": "LifeCycleState", + "optional": true + }, + { + "id": 5, + "name": "factor", + "type": "ReplicationFactor", + "optional": true + }, + { + "id": 6, + "name": "type", + "type": "ReplicationType", + 
"optional": true + }, + { + "id": 7, + "name": "ecReplicationConfig", + "type": "ECReplicationConfig", + "optional": true } ] }, @@ -795,12 +999,14 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -813,12 +1019,14 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -831,27 +1039,32 @@ { "id": 1, "name": "state", - "type": "NodeState" + "type": "NodeState", + "optional": true }, { "id": 2, "name": "scope", - "type": "QueryScope" + "type": "QueryScope", + "required": true }, { "id": 3, "name": "poolName", - "type": "string" + "type": "string", + "optional": true }, { "id": 4, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "opState", - "type": "NodeOperationalState" + "type": "NodeOperationalState", + "optional": true } ] }, @@ -872,22 +1085,26 @@ { "id": 1, "name": "ipaddress", - "type": "string" + "type": "string", + "optional": true }, { "id": 2, "name": "uuid", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "mostUsed", - "type": "bool" + "type": "bool", + "optional": true }, { "id": 4, "name": "count", - "type": "uint32" + "type": "uint32", + "optional": true } ] }, @@ -919,12 +1136,14 @@ { "id": 1, "name": "host", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "error", - "type": "string" + "type": "string", + "required": true } ] }, @@ -973,7 +1192,8 @@ { "id": 2, "name": "endInHours", - "type": "int64" + "type": "int64", + "optional": true } ] }, @@ -994,27 +1214,32 @@ { "id": 1, "name": "replicationType", - "type": "ReplicationType" + "type": "ReplicationType", + "required": true }, { "id": 2, "name": "replicationFactor", - "type": "ReplicationFactor" + "type": 
"ReplicationFactor", + "required": true }, { "id": 3, "name": "nodePool", - "type": "NodePool" + "type": "NodePool", + "optional": true }, { "id": 4, "name": "pipelineID", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1024,17 +1249,20 @@ { "id": 1, "name": "errorCode", - "type": "Error" + "type": "Error", + "required": true }, { "id": 2, "name": "pipeline", - "type": "Pipeline" + "type": "Pipeline", + "optional": true }, { "id": 3, "name": "errorMessage", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1044,7 +1272,8 @@ { "id": 1, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1065,12 +1294,14 @@ { "id": 1, "name": "pipelineID", - "type": "PipelineID" + "type": "PipelineID", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1080,7 +1311,22 @@ { "id": 1, "name": "pipeline", - "type": "Pipeline" + "type": "Pipeline", + "required": true + } + ] + }, + { + "name": "GetContainerCountRequestProto" + }, + { + "name": "GetContainerCountResponseProto", + "fields": [ + { + "id": 1, + "name": "containerCount", + "type": "int64", + "required": true } ] }, @@ -1090,12 +1336,14 @@ { "id": 1, "name": "pipelineID", - "type": "PipelineID" + "type": "PipelineID", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1108,12 +1356,14 @@ { "id": 1, "name": "pipelineID", - "type": "PipelineID" + "type": "PipelineID", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1126,12 +1376,14 @@ { "id": 1, "name": "pipelineID", - "type": "PipelineID" + "type": "PipelineID", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1144,7 +1396,8 @@ { "id": 
1, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1154,7 +1407,8 @@ { "id": 1, "name": "inSafeMode", - "type": "bool" + "type": "bool", + "required": true } ] }, @@ -1164,7 +1418,8 @@ { "id": 1, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1174,7 +1429,8 @@ { "id": 1, "name": "exitedSafeMode", - "type": "bool" + "type": "bool", + "required": true } ] }, @@ -1184,7 +1440,8 @@ { "id": 1, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1197,7 +1454,8 @@ { "id": 1, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1210,7 +1468,8 @@ { "id": 1, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1220,7 +1479,58 @@ { "id": 1, "name": "isRunning", - "type": "bool" + "type": "bool", + "required": true + } + ] + }, + { + "name": "ReplicationManagerReportRequestProto", + "fields": [ + { + "id": 1, + "name": "traceID", + "type": "string", + "optional": true + } + ] + }, + { + "name": "ReplicationManagerReportResponseProto", + "fields": [ + { + "id": 1, + "name": "report", + "type": "ReplicationManagerReportProto", + "required": true + } + ] + }, + { + "name": "ResetDeletedBlockRetryCountRequestProto", + "fields": [ + { + "id": 1, + "name": "traceID", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "transactionId", + "type": "int64", + "is_repeated": true + } + ] + }, + { + "name": "ResetDeletedBlockRetryCountResponseProto", + "fields": [ + { + "id": 1, + "name": "resetCount", + "type": "int32", + "required": true } ] }, @@ -1230,7 +1540,8 @@ { "id": 1, "name": "upgradeClientId", - "type": "string" + "type": "string", + "required": true } ] }, @@ -1240,7 +1551,8 @@ { "id": 1, "name": "status", - "type": "hadoop.hdds.UpgradeFinalizationStatus" + "type": "hadoop.hdds.UpgradeFinalizationStatus", + "required": true } ] }, @@ -1250,17 +1562,20 @@ { "id": 1, "name": 
"upgradeClientId", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "takeover", - "type": "bool" + "type": "bool", + "optional": true }, { "id": 3, "name": "readonly", - "type": "bool" + "type": "bool", + "optional": true } ] }, @@ -1270,7 +1585,8 @@ { "id": 1, "name": "status", - "type": "hadoop.hdds.UpgradeFinalizationStatus" + "type": "hadoop.hdds.UpgradeFinalizationStatus", + "required": true } ] }, @@ -1280,22 +1596,26 @@ { "id": 1, "name": "ownerId", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "containerId", - "type": "ContainerID" + "type": "ContainerID", + "required": true }, { "id": 3, "name": "expiryDate", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 4, "name": "certSerialId", - "type": "string" + "type": "string", + "required": true } ] }, @@ -1305,7 +1625,8 @@ { "id": 1, "name": "containerID", - "type": "ContainerID" + "type": "ContainerID", + "required": true } ] }, @@ -1315,7 +1636,8 @@ { "id": 1, "name": "token", - "type": "TokenProto" + "type": "TokenProto", + "required": true } ] }, @@ -1325,37 +1647,68 @@ { "id": 1, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 2, "name": "threshold", - "type": "double" + "type": "double", + "optional": true }, { "id": 3, "name": "idleiterations", - "type": "int32" + "type": "int32", + "optional": true, + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "id": 4, "name": "maxDatanodesRatioToInvolvePerIteration", - "type": "double" + "type": "double", + "optional": true, + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "id": 5, "name": "maxSizeToMovePerIterationInGB", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 6, "name": "maxSizeEnteringTargetInGB", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 7, "name": "maxSizeLeavingSourceInGB", - "type": "int64" + "type": "int64", + "optional": true + }, + 
{ + "id": 8, + "name": "maxDatanodesPercentageToInvolvePerIteration", + "type": "int32", + "optional": true + }, + { + "id": 9, + "name": "iterations", + "type": "int32", + "optional": true } ] }, @@ -1365,7 +1718,14 @@ { "id": 1, "name": "start", - "type": "bool" + "type": "bool", + "required": true + }, + { + "id": 2, + "name": "message", + "type": "string", + "optional": true } ] }, @@ -1375,7 +1735,8 @@ { "id": 1, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1388,7 +1749,8 @@ { "id": 1, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1398,7 +1760,8 @@ { "id": 1, "name": "isRunning", - "type": "bool" + "type": "bool", + "required": true } ] } diff --git a/hadoop-hdds/interface-client/pom.xml b/hadoop-hdds/interface-client/pom.xml index 808664305949..eac95a00e1b7 100644 --- a/hadoop-hdds/interface-client/pom.xml +++ b/hadoop-hdds/interface-client/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-interface-client - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Client interface Apache Ozone HDDS Client Interface diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto index b6c1c9c0a3a0..1b7fcad140aa 100644 --- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto +++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto @@ -100,6 +100,9 @@ enum Type { GetSmallFile = 16; CloseContainer = 17; GetCommittedBlockLength = 18; + + StreamInit = 19; + StreamWrite = 20; } @@ -401,7 +404,7 @@ enum ChecksumType { message WriteChunkRequestProto { required DatanodeBlockID blockID = 1; - required ChunkInfo chunkData = 2; + optional ChunkInfo chunkData = 2; optional bytes data = 3; } @@ -474,11 +477,20 @@ message GetSmallFileResponseProto { required 
ReadChunkResponseProto data = 1; } +enum CopyContainerCompressProto { + NO_COMPRESSION = 1; + GZIP = 2; + LZ4 = 3; + SNAPPY = 4; + ZSTD = 5; +} + message CopyContainerRequestProto { required int64 containerID = 1; required uint64 readOffset = 2; optional uint64 len = 3; optional uint32 version = 4; + optional CopyContainerCompressProto compression = 5; } message CopyContainerResponseProto { diff --git a/hadoop-hdds/interface-client/src/main/resources/proto.lock b/hadoop-hdds/interface-client/src/main/resources/proto.lock index 84a4a442c8a3..ee6d2251d3e5 100644 --- a/hadoop-hdds/interface-client/src/main/resources/proto.lock +++ b/hadoop-hdds/interface-client/src/main/resources/proto.lock @@ -251,6 +251,18 @@ { "name": "CHUNK_FILE_INCONSISTENCY", "integer": 43 + }, + { + "name": "DELETE_ON_NON_EMPTY_CONTAINER", + "integer": 44 + }, + { + "name": "EXPORT_CONTAINER_METADATA_FAILED", + "integer": 45 + }, + { + "name": "IMPORT_CONTAINER_METADATA_FAILED", + "integer": 46 } ] }, @@ -284,6 +296,10 @@ { "name": "DELETED", "integer": 7 + }, + { + "name": "RECOVERING", + "integer": 8 } ] }, @@ -341,23 +357,32 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "localID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 3, "name": "blockCommitSequenceId", "type": "uint64", + "optional": true, "options": [ { "name": "default", "value": "0" } ] + }, + { + "id": 4, + "name": "replicaIndex", + "type": "int32", + "optional": true } ] }, @@ -367,12 +392,14 @@ { "id": 1, "name": "key", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "value", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -382,122 +409,158 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "containerID", - "type": "int64" + "type": "int64", 
+ "required": true }, { "id": 4, "name": "datanodeUuid", - "type": "string" + "type": "string", + "required": true }, { "id": 5, "name": "pipelineID", - "type": "string" + "type": "string", + "optional": true }, { "id": 6, "name": "createContainer", - "type": "CreateContainerRequestProto" + "type": "CreateContainerRequestProto", + "optional": true }, { "id": 7, "name": "readContainer", - "type": "ReadContainerRequestProto" + "type": "ReadContainerRequestProto", + "optional": true }, { "id": 8, "name": "updateContainer", - "type": "UpdateContainerRequestProto" + "type": "UpdateContainerRequestProto", + "optional": true }, { "id": 9, "name": "deleteContainer", - "type": "DeleteContainerRequestProto" + "type": "DeleteContainerRequestProto", + "optional": true }, { "id": 10, "name": "listContainer", - "type": "ListContainerRequestProto" + "type": "ListContainerRequestProto", + "optional": true }, { "id": 11, "name": "closeContainer", - "type": "CloseContainerRequestProto" + "type": "CloseContainerRequestProto", + "optional": true }, { "id": 12, "name": "putBlock", - "type": "PutBlockRequestProto" + "type": "PutBlockRequestProto", + "optional": true }, { "id": 13, "name": "getBlock", - "type": "GetBlockRequestProto" + "type": "GetBlockRequestProto", + "optional": true }, { "id": 14, "name": "deleteBlock", - "type": "DeleteBlockRequestProto" + "type": "DeleteBlockRequestProto", + "optional": true, + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "id": 15, "name": "listBlock", - "type": "ListBlockRequestProto" + "type": "ListBlockRequestProto", + "optional": true }, { "id": 16, "name": "readChunk", - "type": "ReadChunkRequestProto" + "type": "ReadChunkRequestProto", + "optional": true }, { "id": 17, "name": "writeChunk", - "type": "WriteChunkRequestProto" + "type": "WriteChunkRequestProto", + "optional": true }, { "id": 18, "name": "deleteChunk", - "type": "DeleteChunkRequestProto" + "type": "DeleteChunkRequestProto", + "optional": true, + 
"options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "id": 19, "name": "listChunk", - "type": "ListChunkRequestProto" + "type": "ListChunkRequestProto", + "optional": true }, { "id": 20, "name": "putSmallFile", - "type": "PutSmallFileRequestProto" + "type": "PutSmallFileRequestProto", + "optional": true }, { "id": 21, "name": "getSmallFile", - "type": "GetSmallFileRequestProto" + "type": "GetSmallFileRequestProto", + "optional": true }, { "id": 22, "name": "getCommittedBlockLength", - "type": "GetCommittedBlockLengthRequestProto" + "type": "GetCommittedBlockLengthRequestProto", + "optional": true }, { "id": 23, "name": "encodedToken", - "type": "string" + "type": "string", + "optional": true }, { "id": 24, "name": "version", - "type": "uint32" + "type": "uint32", + "optional": true } ] }, @@ -507,107 +570,128 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "result", - "type": "Result" + "type": "Result", + "required": true }, { "id": 4, "name": "message", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "createContainer", - "type": "CreateContainerResponseProto" + "type": "CreateContainerResponseProto", + "optional": true }, { "id": 6, "name": "readContainer", - "type": "ReadContainerResponseProto" + "type": "ReadContainerResponseProto", + "optional": true }, { "id": 7, "name": "updateContainer", - "type": "UpdateContainerResponseProto" + "type": "UpdateContainerResponseProto", + "optional": true }, { "id": 8, "name": "deleteContainer", - "type": "DeleteContainerResponseProto" + "type": "DeleteContainerResponseProto", + "optional": true }, { "id": 9, "name": "listContainer", - "type": "ListContainerResponseProto" + "type": "ListContainerResponseProto", + "optional": true }, { "id": 10, "name": "closeContainer", - "type": "CloseContainerResponseProto" + "type": 
"CloseContainerResponseProto", + "optional": true }, { "id": 11, "name": "putBlock", - "type": "PutBlockResponseProto" + "type": "PutBlockResponseProto", + "optional": true }, { "id": 12, "name": "getBlock", - "type": "GetBlockResponseProto" + "type": "GetBlockResponseProto", + "optional": true }, { "id": 13, "name": "deleteBlock", - "type": "DeleteBlockResponseProto" + "type": "DeleteBlockResponseProto", + "optional": true }, { "id": 14, "name": "listBlock", - "type": "ListBlockResponseProto" + "type": "ListBlockResponseProto", + "optional": true }, { "id": 15, "name": "writeChunk", - "type": "WriteChunkResponseProto" + "type": "WriteChunkResponseProto", + "optional": true }, { "id": 16, "name": "readChunk", - "type": "ReadChunkResponseProto" + "type": "ReadChunkResponseProto", + "optional": true }, { "id": 17, "name": "deleteChunk", - "type": "DeleteChunkResponseProto" + "type": "DeleteChunkResponseProto", + "optional": true }, { "id": 18, "name": "listChunk", - "type": "ListChunkResponseProto" + "type": "ListChunkResponseProto", + "optional": true }, { "id": 19, "name": "putSmallFile", - "type": "PutSmallFileResponseProto" + "type": "PutSmallFileResponseProto", + "optional": true }, { "id": 20, "name": "getSmallFile", - "type": "GetSmallFileResponseProto" + "type": "GetSmallFileResponseProto", + "optional": true }, { "id": 21, "name": "getCommittedBlockLength", - "type": "GetCommittedBlockLengthResponseProto" + "type": "GetCommittedBlockLengthResponseProto", + "optional": true } ] }, @@ -617,7 +701,8 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, @@ -628,27 +713,32 @@ { "id": 4, "name": "containerPath", - "type": "string" + "type": "string", + "optional": true }, { "id": 6, "name": "bytesUsed", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 7, "name": "size", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 8, "name": "blockCount", - "type": "int64" + "type": 
"int64", + "optional": true }, { "id": 9, "name": "state", "type": "State", + "optional": true, "options": [ { "name": "default", @@ -660,6 +750,7 @@ "id": 10, "name": "containerType", "type": "ContainerType", + "optional": true, "options": [ { "name": "default", @@ -695,12 +786,25 @@ "id": 3, "name": "containerType", "type": "ContainerType", + "optional": true, "options": [ { "name": "default", "value": "KeyValueContainer" } ] + }, + { + "id": 4, + "name": "replicaIndex", + "type": "int32", + "optional": true + }, + { + "id": 5, + "name": "state", + "type": "ContainerDataProto.State", + "optional": true } ] }, @@ -716,7 +820,8 @@ { "id": 1, "name": "containerData", - "type": "ContainerDataProto" + "type": "ContainerDataProto", + "optional": true } ] }, @@ -733,6 +838,7 @@ "id": 3, "name": "forceUpdate", "type": "bool", + "optional": true, "options": [ { "name": "default", @@ -752,6 +858,7 @@ "id": 2, "name": "forceDelete", "type": "bool", + "optional": true, "options": [ { "name": "default", @@ -770,7 +877,8 @@ { "id": 2, "name": "count", - "type": "uint32" + "type": "uint32", + "optional": true } ] }, @@ -794,12 +902,14 @@ { "id": 1, "name": "hash", - "type": "string" + "type": "string", + "optional": true }, { "id": 2, "name": "containerID", - "type": "int64" + "type": "int64", + "optional": true } ] }, @@ -809,12 +919,14 @@ { "id": 1, "name": "blockID", - "type": "DatanodeBlockID" + "type": "DatanodeBlockID", + "required": true }, { "id": 2, "name": "flags", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 3, @@ -831,7 +943,8 @@ { "id": 5, "name": "size", - "type": "int64" + "type": "int64", + "optional": true } ] }, @@ -841,12 +954,14 @@ { "id": 1, "name": "blockData", - "type": "BlockData" + "type": "BlockData", + "required": true }, { "id": 2, "name": "eof", - "type": "bool" + "type": "bool", + "optional": true } ] }, @@ -856,7 +971,8 @@ { "id": 1, "name": "committedBlockLength", - "type": "GetCommittedBlockLengthResponseProto" + "type": 
"GetCommittedBlockLengthResponseProto", + "required": true } ] }, @@ -866,7 +982,8 @@ { "id": 1, "name": "blockID", - "type": "DatanodeBlockID" + "type": "DatanodeBlockID", + "required": true } ] }, @@ -876,7 +993,8 @@ { "id": 1, "name": "blockData", - "type": "BlockData" + "type": "BlockData", + "required": true } ] }, @@ -886,7 +1004,8 @@ { "id": 1, "name": "blockID", - "type": "DatanodeBlockID" + "type": "DatanodeBlockID", + "required": true } ] }, @@ -896,7 +1015,8 @@ { "id": 1, "name": "blockID", - "type": "DatanodeBlockID" + "type": "DatanodeBlockID", + "required": true } ] }, @@ -906,12 +1026,14 @@ { "id": 1, "name": "blockID", - "type": "DatanodeBlockID" + "type": "DatanodeBlockID", + "required": true }, { "id": 2, "name": "blockLength", - "type": "int64" + "type": "int64", + "required": true } ] }, @@ -924,12 +1046,14 @@ { "id": 2, "name": "startLocalID", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 3, "name": "count", - "type": "uint32" + "type": "uint32", + "required": true } ] }, @@ -950,17 +1074,20 @@ { "id": 1, "name": "chunkName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "offset", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 3, "name": "len", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 4, @@ -971,7 +1098,8 @@ { "id": 5, "name": "checksumData", - "type": "ChecksumData" + "type": "ChecksumData", + "required": true } ] }, @@ -992,12 +1120,14 @@ { "id": 1, "name": "type", - "type": "ChecksumType" + "type": "ChecksumType", + "required": true }, { "id": 2, "name": "bytesPerChecksum", - "type": "uint32" + "type": "uint32", + "required": true }, { "id": 3, @@ -1013,17 +1143,20 @@ { "id": 1, "name": "blockID", - "type": "DatanodeBlockID" + "type": "DatanodeBlockID", + "required": true }, { "id": 2, "name": "chunkData", - "type": "ChunkInfo" + "type": "ChunkInfo", + "required": true }, { "id": 3, "name": "data", - "type": "bytes" + "type": "bytes", + 
"optional": true } ] }, @@ -1036,17 +1169,20 @@ { "id": 1, "name": "blockID", - "type": "DatanodeBlockID" + "type": "DatanodeBlockID", + "required": true }, { "id": 2, "name": "chunkData", - "type": "ChunkInfo" + "type": "ChunkInfo", + "required": true }, { "id": 3, "name": "readChunkVersion", - "type": "ReadChunkVersion" + "type": "ReadChunkVersion", + "optional": true } ] }, @@ -1056,12 +1192,14 @@ { "id": 1, "name": "blockID", - "type": "DatanodeBlockID" + "type": "DatanodeBlockID", + "required": true }, { "id": 2, "name": "chunkData", - "type": "ChunkInfo" + "type": "ChunkInfo", + "required": true }, { "id": 3, @@ -1092,12 +1230,14 @@ { "id": 1, "name": "blockID", - "type": "DatanodeBlockID" + "type": "DatanodeBlockID", + "required": true }, { "id": 2, "name": "chunkData", - "type": "ChunkInfo" + "type": "ChunkInfo", + "required": true } ] }, @@ -1110,17 +1250,20 @@ { "id": 1, "name": "blockID", - "type": "DatanodeBlockID" + "type": "DatanodeBlockID", + "required": true }, { "id": 2, "name": "prevChunkName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "count", - "type": "uint32" + "type": "uint32", + "required": true } ] }, @@ -1141,17 +1284,20 @@ { "id": 1, "name": "block", - "type": "PutBlockRequestProto" + "type": "PutBlockRequestProto", + "required": true }, { "id": 2, "name": "chunkInfo", - "type": "ChunkInfo" + "type": "ChunkInfo", + "required": true }, { "id": 3, "name": "data", - "type": "bytes" + "type": "bytes", + "required": true } ] }, @@ -1161,7 +1307,8 @@ { "id": 1, "name": "committedBlockLength", - "type": "GetCommittedBlockLengthResponseProto" + "type": "GetCommittedBlockLengthResponseProto", + "required": true } ] }, @@ -1171,12 +1318,14 @@ { "id": 1, "name": "block", - "type": "GetBlockRequestProto" + "type": "GetBlockRequestProto", + "required": true }, { "id": 2, "name": "readChunkVersion", - "type": "ReadChunkVersion" + "type": "ReadChunkVersion", + "optional": true } ] }, @@ -1186,7 +1335,8 @@ { "id": 1, 
"name": "data", - "type": "ReadChunkResponseProto" + "type": "ReadChunkResponseProto", + "required": true } ] }, @@ -1196,22 +1346,26 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "readOffset", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 3, "name": "len", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 4, "name": "version", - "type": "uint32" + "type": "uint32", + "optional": true } ] }, @@ -1221,32 +1375,38 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "readOffset", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 3, "name": "len", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 4, "name": "eof", - "type": "bool" + "type": "bool", + "required": true }, { "id": 5, "name": "data", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 6, "name": "checksum", - "type": "int64" + "type": "int64", + "optional": true } ] } @@ -1426,6 +1586,10 @@ { "name": "DELETED", "integer": 6 + }, + { + "name": "RECOVERING", + "integer": 7 } ] }, @@ -1472,6 +1636,14 @@ { "name": "CHAINED", "integer": 3 + }, + { + "name": "EC", + "integer": 4 + }, + { + "name": "NONE", + "integer": -1 } ] }, @@ -1485,6 +1657,9 @@ { "name": "THREE", "integer": 3 + }, + { + "name": "ZERO" } ] }, @@ -1591,12 +1766,14 @@ { "id": 1, "name": "mostSigBits", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "leastSigBits", - "type": "int64" + "type": "int64", + "required": true } ] }, @@ -1606,17 +1783,20 @@ { "id": 1, "name": "uuid", - "type": "string" + "type": "string", + "optional": true }, { "id": 2, "name": "ipAddress", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "hostName", - "type": "string" + "type": "string", + "required": true }, { "id": 4, @@ -1627,32 +1807,38 @@ { "id": 5, "name": "certSerialId", - 
"type": "string" + "type": "string", + "optional": true }, { "id": 6, "name": "networkName", - "type": "string" + "type": "string", + "optional": true }, { "id": 7, "name": "networkLocation", - "type": "string" + "type": "string", + "optional": true }, { "id": 8, "name": "persistedOpState", - "type": "NodeOperationalState" + "type": "NodeOperationalState", + "optional": true }, { "id": 9, "name": "persistedOpStateExpiry", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 100, "name": "uuid128", - "type": "UUID" + "type": "UUID", + "optional": true } ] }, @@ -1662,27 +1848,32 @@ { "id": 1, "name": "datanodeDetails", - "type": "DatanodeDetailsProto" + "type": "DatanodeDetailsProto", + "required": true }, { "id": 2, "name": "version", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "setupTime", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 4, "name": "revision", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "buildDate", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1692,12 +1883,14 @@ { "id": 1, "name": "src", - "type": "DatanodeDetailsProto" + "type": "DatanodeDetailsProto", + "required": true }, { "id": 2, "name": "tgt", - "type": "DatanodeDetailsProto" + "type": "DatanodeDetailsProto", + "required": true } ] }, @@ -1707,17 +1900,20 @@ { "id": 1, "name": "uuid", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "ipAddress", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "hostName", - "type": "string" + "type": "string", + "required": true }, { "id": 4, @@ -1733,17 +1929,49 @@ { "id": 1, "name": "scmNodeId", - "type": "string" + "type": "string", + "required": true + }, + { + "id": 2, + "name": "clusterId", + "type": "string", + "required": true + }, + { + "id": 3, + "name": "hostName", + "type": "string", + "required": true + } + ] + }, + { + "name": "NodeDetailsProto", + 
"fields": [ + { + "id": 1, + "name": "uuid", + "type": "string", + "required": true }, { "id": 2, "name": "clusterId", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "hostName", - "type": "string" + "type": "string", + "required": true + }, + { + "id": 4, + "name": "nodeType", + "type": "NodeType", + "required": true } ] }, @@ -1753,12 +1981,14 @@ { "id": 1, "name": "name", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "value", - "type": "uint32" + "type": "uint32", + "required": true } ] }, @@ -1768,12 +1998,14 @@ { "id": 1, "name": "id", - "type": "string" + "type": "string", + "optional": true }, { "id": 100, "name": "uuid128", - "type": "UUID" + "type": "UUID", + "optional": true } ] }, @@ -1783,7 +2015,8 @@ { "id": 1, "name": "id", - "type": "uint64" + "type": "uint64", + "required": true } ] }, @@ -1800,6 +2033,7 @@ "id": 2, "name": "state", "type": "PipelineState", + "optional": true, "options": [ { "name": "default", @@ -1811,6 +2045,7 @@ "id": 3, "name": "type", "type": "ReplicationType", + "optional": true, "options": [ { "name": "default", @@ -1822,6 +2057,7 @@ "id": 4, "name": "factor", "type": "ReplicationFactor", + "optional": true, "options": [ { "name": "default", @@ -1832,12 +2068,14 @@ { "id": 5, "name": "id", - "type": "PipelineID" + "type": "PipelineID", + "required": true }, { "id": 6, "name": "leaderID", - "type": "string" + "type": "string", + "optional": true }, { "id": 7, @@ -1848,17 +2086,32 @@ { "id": 8, "name": "creationTimeStamp", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 9, "name": "suggestedLeaderID", - "type": "UUID" + "type": "UUID", + "optional": true + }, + { + "id": 10, + "name": "memberReplicaIndexes", + "type": "uint32", + "is_repeated": true + }, + { + "id": 11, + "name": "ecReplicationConfig", + "type": "ECReplicationConfig", + "optional": true }, { "id": 100, "name": "leaderID128", - "type": "UUID" + "type": "UUID", + "optional": 
true } ] }, @@ -1868,12 +2121,14 @@ { "id": 1, "name": "key", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "value", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1883,7 +2138,8 @@ { "id": 1, "name": "nodeID", - "type": "DatanodeDetailsProto" + "type": "DatanodeDetailsProto", + "required": true }, { "id": 2, @@ -1916,22 +2172,26 @@ { "id": 1, "name": "capacity", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 2, "name": "used", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 3, "name": "remaining", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 4, "name": "node", - "type": "DatanodeDetailsProto" + "type": "DatanodeDetailsProto", + "optional": true } ] }, @@ -1941,57 +2201,74 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "state", - "type": "LifeCycleState" + "type": "LifeCycleState", + "required": true }, { "id": 3, "name": "pipelineID", - "type": "PipelineID" + "type": "PipelineID", + "optional": true }, { "id": 4, "name": "usedBytes", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 5, "name": "numberOfKeys", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 6, "name": "stateEnterTime", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 7, "name": "owner", - "type": "string" + "type": "string", + "required": true }, { "id": 8, "name": "deleteTransactionId", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 9, "name": "sequenceId", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 10, "name": "replicationFactor", - "type": "ReplicationFactor" + "type": "ReplicationFactor", + "optional": true }, { "id": 11, "name": "replicationType", - "type": "ReplicationType" + "type": "ReplicationType", + "required": true + }, + { + "id": 12, + "name": "ecReplicationConfig", + "type": "ECReplicationConfig", + 
"optional": true } ] }, @@ -2001,12 +2278,14 @@ { "id": 1, "name": "containerInfo", - "type": "ContainerInfoProto" + "type": "ContainerInfoProto", + "required": true }, { "id": 2, "name": "pipeline", - "type": "Pipeline" + "type": "Pipeline", + "required": true } ] }, @@ -2016,7 +2295,8 @@ { "id": 1, "name": "traceID", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -2026,12 +2306,14 @@ { "id": 1, "name": "clusterId", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "scmId", - "type": "string" + "type": "string", + "required": true }, { "id": 3, @@ -2047,17 +2329,20 @@ { "id": 1, "name": "clusterId", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "scmId", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "ratisAddr", - "type": "string" + "type": "string", + "required": true } ] }, @@ -2067,12 +2352,66 @@ { "id": 1, "name": "success", - "type": "bool" + "type": "bool", + "required": true }, { "id": 2, "name": "scmId", - "type": "string" + "type": "string", + "optional": true + } + ] + }, + { + "name": "ECReplicationConfig", + "fields": [ + { + "id": 1, + "name": "data", + "type": "int32", + "required": true + }, + { + "id": 2, + "name": "parity", + "type": "int32", + "required": true + }, + { + "id": 3, + "name": "codec", + "type": "string", + "required": true + }, + { + "id": 4, + "name": "ecChunkSize", + "type": "int32", + "required": true + } + ] + }, + { + "name": "DefaultReplicationConfig", + "fields": [ + { + "id": 1, + "name": "type", + "type": "ReplicationType", + "required": true + }, + { + "id": 2, + "name": "factor", + "type": "ReplicationFactor", + "optional": true + }, + { + "id": 3, + "name": "ecReplicationConfig", + "type": "ECReplicationConfig", + "optional": true } ] }, @@ -2105,12 +2444,14 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "localID", - "type": "int64" + 
"type": "int64", + "required": true } ] }, @@ -2120,22 +2461,26 @@ { "id": 1, "name": "keyId", - "type": "uint32" + "type": "uint32", + "required": true }, { "id": 2, "name": "expiryDate", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 3, "name": "privateKeyBytes", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 4, "name": "publicKeyBytes", - "type": "bytes" + "type": "bytes", + "required": true } ] }, @@ -2145,22 +2490,26 @@ { "id": 1, "name": "identifier", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 2, "name": "password", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 3, "name": "kind", - "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "service", - "type": "string" + "type": "string", + "required": true } ] }, @@ -2170,22 +2519,26 @@ { "id": 1, "name": "ownerId", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "blockId", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "expiryDate", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 4, "name": "omCertSerialId", - "type": "string" + "type": "string", + "required": true }, { "id": 5, @@ -2196,7 +2549,8 @@ { "id": 6, "name": "maxLength", - "type": "uint64" + "type": "uint64", + "required": true } ] }, @@ -2206,12 +2560,14 @@ { "id": 1, "name": "containerBlockID", - "type": "ContainerBlockID" + "type": "ContainerBlockID", + "required": true }, { "id": 2, "name": "blockCommitSequenceId", "type": "uint64", + "optional": true, "options": [ { "name": "default", @@ -2227,7 +2583,8 @@ { "id": 1, "name": "status", - "type": "Status" + "type": "Status", + "required": true }, { "id": 2, @@ -2243,17 +2600,20 @@ { "id": 1, "name": "x509CRL", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "creationTimestamp", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 3, "name": "crlSequenceID", 
- "type": "int64" + "type": "int64", + "required": true } ] }, @@ -2263,12 +2623,259 @@ { "id": 1, "name": "x509Certificate", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "timestamp", - "type": "uint64" + "type": "uint64", + "required": true + } + ] + }, + { + "name": "ContainerReplicaHistoryListProto", + "fields": [ + { + "id": 1, + "name": "replicaHistory", + "type": "ContainerReplicaHistoryProto", + "is_repeated": true + } + ] + }, + { + "name": "ContainerReplicaHistoryProto", + "fields": [ + { + "id": 1, + "name": "uuid", + "type": "string", + "required": true + }, + { + "id": 2, + "name": "firstSeenTime", + "type": "int64", + "required": true + }, + { + "id": 3, + "name": "lastSeenTime", + "type": "int64", + "required": true + }, + { + "id": 4, + "name": "bcsId", + "type": "int64", + "required": true + } + ] + }, + { + "name": "SCMContainerReplicaProto", + "fields": [ + { + "id": 1, + "name": "containerID", + "type": "int64", + "required": true + }, + { + "id": 2, + "name": "state", + "type": "string", + "required": true + }, + { + "id": 3, + "name": "datanodeDetails", + "type": "DatanodeDetailsProto", + "required": true + }, + { + "id": 4, + "name": "placeOfBirth", + "type": "string", + "required": true + }, + { + "id": 5, + "name": "sequenceID", + "type": "int64", + "required": true + }, + { + "id": 6, + "name": "keyCount", + "type": "int64", + "required": true + }, + { + "id": 7, + "name": "bytesUsed", + "type": "int64", + "required": true + }, + { + "id": 8, + "name": "replicaIndex", + "type": "int64", + "optional": true + } + ] + }, + { + "name": "KeyContainerIDList", + "fields": [ + { + "id": 1, + "name": "key", + "type": "string", + "required": true + }, + { + "id": 2, + "name": "container", + "type": "ContainerID", + "is_repeated": true + } + ] + }, + { + "name": "KeyIntValue", + "fields": [ + { + "id": 1, + "name": "key", + "type": "string", + "required": true + }, + { + "id": 2, + "name": "value", + "type": "int64", 
+ "optional": true + } + ] + }, + { + "name": "ReplicationManagerReportProto", + "fields": [ + { + "id": 1, + "name": "timestamp", + "type": "int64", + "required": true + }, + { + "id": 2, + "name": "stat", + "type": "KeyIntValue", + "is_repeated": true + }, + { + "id": 3, + "name": "statSample", + "type": "KeyContainerIDList", + "is_repeated": true + } + ] + }, + { + "name": "ContainerBalancerConfigurationProto", + "fields": [ + { + "id": 5, + "name": "utilizationThreshold", + "type": "string", + "optional": true + }, + { + "id": 6, + "name": "datanodesInvolvedMaxPercentagePerIteration", + "type": "int32", + "optional": true + }, + { + "id": 7, + "name": "sizeMovedMaxPerIteration", + "type": "int64", + "optional": true + }, + { + "id": 8, + "name": "sizeEnteringTargetMax", + "type": "int64", + "optional": true + }, + { + "id": 9, + "name": "sizeLeavingSourceMax", + "type": "int64", + "optional": true + }, + { + "id": 10, + "name": "iterations", + "type": "int32", + "optional": true + }, + { + "id": 11, + "name": "excludeContainers", + "type": "string", + "optional": true + }, + { + "id": 12, + "name": "moveTimeout", + "type": "int64", + "optional": true + }, + { + "id": 13, + "name": "balancingIterationInterval", + "type": "int64", + "optional": true + }, + { + "id": 14, + "name": "includeDatanodes", + "type": "string", + "optional": true + }, + { + "id": 15, + "name": "excludeDatanodes", + "type": "string", + "optional": true + }, + { + "id": 16, + "name": "moveNetworkTopologyEnable", + "type": "bool", + "optional": true + }, + { + "id": 17, + "name": "triggerDuBeforeMoveEnable", + "type": "bool", + "optional": true + }, + { + "id": 18, + "name": "shouldRun", + "type": "bool", + "required": true + }, + { + "id": 19, + "name": "nextIterationIndex", + "type": "int32", + "optional": true } ] } diff --git a/hadoop-hdds/interface-server/pom.xml b/hadoop-hdds/interface-server/pom.xml index e79e8c744458..61a67c78d076 100644 --- a/hadoop-hdds/interface-server/pom.xml +++ 
b/hadoop-hdds/interface-server/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-interface-server - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Server interface Apache Ozone HDDS Server Interface diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto index 6465eeb40b47..39d6a2931e93 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto @@ -152,6 +152,9 @@ message CommandQueueReportProto { message SCMHeartbeatResponseProto { required string datanodeUUID = 1; repeated SCMCommandProto commands = 2; + + // Same as term in SCMCommandProto + optional int64 term = 3; } message SCMNodeAddressList { @@ -343,6 +346,7 @@ message SCMCommandProto { // SCM is a leader. If running without Ratis, holds SCMContext.INVALID_TERM. 
optional int64 term = 15; optional string encodedToken = 16; + optional int64 deadlineMsSinceEpoch = 17; } /** diff --git a/hadoop-hdds/interface-server/src/main/resources/proto.lock b/hadoop-hdds/interface-server/src/main/resources/proto.lock index 8c95857a1683..31659ed1094b 100644 --- a/hadoop-hdds/interface-server/src/main/resources/proto.lock +++ b/hadoop-hdds/interface-server/src/main/resources/proto.lock @@ -10,7 +10,8 @@ { "id": 1, "name": "flush", - "type": "bool" + "type": "bool", + "required": true } ] }, @@ -20,32 +21,38 @@ { "id": 1, "name": "clusterId", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "len", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 3, "name": "eof", - "type": "bool" + "type": "bool", + "required": true }, { "id": 4, "name": "data", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 6, "name": "readOffset", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 7, "name": "checksum", - "type": "int64" + "type": "int64", + "optional": true } ] } @@ -112,6 +119,14 @@ { "name": "MOVE", "integer": 6 + }, + { + "name": "STATEFUL_SERVICE_CONFIG", + "integer": 7 + }, + { + "name": "FINALIZE", + "integer": 8 } ] } @@ -123,7 +138,8 @@ { "id": 1, "name": "name", - "type": "string" + "type": "string", + "required": true }, { "id": 2, @@ -139,12 +155,14 @@ { "id": 1, "name": "type", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "value", - "type": "bytes" + "type": "bytes", + "required": true } ] }, @@ -154,7 +172,8 @@ { "id": 1, "name": "type", - "type": "string" + "type": "string", + "required": true }, { "id": 2, @@ -170,12 +189,14 @@ { "id": 1, "name": "type", - "type": "RequestType" + "type": "RequestType", + "required": true }, { "id": 2, "name": "method", - "type": "Method" + "type": "Method", + "required": true } ] }, @@ -185,12 +206,14 @@ { "id": 2, "name": "type", - "type": "string" + "type": "string", + "required": 
true }, { "id": 3, "name": "value", - "type": "bytes" + "type": "bytes", + "required": true } ] } @@ -239,17 +262,20 @@ { "id": 1, "name": "x509CRL", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "creationTimestamp", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 3, "name": "crlSequenceID", - "type": "int64" + "type": "int64", + "required": true } ] }, @@ -259,12 +285,14 @@ { "id": 1, "name": "msb", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "lsb", - "type": "int64" + "type": "int64", + "required": true } ] }, @@ -274,7 +302,8 @@ { "id": 1, "name": "hostname", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -284,7 +313,8 @@ { "id": 1, "name": "clientId", - "type": "ClientId" + "type": "ClientId", + "required": true } ] }, @@ -294,22 +324,26 @@ { "id": 1, "name": "updateType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "clientId", - "type": "ClientId" + "type": "ClientId", + "required": true }, { "id": 4, "name": "crlUpdateRequest", - "type": "CRLUpdateRequest" + "type": "CRLUpdateRequest", + "optional": true } ] }, @@ -319,17 +353,20 @@ { "id": 1, "name": "updateType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "crlUpdateResponse", - "type": "CRLUpdateResponse" + "type": "CRLUpdateResponse", + "optional": true } ] }, @@ -339,7 +376,8 @@ { "id": 1, "name": "receivedCrlId", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, @@ -355,7 +393,8 @@ { "id": 1, "name": "crlInfo", - "type": "CRLInfoProto" + "type": "CRLInfoProto", + "optional": true } ] }, @@ -365,7 +404,8 @@ { "id": 1, "name": "clientId", - "type": "ClientId" + "type": "ClientId", + "required": true } ] }, @@ -593,6 +633,9 @@ { "name": 
"SCMCommandProto.Type", "enum_fields": [ + { + "name": "unknownScmCommand" + }, { "name": "reregisterCommand", "integer": 1 @@ -628,6 +671,14 @@ { "name": "finalizeNewLayoutVersionCommand", "integer": 9 + }, + { + "name": "refreshVolumeUsageInfo", + "integer": 10 + }, + { + "name": "reconstructECContainersCommand", + "integer": 11 } ] } @@ -639,27 +690,32 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "getVersionRequest", - "type": "SCMVersionRequestProto" + "type": "SCMVersionRequestProto", + "optional": true }, { "id": 4, "name": "registerRequest", - "type": "SCMRegisterRequestProto" + "type": "SCMRegisterRequestProto", + "optional": true }, { "id": 5, "name": "sendHeartbeatRequest", - "type": "SCMHeartbeatRequestProto" + "type": "SCMHeartbeatRequestProto", + "optional": true } ] }, @@ -669,17 +725,20 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "success", "type": "bool", + "optional": true, "options": [ { "name": "default", @@ -690,27 +749,32 @@ { "id": 4, "name": "message", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "status", - "type": "Status" + "type": "Status", + "required": true }, { "id": 6, "name": "getVersionResponse", - "type": "SCMVersionResponseProto" + "type": "SCMVersionResponseProto", + "optional": true }, { "id": 7, "name": "registerResponse", - "type": "SCMRegisteredResponseProto" + "type": "SCMRegisteredResponseProto", + "optional": true }, { "id": 8, "name": "sendHeartbeatResponse", - "type": "SCMHeartbeatResponseProto" + "type": "SCMHeartbeatResponseProto", + "optional": true } ] }, @@ -720,12 +784,14 @@ { "id": 1, "name": "metadataLayoutVersion", - "type": "uint32" + "type": "uint32", + "required": true 
}, { "id": 2, "name": "softwareLayoutVersion", - "type": "uint32" + "type": "uint32", + "required": true } ] }, @@ -738,7 +804,8 @@ { "id": 1, "name": "softwareVersion", - "type": "uint32" + "type": "uint32", + "required": true }, { "id": 2, @@ -754,27 +821,32 @@ { "id": 1, "name": "extendedDatanodeDetails", - "type": "ExtendedDatanodeDetailsProto" + "type": "ExtendedDatanodeDetailsProto", + "required": true }, { "id": 2, "name": "nodeReport", - "type": "NodeReportProto" + "type": "NodeReportProto", + "required": true }, { "id": 3, "name": "containerReport", - "type": "ContainerReportsProto" + "type": "ContainerReportsProto", + "required": true }, { "id": 4, "name": "pipelineReports", - "type": "PipelineReportsProto" + "type": "PipelineReportsProto", + "required": true }, { "id": 5, "name": "dataNodeLayoutVersion", - "type": "LayoutVersionProto" + "type": "LayoutVersionProto", + "optional": true } ] }, @@ -784,42 +856,50 @@ { "id": 1, "name": "errorCode", - "type": "ErrorCode" + "type": "ErrorCode", + "required": true }, { "id": 2, "name": "datanodeUUID", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "clusterID", - "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "addressList", - "type": "SCMNodeAddressList" + "type": "SCMNodeAddressList", + "optional": true }, { "id": 5, "name": "hostname", - "type": "string" + "type": "string", + "optional": true }, { "id": 6, "name": "ipAddress", - "type": "string" + "type": "string", + "optional": true }, { "id": 7, "name": "networkName", - "type": "string" + "type": "string", + "optional": true }, { "id": 8, "name": "networkLocation", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -829,17 +909,20 @@ { "id": 1, "name": "datanodeDetails", - "type": "DatanodeDetailsProto" + "type": "DatanodeDetailsProto", + "required": true }, { "id": 2, "name": "nodeReport", - "type": "NodeReportProto" + "type": "NodeReportProto", + "optional": true }, { "id": 3, 
"name": "containerReport", - "type": "ContainerReportsProto" + "type": "ContainerReportsProto", + "optional": true }, { "id": 4, @@ -856,22 +939,49 @@ { "id": 6, "name": "containerActions", - "type": "ContainerActionsProto" + "type": "ContainerActionsProto", + "optional": true }, { "id": 7, "name": "pipelineActions", - "type": "PipelineActionsProto" + "type": "PipelineActionsProto", + "optional": true }, { "id": 8, "name": "pipelineReports", - "type": "PipelineReportsProto" + "type": "PipelineReportsProto", + "optional": true }, { "id": 9, "name": "dataNodeLayoutVersion", - "type": "LayoutVersionProto" + "type": "LayoutVersionProto", + "optional": true + }, + { + "id": 10, + "name": "commandQueueReport", + "type": "CommandQueueReportProto", + "optional": true + } + ] + }, + { + "name": "CommandQueueReportProto", + "fields": [ + { + "id": 1, + "name": "command", + "type": "SCMCommandProto.Type", + "is_repeated": true + }, + { + "id": 2, + "name": "count", + "type": "uint32", + "is_repeated": true } ] }, @@ -881,7 +991,8 @@ { "id": 1, "name": "datanodeUUID", - "type": "string" + "type": "string", + "required": true }, { "id": 2, @@ -916,6 +1027,12 @@ "name": "metadataStorageReport", "type": "MetadataStorageReportProto", "is_repeated": true + }, + { + "id": 3, + "name": "dbStorageReport", + "type": "StorageReportProto", + "is_repeated": true } ] }, @@ -925,17 +1042,20 @@ { "id": 1, "name": "storageUuid", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "storageLocation", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "capacity", "type": "uint64", + "optional": true, "options": [ { "name": "default", @@ -947,6 +1067,7 @@ "id": 4, "name": "scmUsed", "type": "uint64", + "optional": true, "options": [ { "name": "default", @@ -958,6 +1079,7 @@ "id": 5, "name": "remaining", "type": "uint64", + "optional": true, "options": [ { "name": "default", @@ -969,6 +1091,7 @@ "id": 6, "name": "storageType", "type": 
"StorageTypeProto", + "optional": true, "options": [ { "name": "default", @@ -980,6 +1103,7 @@ "id": 7, "name": "failed", "type": "bool", + "optional": true, "options": [ { "name": "default", @@ -995,12 +1119,14 @@ { "id": 1, "name": "storageLocation", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "storageType", "type": "StorageTypeProto", + "optional": true, "options": [ { "name": "default", @@ -1012,6 +1138,7 @@ "id": 3, "name": "capacity", "type": "uint64", + "optional": true, "options": [ { "name": "default", @@ -1023,6 +1150,7 @@ "id": 4, "name": "scmUsed", "type": "uint64", + "optional": true, "options": [ { "name": "default", @@ -1034,6 +1162,7 @@ "id": 5, "name": "remaining", "type": "uint64", + "optional": true, "options": [ { "name": "default", @@ -1045,6 +1174,7 @@ "id": 6, "name": "failed", "type": "bool", + "optional": true, "options": [ { "name": "default", @@ -1082,67 +1212,86 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "state", - "type": "State" + "type": "State", + "required": true }, { "id": 3, "name": "size", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 4, "name": "used", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 5, "name": "keyCount", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 6, "name": "readCount", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 7, "name": "writeCount", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 8, "name": "readBytes", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 9, "name": "writeBytes", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 10, "name": "finalhash", - "type": "string" + "type": "string", + "optional": true }, { "id": 11, "name": "deleteTransactionId", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 12, "name": "blockCommitSequenceId", - 
"type": "uint64" + "type": "uint64", + "optional": true }, { "id": 13, "name": "originNodeId", - "type": "string" + "type": "string", + "optional": true + }, + { + "id": 14, + "name": "replicaIndex", + "type": "int32", + "optional": true } ] }, @@ -1163,12 +1312,14 @@ { "id": 1, "name": "cmdId", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "status", "type": "Status", + "required": true, "options": [ { "name": "default", @@ -1179,17 +1330,20 @@ { "id": 3, "name": "type", - "type": "SCMCommandProto.Type" + "type": "SCMCommandProto.Type", + "required": true }, { "id": 4, "name": "msg", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "blockDeletionAck", - "type": "ContainerBlocksDeletionACKProto" + "type": "ContainerBlocksDeletionACKProto", + "optional": true } ] }, @@ -1210,17 +1364,20 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "action", - "type": "Action" + "type": "Action", + "required": true }, { "id": 3, "name": "reason", - "type": "Reason" + "type": "Reason", + "optional": true } ] }, @@ -1230,17 +1387,20 @@ { "id": 1, "name": "pipelineID", - "type": "PipelineID" + "type": "PipelineID", + "required": true }, { "id": 2, "name": "isLeader", - "type": "bool" + "type": "bool", + "required": true }, { "id": 3, "name": "bytesWritten", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -1272,17 +1432,20 @@ { "id": 1, "name": "pipelineID", - "type": "PipelineID" + "type": "PipelineID", + "required": true }, { "id": 3, "name": "reason", - "type": "Reason" + "type": "Reason", + "optional": true }, { "id": 4, "name": "detailedReason", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1292,12 +1455,14 @@ { "id": 1, "name": "action", - "type": "Action" + "type": "Action", + "required": true }, { "id": 2, "name": "closePipeline", - "type": "ClosePipelineInfo" + "type": "ClosePipelineInfo", + "optional": 
true } ] }, @@ -1307,62 +1472,86 @@ { "id": 1, "name": "commandType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "reregisterCommandProto", - "type": "ReregisterCommandProto" + "type": "ReregisterCommandProto", + "optional": true }, { "id": 3, "name": "deleteBlocksCommandProto", - "type": "DeleteBlocksCommandProto" + "type": "DeleteBlocksCommandProto", + "optional": true }, { "id": 4, "name": "closeContainerCommandProto", - "type": "CloseContainerCommandProto" + "type": "CloseContainerCommandProto", + "optional": true }, { "id": 5, "name": "deleteContainerCommandProto", - "type": "DeleteContainerCommandProto" + "type": "DeleteContainerCommandProto", + "optional": true }, { "id": 6, "name": "replicateContainerCommandProto", - "type": "ReplicateContainerCommandProto" + "type": "ReplicateContainerCommandProto", + "optional": true }, { "id": 7, "name": "createPipelineCommandProto", - "type": "CreatePipelineCommandProto" + "type": "CreatePipelineCommandProto", + "optional": true }, { "id": 8, "name": "closePipelineCommandProto", - "type": "ClosePipelineCommandProto" + "type": "ClosePipelineCommandProto", + "optional": true }, { "id": 9, "name": "setNodeOperationalStateCommandProto", - "type": "SetNodeOperationalStateCommandProto" + "type": "SetNodeOperationalStateCommandProto", + "optional": true }, { "id": 10, "name": "finalizeNewLayoutVersionCommandProto", - "type": "FinalizeNewLayoutVersionCommandProto" + "type": "FinalizeNewLayoutVersionCommandProto", + "optional": true + }, + { + "id": 11, + "name": "refreshVolumeUsageCommandProto", + "type": "RefreshVolumeUsageCommandProto", + "optional": true + }, + { + "id": 12, + "name": "reconstructECContainersCommandProto", + "type": "ReconstructECContainersCommandProto", + "optional": true }, { "id": 15, "name": "term", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 16, "name": "encodedToken", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1381,7 
+1570,8 @@ { "id": 3, "name": "cmdId", - "type": "int64" + "type": "int64", + "required": true } ] }, @@ -1391,12 +1581,14 @@ { "id": 1, "name": "txID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 3, @@ -1407,7 +1599,8 @@ { "id": 4, "name": "count", - "type": "int32" + "type": "int32", + "required": true } ] }, @@ -1423,7 +1616,8 @@ { "id": 2, "name": "dnId", - "type": "string" + "type": "string", + "required": true } ], "messages": [ @@ -1433,17 +1627,20 @@ { "id": 1, "name": "txID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 3, "name": "success", - "type": "bool" + "type": "bool", + "required": true } ] } @@ -1455,22 +1652,26 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "pipelineID", - "type": "PipelineID" + "type": "PipelineID", + "required": true }, { "id": 3, "name": "cmdId", - "type": "int64" + "type": "int64", + "required": true }, { "id": 4, "name": "force", "type": "bool", + "optional": true, "options": [ { "name": "default", @@ -1486,23 +1687,32 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "cmdId", - "type": "int64" + "type": "int64", + "required": true }, { "id": 3, "name": "force", "type": "bool", + "required": true, "options": [ { "name": "default", "value": "false" } ] + }, + { + "id": 4, + "name": "replicaIndex", + "type": "int32", + "optional": true } ] }, @@ -1512,7 +1722,8 @@ { "id": 1, "name": "containerID", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, @@ -1523,7 +1734,72 @@ { "id": 3, "name": "cmdId", - "type": "int64" + "type": "int64", + "required": true + }, + { + "id": 4, + "name": "replicaIndex", + "type": "int32", + "optional": true + } + ] + }, + { 
+ "name": "ReconstructECContainersCommandProto", + "fields": [ + { + "id": 1, + "name": "containerID", + "type": "int64", + "required": true + }, + { + "id": 2, + "name": "sources", + "type": "DatanodeDetailsAndReplicaIndexProto", + "is_repeated": true + }, + { + "id": 3, + "name": "targets", + "type": "DatanodeDetailsProto", + "is_repeated": true + }, + { + "id": 4, + "name": "missingContainerIndexes", + "type": "bytes", + "required": true + }, + { + "id": 5, + "name": "ecReplicationConfig", + "type": "ECReplicationConfig", + "required": true + }, + { + "id": 6, + "name": "cmdId", + "type": "int64", + "required": true + } + ] + }, + { + "name": "DatanodeDetailsAndReplicaIndexProto", + "fields": [ + { + "id": 1, + "name": "datanodeDetails", + "type": "DatanodeDetailsProto", + "required": true + }, + { + "id": 2, + "name": "replicaIndex", + "type": "int32", + "required": true } ] }, @@ -1533,17 +1809,20 @@ { "id": 1, "name": "pipelineID", - "type": "PipelineID" + "type": "PipelineID", + "required": true }, { "id": 2, "name": "type", - "type": "ReplicationType" + "type": "ReplicationType", + "required": true }, { "id": 3, "name": "factor", - "type": "ReplicationFactor" + "type": "ReplicationFactor", + "required": true }, { "id": 4, @@ -1554,7 +1833,8 @@ { "id": 5, "name": "cmdId", - "type": "int64" + "type": "int64", + "required": true }, { "id": 6, @@ -1570,12 +1850,25 @@ { "id": 1, "name": "pipelineID", - "type": "PipelineID" + "type": "PipelineID", + "required": true }, { "id": 2, "name": "cmdId", - "type": "int64" + "type": "int64", + "required": true + } + ] + }, + { + "name": "RefreshVolumeUsageCommandProto", + "fields": [ + { + "id": 1, + "name": "cmdId", + "type": "int64", + "required": true } ] }, @@ -1585,17 +1878,20 @@ { "id": 1, "name": "cmdId", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, "name": "nodeOperationalState", - "type": "NodeOperationalState" + "type": "NodeOperationalState", + "required": true }, { "id": 3, "name": 
"stateExpiryEpochSeconds", - "type": "int64" + "type": "int64", + "required": true } ] }, @@ -1605,7 +1901,8 @@ { "id": 1, "name": "receivedCrlId", - "type": "int64" + "type": "int64", + "required": true }, { "id": 2, @@ -1621,7 +1918,8 @@ { "id": 1, "name": "crlInfo", - "type": "CRLInfoProto" + "type": "CRLInfoProto", + "required": true } ] }, @@ -1632,6 +1930,7 @@ "id": 1, "name": "finalizeNewLayoutVersion", "type": "bool", + "required": true, "options": [ { "name": "default", @@ -1642,12 +1941,14 @@ { "id": 2, "name": "dataNodeLayoutVersion", - "type": "LayoutVersionProto" + "type": "LayoutVersionProto", + "required": true }, { "id": 3, "name": "cmdId", - "type": "int64" + "type": "int64", + "required": true } ] } @@ -1907,47 +2208,56 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "userInfo", - "type": "UserInfo" + "type": "UserInfo", + "optional": true }, { "id": 4, "name": "version", - "type": "uint32" + "type": "uint32", + "optional": true }, { "id": 11, "name": "allocateScmBlockRequest", - "type": "AllocateScmBlockRequestProto" + "type": "AllocateScmBlockRequestProto", + "optional": true }, { "id": 12, "name": "deleteScmKeyBlocksRequest", - "type": "DeleteScmKeyBlocksRequestProto" + "type": "DeleteScmKeyBlocksRequestProto", + "optional": true }, { "id": 13, "name": "getScmInfoRequest", - "type": "hadoop.hdds.GetScmInfoRequestProto" + "type": "hadoop.hdds.GetScmInfoRequestProto", + "optional": true }, { "id": 14, "name": "sortDatanodesRequest", - "type": "SortDatanodesRequestProto" + "type": "SortDatanodesRequestProto", + "optional": true }, { "id": 15, "name": "addScmRequestProto", - "type": "hadoop.hdds.AddScmRequestProto" + "type": "hadoop.hdds.AddScmRequestProto", + "optional": true } ] }, @@ -1957,17 +2267,20 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { 
"id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "success", "type": "bool", + "optional": true, "options": [ { "name": "default", @@ -1978,47 +2291,56 @@ { "id": 4, "name": "message", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "status", - "type": "Status" + "type": "Status", + "required": true }, { "id": 6, "name": "leaderOMNodeId", - "type": "string" + "type": "string", + "optional": true }, { "id": 7, "name": "leaderSCMNodeId", - "type": "string" + "type": "string", + "optional": true }, { "id": 11, "name": "allocateScmBlockResponse", - "type": "AllocateScmBlockResponseProto" + "type": "AllocateScmBlockResponseProto", + "optional": true }, { "id": 12, "name": "deleteScmKeyBlocksResponse", - "type": "DeleteScmKeyBlocksResponseProto" + "type": "DeleteScmKeyBlocksResponseProto", + "optional": true }, { "id": 13, "name": "getScmInfoResponse", - "type": "hadoop.hdds.GetScmInfoResponseProto" + "type": "hadoop.hdds.GetScmInfoResponseProto", + "optional": true }, { "id": 14, "name": "sortDatanodesResponse", - "type": "SortDatanodesResponseProto" + "type": "SortDatanodesResponseProto", + "optional": true }, { "id": 15, "name": "addScmResponse", - "type": "hadoop.hdds.AddScmResponseProto" + "type": "hadoop.hdds.AddScmResponseProto", + "optional": true } ] }, @@ -2028,12 +2350,14 @@ { "id": 1, "name": "userName", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "remoteAddress", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -2043,32 +2367,44 @@ { "id": 1, "name": "size", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 2, "name": "numBlocks", - "type": "uint32" + "type": "uint32", + "required": true }, { "id": 3, "name": "type", - "type": "ReplicationType" + "type": "ReplicationType", + "required": true }, { "id": 4, "name": "factor", - "type": "hadoop.hdds.ReplicationFactor" + "type": 
"hadoop.hdds.ReplicationFactor", + "optional": true }, { "id": 5, "name": "owner", - "type": "string" + "type": "string", + "required": true }, { "id": 7, "name": "excludeList", - "type": "ExcludeListProto" + "type": "ExcludeListProto", + "optional": true + }, + { + "id": 8, + "name": "ecReplicationConfig", + "type": "hadoop.hdds.ECReplicationConfig", + "optional": true } ] }, @@ -2089,7 +2425,8 @@ { "id": 1, "name": "key", - "type": "string" + "type": "string", + "required": true }, { "id": 2, @@ -2116,7 +2453,8 @@ { "id": 1, "name": "objectKey", - "type": "string" + "type": "string", + "required": true }, { "id": 2, @@ -2132,12 +2470,14 @@ { "id": 1, "name": "result", - "type": "Result" + "type": "Result", + "required": true }, { "id": 2, "name": "blockID", - "type": "BlockID" + "type": "BlockID", + "required": true } ] }, @@ -2147,12 +2487,14 @@ { "id": 1, "name": "containerBlockID", - "type": "ContainerBlockID" + "type": "ContainerBlockID", + "optional": true }, { "id": 2, "name": "pipeline", - "type": "hadoop.hdds.Pipeline" + "type": "hadoop.hdds.Pipeline", + "optional": true } ] }, @@ -2173,7 +2515,8 @@ { "id": 1, "name": "client", - "type": "string" + "type": "string", + "required": true }, { "id": 2, @@ -2285,6 +2628,10 @@ { "name": "RevokeCertificates", "integer": 11 + }, + { + "name": "GetCert", + "integer": 12 } ] }, @@ -2443,62 +2790,80 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "getDataNodeCertRequest", - "type": "SCMGetDataNodeCertRequestProto" + "type": "SCMGetDataNodeCertRequestProto", + "optional": true }, { "id": 4, "name": "getOMCertRequest", - "type": "SCMGetOMCertRequestProto" + "type": "SCMGetOMCertRequestProto", + "optional": true }, { "id": 5, "name": "getCertificateRequest", - "type": "SCMGetCertificateRequestProto" + "type": "SCMGetCertificateRequestProto", + "optional": true }, { 
"id": 6, "name": "getCACertificateRequest", - "type": "SCMGetCACertificateRequestProto" + "type": "SCMGetCACertificateRequestProto", + "optional": true }, { "id": 7, "name": "listCertificateRequest", - "type": "SCMListCertificateRequestProto" + "type": "SCMListCertificateRequestProto", + "optional": true }, { "id": 8, "name": "getSCMCertificateRequest", - "type": "SCMGetSCMCertRequestProto" + "type": "SCMGetSCMCertRequestProto", + "optional": true }, { "id": 9, "name": "listCACertificateRequestProto", - "type": "SCMListCACertificateRequestProto" + "type": "SCMListCACertificateRequestProto", + "optional": true }, { "id": 10, "name": "getCrlsRequest", - "type": "SCMGetCrlsRequestProto" + "type": "SCMGetCrlsRequestProto", + "optional": true }, { "id": 11, "name": "getLatestCrlIdRequest", - "type": "SCMGetLatestCrlIdRequestProto" + "type": "SCMGetLatestCrlIdRequestProto", + "optional": true }, { "id": 12, "name": "revokeCertificatesRequest", - "type": "SCMRevokeCertificatesRequestProto" + "type": "SCMRevokeCertificatesRequestProto", + "optional": true + }, + { + "id": 13, + "name": "getCertRequest", + "type": "SCMGetCertRequestProto", + "optional": true } ] }, @@ -2508,17 +2873,20 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "success", "type": "bool", + "optional": true, "options": [ { "name": "default", @@ -2529,37 +2897,44 @@ { "id": 4, "name": "message", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "status", - "type": "Status" + "type": "Status", + "required": true }, { "id": 6, "name": "getCertResponseProto", - "type": "SCMGetCertResponseProto" + "type": "SCMGetCertResponseProto", + "optional": true }, { "id": 7, "name": "listCertificateResponseProto", - "type": "SCMListCertificateResponseProto" + "type": "SCMListCertificateResponseProto", + "optional": true }, { "id": 8, 
"name": "getCrlsResponseProto", - "type": "SCMGetCrlsResponseProto" + "type": "SCMGetCrlsResponseProto", + "optional": true }, { "id": 9, "name": "getLatestCrlIdResponseProto", - "type": "SCMGetLatestCrlIdResponseProto" + "type": "SCMGetLatestCrlIdResponseProto", + "optional": true }, { "id": 10, "name": "revokeCertificatesResponseProto", - "type": "SCMRevokeCertificatesResponseProto" + "type": "SCMRevokeCertificatesResponseProto", + "optional": true } ] }, @@ -2569,12 +2944,14 @@ { "id": 1, "name": "datanodeDetails", - "type": "DatanodeDetailsProto" + "type": "DatanodeDetailsProto", + "required": true }, { "id": 2, "name": "CSR", - "type": "string" + "type": "string", + "required": true } ] }, @@ -2584,12 +2961,31 @@ { "id": 1, "name": "omDetails", - "type": "OzoneManagerDetailsProto" + "type": "OzoneManagerDetailsProto", + "required": true }, { "id": 2, "name": "CSR", - "type": "string" + "type": "string", + "required": true + } + ] + }, + { + "name": "SCMGetCertRequestProto", + "fields": [ + { + "id": 1, + "name": "nodeDetails", + "type": "NodeDetailsProto", + "required": true + }, + { + "id": 2, + "name": "CSR", + "type": "string", + "required": true } ] }, @@ -2599,12 +2995,14 @@ { "id": 1, "name": "scmDetails", - "type": "ScmNodeDetailsProto" + "type": "ScmNodeDetailsProto", + "required": true }, { "id": 2, "name": "CSR", - "type": "string" + "type": "string", + "required": true } ] }, @@ -2614,7 +3012,8 @@ { "id": 1, "name": "certSerialId", - "type": "string" + "type": "string", + "required": true } ] }, @@ -2627,22 +3026,26 @@ { "id": 1, "name": "role", - "type": "NodeType" + "type": "NodeType", + "optional": true }, { "id": 2, "name": "startCertId", - "type": "int64" + "type": "int64", + "optional": true }, { "id": 3, "name": "count", - "type": "uint32" + "type": "uint32", + "required": true }, { "id": 4, "name": "isRevoked", - "type": "bool" + "type": "bool", + "optional": true } ] }, @@ -2652,22 +3055,26 @@ { "id": 1, "name": "responseCode", - "type": 
"ResponseCode" + "type": "ResponseCode", + "required": true }, { "id": 2, "name": "x509Certificate", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "x509CACertificate", - "type": "string" + "type": "string", + "optional": true }, { "id": 4, "name": "x509RootCACertificate", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -2677,7 +3084,8 @@ { "id": 1, "name": "responseCode", - "type": "ResponseCode" + "type": "ResponseCode", + "required": true }, { "id": 2, @@ -2724,7 +3132,8 @@ { "id": 1, "name": "crlId", - "type": "int64" + "type": "int64", + "optional": true } ] }, @@ -2741,6 +3150,7 @@ "id": 2, "name": "reason", "type": "Reason", + "optional": true, "options": [ { "name": "default", @@ -2751,7 +3161,8 @@ { "id": 3, "name": "revokeTime", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -2761,7 +3172,8 @@ { "id": 1, "name": "crlId", - "type": "int64" + "type": "int64", + "optional": true } ] } diff --git a/hadoop-hdds/pom.xml b/hadoop-hdds/pom.xml index d95768fab13b..7abc26079312 100644 --- a/hadoop-hdds/pom.xml +++ b/hadoop-hdds/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone-main - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Project Apache Ozone HDDS pom diff --git a/hadoop-hdds/server-scm/pom.xml b/hadoop-hdds/server-scm/pom.xml index 60feece1489a..caab32f30136 100644 --- a/hadoop-hdds/server-scm/pom.xml +++ b/hadoop-hdds/server-scm/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-server-scm - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Storage Container Manager Server Apache Ozone HDDS SCM Server jar diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java index 5f7e944235c2..8d02d7fc3b80 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java @@ -18,35 +18,45 @@ package org.apache.hadoop.hdds.scm; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Objects; -import java.util.Random; -import java.util.stream.Collectors; - +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetworkTopology; +import org.apache.hadoop.hdds.scm.net.Node; import org.apache.hadoop.hdds.scm.node.DatanodeInfo; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.node.NodeStatus; - -import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.Random; +import java.util.Set; +import 
java.util.stream.Collectors; + /** * This policy implements a set of invariants which are common * for all basic placement policies, acts as the repository of helper * functions which are common to placement policies. */ -public abstract class SCMCommonPlacementPolicy implements PlacementPolicy { +public abstract class SCMCommonPlacementPolicy implements + PlacementPolicy { @VisibleForTesting static final Logger LOG = LoggerFactory.getLogger(SCMCommonPlacementPolicy.class); @@ -346,6 +356,22 @@ protected int getRequiredRackCount(int numReplicas) { return 1; } + /** + * Default implementation to return the max number of replicas per rack. + * For simple policies that are not rack aware + * we return numReplicas, from this default implementation. + * + * @param numReplicas - The desired replica counts + * @param numberOfRacks - The desired number of racks + * @return The max number of replicas per rack + */ + protected int getMaxReplicasPerRack(int numReplicas, int numberOfRacks) { + return numReplicas / numberOfRacks + + Math.min(numReplicas % numberOfRacks, 1); + } + + + /** * This default implementation handles rack aware policies and non rack * aware policies. If a future placement policy needs to check more than racks @@ -363,6 +389,7 @@ protected int getRequiredRackCount(int numReplicas) { public ContainerPlacementStatus validateContainerPlacement( List dns, int replicas) { NetworkTopology topology = nodeManager.getClusterNetworkTopologyMap(); + // We have a network topology so calculate if it is satisfied or not. int requiredRacks = getRequiredRackCount(replicas); if (topology == null || replicas == 1 || requiredRacks == 1) { if (dns.size() > 0) { @@ -372,22 +399,22 @@ public ContainerPlacementStatus validateContainerPlacement( return invalidPlacement; } } - // We have a network topology so calculate if it is satisfied or not. 
- int numRacks = 1; + Map currentRackCount = dns.stream() + .collect(Collectors.groupingBy(this::getPlacementGroup, + Collectors.counting())); final int maxLevel = topology.getMaxLevel(); // The leaf nodes are all at max level, so the number of nodes at // leafLevel - 1 is the rack count - numRacks = topology.getNumOfNodes(maxLevel - 1); - final long currentRackCount = dns.stream() - .map(d -> topology.getAncestor(d, 1)) - .distinct() - .count(); - + int numRacks = topology.getNumOfNodes(maxLevel - 1); if (replicas < requiredRacks) { requiredRacks = replicas; } + int maxReplicasPerRack = getMaxReplicasPerRack(replicas, + Math.min(requiredRacks, numRacks)); return new ContainerPlacementStatusDefault( - (int)currentRackCount, requiredRacks, numRacks); + currentRackCount.size(), requiredRacks, numRacks, maxReplicasPerRack, + currentRackCount.values().stream().map(Long::intValue) + .collect(Collectors.toList())); } /** @@ -426,4 +453,144 @@ public boolean isValidNode(DatanodeDetails datanodeDetails, } return false; } + + /** + * Given a set of replicas of a container which are + * neither over underreplicated nor overreplicated, + * return a set of replicas to copy to another node to fix misreplication. 
+ * @param replicas: Map of replicas with value signifying if + * replica can be copied + */ + @Override + public Set replicasToCopyToFixMisreplication( + Map replicas) { + Map> placementGroupReplicaIdMap + = replicas.keySet().stream() + .collect(Collectors.groupingBy(replica -> + getPlacementGroup(replica.getDatanodeDetails()))); + + int totalNumberOfReplicas = replicas.size(); + int requiredNumberOfPlacementGroups = + getRequiredRackCount(totalNumberOfReplicas); + Set copyReplicaSet = Sets.newHashSet(); + List> replicaSet = placementGroupReplicaIdMap + .values().stream() + .sorted((o1, o2) -> Integer.compare(o2.size(), o1.size())) + .limit(requiredNumberOfPlacementGroups) + .collect(Collectors.toList()); + for (List replicaList: replicaSet) { + int maxReplicasPerPlacementGroup = getMaxReplicasPerRack( + totalNumberOfReplicas, requiredNumberOfPlacementGroups); + int numberOfReplicasToBeCopied = Math.max(0, + replicaList.size() - maxReplicasPerPlacementGroup); + totalNumberOfReplicas -= maxReplicasPerPlacementGroup; + requiredNumberOfPlacementGroups -= 1; + if (numberOfReplicasToBeCopied > 0) { + List replicasToBeCopied = replicaList.stream() + .filter(replicas::get) + .limit(numberOfReplicasToBeCopied) + .collect(Collectors.toList()); + if (numberOfReplicasToBeCopied > replicasToBeCopied.size()) { + Node rack = replicaList.size() > 0 ? this.getPlacementGroup( + replicaList.get(0).getDatanodeDetails()) : null; + LOG.warn("Not enough copyable replicas available in rack {}. " + + "Required number of Replicas to be copied: {}." + + " Available Replicas to be copied: {}", + rack, numberOfReplicasToBeCopied, + replicasToBeCopied.size()); + } + copyReplicaSet.addAll(replicasToBeCopied); + } + } + return copyReplicaSet; + } + + protected Node getPlacementGroup(DatanodeDetails dn) { + return nodeManager.getClusterNetworkTopologyMap().getAncestor(dn, 1); + } + + /** + * Given a set of replicas, expectedCount for Each replica, + * number of unique replica indexes. 
Replicas to be deleted for fixing over + * replication is computed. + * The algorithm starts with creating a replicaIdMap which contains the + * replicas grouped by replica Index. A placementGroup Map is created which + * groups replicas based on their rack & the replicas within the rack + * are further grouped based on the replica Index. + * A placement Group Count Map is created which keeps + * track of the count of replicas in each rack. + * We iterate through overreplicated replica indexes sorted in descending + * order based on their current replication factor in a descending factor. + * For each replica Index the replica is removed from the rack which contains + * the most replicas, in order to achieve this the racks are put + * into priority queue & are based on the number of replicas they have. + * The replica is removed from the rack with maximum replicas & the replica + * to be removed is also removed from the maps created above & + * the count for rack is reduced. + * The set of replicas computed are then returned by the function. + * @param replicas: Set of existing replicas of the container + * @param expectedCountPerUniqueReplica: Replication factor of each + * * unique replica + * @return Set of replicas to be removed are computed. + */ + @Override + public Set replicasToRemoveToFixOverreplication( + Set replicas, int expectedCountPerUniqueReplica) { + Map> replicaIdMap = new HashMap<>(); + Map>> placementGroupReplicaIdMap + = new HashMap<>(); + Map placementGroupCntMap = new HashMap<>(); + for (ContainerReplica replica:replicas) { + Integer replicaId = replica.getReplicaIndex(); + Node placementGroup = getPlacementGroup(replica.getDatanodeDetails()); + replicaIdMap.computeIfAbsent(replicaId, (rid) -> Sets.newHashSet()) + .add(replica); + placementGroupCntMap.compute(placementGroup, + (group, cnt) -> (cnt == null ? 
0 : cnt) + 1); + placementGroupReplicaIdMap.computeIfAbsent(placementGroup, + (pg) -> Maps.newHashMap()).computeIfAbsent(replicaId, (rid) -> + Sets.newHashSet()).add(replica); + } + + Set replicasToRemove = new HashSet<>(); + List sortedRIDList = replicaIdMap.keySet().stream() + .filter(rid -> replicaIdMap.get(rid).size() > + expectedCountPerUniqueReplica) + .sorted((o1, o2) -> Integer.compare(replicaIdMap.get(o2).size(), + replicaIdMap.get(o1).size())) + .collect(Collectors.toList()); + for (Integer rid : sortedRIDList) { + if (replicaIdMap.get(rid).size() <= expectedCountPerUniqueReplica) { + break; + } + Queue pq = new PriorityQueue<>((o1, o2) -> + Integer.compare(placementGroupCntMap.get(o2), + placementGroupCntMap.get(o1))); + pq.addAll(placementGroupReplicaIdMap.entrySet() + .stream() + .filter(nodeMapEntry -> nodeMapEntry.getValue().containsKey(rid)) + .map(Map.Entry::getKey) + .collect(Collectors.toList())); + + while (replicaIdMap.get(rid).size() > expectedCountPerUniqueReplica) { + Node rack = pq.poll(); + Set replicaSet = + placementGroupReplicaIdMap.get(rack).get(rid); + if (replicaSet.size() > 0) { + ContainerReplica r = replicaSet.stream().findFirst().get(); + replicasToRemove.add(r); + replicaSet.remove(r); + replicaIdMap.get(rid).remove(r); + placementGroupCntMap.compute(rack, + (group, cnt) -> (cnt == null ? 
0 : cnt) - 1); + if (replicaSet.size() == 0) { + placementGroupReplicaIdMap.get(rack).remove(rid); + } else { + pq.add(rack); + } + } + } + } + return replicasToRemove; + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java index fb171677f0bc..a093775067a6 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java @@ -80,6 +80,24 @@ default List getContainers() { */ List getContainers(LifeCycleState state); + /** + * Returns containers under certain conditions. + * Search container IDs from start ID(exclusive), + * The max size of the searching range cannot exceed the + * value of count. + * + * @param startID start containerID, >=0, + * start searching at the head if 0. + * @param count count must be >= 0 + * Usually the count will be replace with a very big + * value instead of being unlimited in case the db is very big. + * @param state container state + * + * @return a list of container. + */ + List getContainers(ContainerID startID, + int count, LifeCycleState state); + /** * Returns the size of containers which are in the specified state. 
* diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java index 9314d07de3f8..a3281efcf2ce 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java @@ -178,6 +178,27 @@ public List getContainers(final LifeCycleState state) { return containers; } + @Override + public List getContainers(final ContainerID startID, + final int count, + final LifeCycleState state) { + scmContainerManagerMetrics.incNumListContainersOps(); + final List containersIds = + new ArrayList<>(containerStateManager.getContainerIDs(state)); + Collections.sort(containersIds); + List containers; + lock.lock(); + try { + containers = containersIds.stream() + .filter(id -> id.compareTo(startID) >= 0).limit(count) + .map(containerStateManager::getContainer) + .collect(Collectors.toList()); + } finally { + lock.unlock(); + } + return containers; + } + @Override public int getContainerStateCount(final LifeCycleState state) { return containerStateManager.getContainerIDs(state).size(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java index 3d4d783212d7..2ff50aab757d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java @@ -209,8 +209,10 @@ public void onMessage(final ContainerReportFromDatanode reportFromDatanode, } /** - * Processes the ContainerReport, unknown container reported - * that will be deleted by SCM. + * Processes the ContainerReport. 
+ * Any unknown container reported by DN and not present in SCM + * containerSet will either be logged as an error or deleted based on + * unknownContainerHandleAction. * * @param datanodeDetails Datanode from which this report was received * @param container ContainerInfo representing the container diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java index 998682d92af6..660452b2d8b0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java @@ -164,7 +164,14 @@ private boolean containerMoveSatisfiesPlacementPolicy( replicaList.add(target); ContainerPlacementStatus placementStatus = placementPolicyValidateProxy .validateContainerPlacement(replicaList, containerInfo); - return placementStatus.isPolicySatisfied(); + + boolean isPolicySatisfied = placementStatus.isPolicySatisfied(); + if (!isPolicySatisfied) { + logger.debug("Moving container {} from source {} to target {} will not " + + "satisfy placement policy.", containerID, source.getUuidString(), + target.getUuidString()); + } + return isPolicySatisfied; } /** @@ -185,10 +192,26 @@ private boolean canSizeEnterTarget(DatanodeDetails target, long size) { // MaxSizeEnteringTarget //2 current usage of target datanode plus sizeEnteringAfterMove // is smaller than or equal to upperLimit - return sizeEnteringAfterMove <= config.getMaxSizeEnteringTarget() && - Double.compare(nodeManager.getUsageInfo(target) - .calculateUtilization(sizeEnteringAfterMove), upperLimit) <= 0; + if (sizeEnteringAfterMove > config.getMaxSizeEnteringTarget()) { + logger.debug("{} bytes cannot enter datanode {} because 'size" + + ".entering.target.max' limit is {} and {} bytes 
have already " + + "entered.", size, target.getUuidString(), + config.getMaxSizeEnteringTarget(), + sizeEnteringNode.get(target)); + return false; + } + if (Double.compare(nodeManager.getUsageInfo(target) + .calculateUtilization(sizeEnteringAfterMove), upperLimit) > 0) { + logger.debug("{} bytes cannot enter datanode {} because its " + + "utilization will exceed the upper limit of {}.", size, + target.getUuidString(), upperLimit); + return false; + } + return true; } + + logger.warn("No record of how much size has entered datanode {}", + target.getUuidString()); return false; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java index 41a56822a543..4f5868f2456e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java @@ -157,10 +157,24 @@ public boolean canSizeLeaveSource(DatanodeDetails source, long size) { // MaxSizeLeavingTarget //2 after subtracting sizeLeavingAfterMove, the usage is bigger // than or equal to lowerLimit - return sizeLeavingAfterMove <= config.getMaxSizeLeavingSource() && - Double.compare(nodeManager.getUsageInfo(source) - .calculateUtilization(-sizeLeavingAfterMove), lowerLimit) >= 0; + if (sizeLeavingAfterMove > config.getMaxSizeLeavingSource()) { + LOG.debug("{} bytes cannot leave datanode {} because 'size.leaving" + + ".source.max' limit is {} and {} bytes have already left.", + size, source.getUuidString(), config.getMaxSizeLeavingSource(), + sizeLeavingNode.get(source)); + return false; + } + if (Double.compare(nodeManager.getUsageInfo(source) + .calculateUtilization(-sizeLeavingAfterMove), lowerLimit) < 0) { + LOG.debug("{} bytes cannot leave datanode {} because its utilization " + + "will drop 
below the lower limit of {}.", size, + source.getUuidString(), lowerLimit); + return false; + } + return true; } + + LOG.warn("No record of how much size has left datanode {}", source); return false; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementStatusDefault.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementStatusDefault.java index 3fdcd2f40169..220775540d00 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementStatusDefault.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementStatusDefault.java @@ -18,6 +18,9 @@ import org.apache.hadoop.hdds.scm.ContainerPlacementStatus; +import java.util.Collections; +import java.util.List; + /** * Simple Status object to check if a container is replicated across enough * racks. @@ -29,16 +32,32 @@ public class ContainerPlacementStatusDefault private final int currentRacks; private final int totalRacks; + private final int maxReplicasPerRack; + private final List rackReplicaCnts; + + public ContainerPlacementStatusDefault(int currentRacks, int requiredRacks, - int totalRacks) { + int totalRacks, int maxReplicasPerRack, List rackReplicaCnts) { this.requiredRacks = requiredRacks; this.currentRacks = currentRacks; this.totalRacks = totalRacks; + this.maxReplicasPerRack = maxReplicasPerRack; + this.rackReplicaCnts = rackReplicaCnts; + } + + public ContainerPlacementStatusDefault(int currentRacks, int requiredRacks, + int totalRacks) { + this(currentRacks, requiredRacks, totalRacks, 1, + currentRacks == 0 ? 
Collections.emptyList() + : Collections.nCopies(currentRacks, 1)); } @Override public boolean isPolicySatisfied() { - return currentRacks >= totalRacks || currentRacks >= requiredRacks; + if (currentRacks < Math.min(totalRacks, requiredRacks)) { + return false; + } + return rackReplicaCnts.stream().allMatch(cnt -> cnt <= maxReplicasPerRack); } @Override @@ -46,8 +65,13 @@ public String misReplicatedReason() { if (isPolicySatisfied()) { return null; } - return "The container is mis-replicated as it is on " + currentRacks + - " racks but should be on " + requiredRacks + " racks."; + if (currentRacks < expectedPlacementCount()) { + return "The container is mis-replicated as it is on " + currentRacks + + " racks but should be on " + expectedPlacementCount() + " racks."; + } + return "The container is mis-replicated as max number of replicas per rack " + + "is " + maxReplicasPerRack + " but number of replicas per rack" + + " are " + rackReplicaCnts.toString(); } @Override @@ -55,7 +79,9 @@ public int misReplicationCount() { if (isPolicySatisfied()) { return 0; } - return requiredRacks - currentRacks; + return Math.max(expectedPlacementCount() - currentRacks, + rackReplicaCnts.stream().mapToInt( + cnt -> Math.max(cnt - maxReplicasPerRack, 0)).sum()); } @Override diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java index 4ee7408fbb5f..4f07024f16db 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java @@ -376,6 +376,14 @@ private List chooseNodes(List excludedNodes, } } + @Override + protected int getMaxReplicasPerRack(int 
numReplicas, int numberOfRacks) { + if (numberOfRacks == 1) { + return numReplicas; + } + return Math.max(numReplicas - 1, 1); + } + @Override protected int getRequiredRackCount(int numReplicas) { return REQUIRED_RACKS; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java index eff2dc86c426..495f97e0c36c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.container.placement.algorithms; import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.scm.ContainerPlacementStatus; @@ -26,6 +27,7 @@ import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.net.Node; import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.pipeline.PipelineStateManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,10 +37,15 @@ import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; +import java.util.Map; +import java.util.Objects; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE; + /** * Container placement policy that scatter datanodes on different racks * , together with the space to satisfy the size constraints. 
@@ -77,14 +84,33 @@ public SCMContainerPlacementRackScatter(final NodeManager nodeManager, this.metrics = metrics; } + /** + * Constructor for Pipeline Provider Pipeline Placement with rack awareness. + * @param nodeManager Node Manager + * @param stateManager State Manager + * @param conf Configuration + */ + public SCMContainerPlacementRackScatter(NodeManager nodeManager, + PipelineStateManager stateManager, ConfigurationSource conf) { + super(nodeManager, conf); + this.networkTopology = nodeManager.getClusterNetworkTopologyMap(); + this.metrics = null; + } + public Set chooseNodesFromRacks(List racks, - List unavailableNodes, - List mutableFavoredNodes, - int nodesRequired, final long metadataSizeRequired, - final long dataSizeRequired, int maxOuterLoopIterations) { + List unavailableNodes, + List mutableFavoredNodes, + int nodesRequired, final Pair metadatasizeDatasizePair, + int maxOuterLoopIterations, final Pair, Integer> + rackCntMapMaxReplicaPerRackPair) { if (nodesRequired <= 0) { return Collections.emptySet(); } + final long metadataSizeRequired = metadatasizeDatasizePair.getKey(); + final long dataSizeRequired = metadatasizeDatasizePair.getValue(); + final Map rackCntMap = + rackCntMapMaxReplicaPerRackPair.getKey(); + final int maxReplicasPerRack = rackCntMapMaxReplicaPerRackPair.getValue(); List toChooseRacks = new LinkedList<>(); Set chosenNodes = new LinkedHashSet<>(); Set skippedRacks = new HashSet<>(); @@ -97,8 +123,10 @@ public Set chooseNodesFromRacks(List racks, int chosenListSize = chosenNodes.size(); // Refill toChooseRacks, we put skippedRacks in front of toChooseRacks - // for a even distribution - toChooseRacks.addAll(racks); + // for an even distribution + toChooseRacks.addAll(racks.stream() + .filter(rack -> rackCntMap.getOrDefault(rack, 0) + < maxReplicasPerRack).collect(Collectors.toList())); if (!skippedRacks.isEmpty()) { toChooseRacks.removeAll(skippedRacks); toChooseRacks.addAll(0, skippedRacks); @@ -111,6 +139,7 @@ public Set 
chooseNodesFromRacks(List racks, Node curRack = getRackOfDatanodeDetails(favoredNode); if (toChooseRacks.contains(curRack)) { chosenNodes.add(favoredNode); + rackCntMap.merge(curRack, 1, Math::addExact); toChooseRacks.remove(curRack); chosenFavoredNodesInForLoop.add(favoredNode); unavailableNodes.add(favoredNode); @@ -137,6 +166,7 @@ public Set chooseNodesFromRacks(List racks, metadataSizeRequired, dataSizeRequired); if (node != null) { chosenNodes.add((DatanodeDetails) node); + rackCntMap.merge(rack, 1, Math::addExact); mutableFavoredNodes.remove(node); unavailableNodes.add(node); nodesRequired--; @@ -191,7 +221,9 @@ protected List chooseDatanodesInternal( "than 0, but the given num is " + nodesRequiredToChoose; throw new SCMException(errorMsg, null); } - metrics.incrDatanodeRequestCount(nodesRequiredToChoose); + if (metrics != null) { + metrics.incrDatanodeRequestCount(nodesRequiredToChoose); + } int nodesRequired = nodesRequiredToChoose; int excludedNodesCount = excludedNodes == null ? 
0 : excludedNodes.size(); List availableNodes = networkTopology.getNodes( @@ -206,9 +238,8 @@ protected List chooseDatanodesInternal( " AvailableNode = " + availableNodes.size() + " RequiredNode = " + nodesRequired + " ExcludedNode = " + excludedNodesCount, - SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + FAILED_TO_FIND_SUITABLE_NODE); } - List mutableFavoredNodes = new ArrayList<>(); if (favoredNodes != null) { // Generate mutableFavoredNodes, only stores valid favoredNodes @@ -223,19 +254,19 @@ protected List chooseDatanodesInternal( if (excludedNodes != null) { mutableFavoredNodes.removeAll(excludedNodes); } - if (usedNodes == null) { usedNodes = Collections.emptyList(); } List racks = getAllRacks(); - Set usedRacks = usedNodes.stream() + Map usedRacksCntMap = usedNodes.stream() .map(node -> networkTopology.getAncestor(node, RACK_LEVEL)) .filter(node -> node != null) - .collect(Collectors.toSet()); + .collect(Collectors.toMap(Function.identity(), e -> 1, + Math::addExact)); int requiredReplicationFactor = usedNodes.size() + nodesRequired; - int numberOfRacksRequired = - getRequiredRackCount(requiredReplicationFactor); - int additionalRacksRequired = numberOfRacksRequired - usedRacks.size(); + int numberOfRacksRequired = getRequiredRackCount(requiredReplicationFactor); + int additionalRacksRequired = + numberOfRacksRequired - usedRacksCntMap.size(); if (nodesRequired < additionalRacksRequired) { String reason = "Required nodes size: " + nodesRequired + " is less than required number of racks to choose: " @@ -243,12 +274,15 @@ protected List chooseDatanodesInternal( LOG.warn("Placement policy cannot choose the enough racks. 
{}" + "Total number of Required Racks: {} Used Racks Count:" + " {}, Required Nodes count: {}", - reason, numberOfRacksRequired, usedRacks.size(), nodesRequired); + reason, numberOfRacksRequired, usedRacksCntMap.size(), + nodesRequired); throw new SCMException(reason, - SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + FAILED_TO_FIND_SUITABLE_NODE); } + int maxReplicasPerRack = getMaxReplicasPerRack(requiredReplicationFactor, + numberOfRacksRequired); // For excluded nodes, we sort their racks at rear - racks = sortRackWithExcludedNodes(racks, excludedNodes, usedRacks); + racks = sortRackWithExcludedNodes(racks, excludedNodes, usedRacksCntMap); List unavailableNodes = new ArrayList<>(); if (excludedNodes != null) { @@ -265,43 +299,39 @@ protected List chooseDatanodesInternal( LOG.warn("Placement policy cannot choose the enough racks. {}" + "Total number of Required Racks: {} Used Racks Count:" + " {}, Required Nodes count: {}", - reason, numberOfRacksRequired, usedRacks.size(), nodesRequired); + reason, numberOfRacksRequired, usedRacksCntMap.size(), + nodesRequired); throw new SCMException(reason, - SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + FAILED_TO_FIND_SUITABLE_NODE); } chosenNodes.addAll(chooseNodesFromRacks(racks, unavailableNodes, mutableFavoredNodes, additionalRacksRequired, - metadataSizeRequired, dataSizeRequired, 1)); + Pair.of(metadataSizeRequired, dataSizeRequired), 1, + Pair.of(usedRacksCntMap, maxReplicasPerRack))); if (chosenNodes.size() < additionalRacksRequired) { String reason = "Chosen nodes size from Unique Racks: " + chosenNodes .size() + ", but required nodes to choose from Unique Racks: " + additionalRacksRequired + " do not match."; LOG.warn("Placement policy could not choose the enough nodes from " + - "available racks." - + " {} Available racks count: {}," + - " Excluded nodes count: {}", + "available racks. 
{} Available racks count: {}," + + " Excluded nodes count: {}", reason, racks.size(), excludedNodesCount); throw new SCMException(reason, SCMException.ResultCodes.FAILED_TO_FIND_HEALTHY_NODES); } if (chosenNodes.size() < nodesRequired) { - racks.addAll(usedRacks); - usedRacks.addAll(chosenNodes.stream() - .map(node -> networkTopology.getAncestor(node, RACK_LEVEL)) - .filter(node -> node != null) - .collect(Collectors.toSet())); - sortRackWithExcludedNodes(racks, excludedNodes, usedRacks); - racks.addAll(usedRacks); + racks.addAll(usedRacksCntMap.keySet()); + racks = sortRackWithExcludedNodes(racks, excludedNodes, usedRacksCntMap); + racks.addAll(usedRacksCntMap.keySet()); chosenNodes.addAll(chooseNodesFromRacks(racks, unavailableNodes, mutableFavoredNodes, nodesRequired - chosenNodes.size(), - metadataSizeRequired, dataSizeRequired, Integer.MAX_VALUE)); + Pair.of(metadataSizeRequired, dataSizeRequired), + Integer.MAX_VALUE, Pair.of(usedRacksCntMap, maxReplicasPerRack))); } - List result = new ArrayList<>(chosenNodes); - if (nodesRequiredToChoose != chosenNodes.size()) { String reason = "Chosen nodes size: " + chosenNodes .size() + ", but required nodes to choose: " @@ -312,17 +342,25 @@ protected List chooseDatanodesInternal( throw new SCMException(reason, SCMException.ResultCodes.FAILED_TO_FIND_HEALTHY_NODES); } - ContainerPlacementStatus placementStatus = - validateContainerPlacement( - Stream.of(usedNodes, result) - .flatMap(List::stream).collect(Collectors.toList()), + validateContainerPlacement(Stream.of(usedNodes, result) + .flatMap(List::stream).collect(Collectors.toList()), requiredReplicationFactor); if (!placementStatus.isPolicySatisfied()) { - String errorMsg = "ContainerPlacementPolicy not met, currentRacks is " + - placementStatus.actualPlacementCount() + " desired racks is " + - placementStatus.expectedPlacementCount(); - throw new SCMException(errorMsg, null); + ContainerPlacementStatus initialPlacementStatus = + 
validateContainerPlacement(Stream.of(usedNodes).flatMap( + List::stream).collect(Collectors.toList()), + requiredReplicationFactor); + if (initialPlacementStatus.misReplicationCount() + < placementStatus.misReplicationCount()) { + String errorMsg = "ContainerPlacementPolicy not met. Misreplication" + + " Reason: " + placementStatus.misReplicatedReason() + + " Initial Used nodes mis-replication Count: " + + initialPlacementStatus.misReplicationCount() + + " Used nodes + Chosen nodes mis-replication Count: " + + placementStatus.misReplicationCount(); + throw new SCMException(errorMsg, FAILED_TO_FIND_SUITABLE_NODE); + } } return result; } @@ -348,7 +386,9 @@ private Node chooseNode(String scope, List excludedNodes, long metadataSizeRequired, long dataSizeRequired) { int maxRetry = INNER_LOOP_MAX_RETRY; while (true) { - metrics.incrDatanodeChooseAttemptCount(); + if (metrics != null) { + metrics.incrDatanodeChooseAttemptCount(); + } Node node = null; try { node = networkTopology.chooseRandom(scope, excludedNodes); @@ -364,7 +404,9 @@ private Node chooseNode(String scope, List excludedNodes, DatanodeDetails datanodeDetails = (DatanodeDetails) node; if (isValidNode(datanodeDetails, metadataSizeRequired, dataSizeRequired)) { - metrics.incrDatanodeChooseSuccessCount(); + if (metrics != null) { + metrics.incrDatanodeChooseSuccessCount(); + } return node; } // exclude the unavailable node for the following retries. @@ -416,7 +458,7 @@ private Node getRackOfDatanodeDetails(DatanodeDetails datanodeDetails) { * @return */ private List sortRackWithExcludedNodes(List racks, - List excludedNodes, Set usedRacks) { + List excludedNodes, Map usedRacks) { if ((excludedNodes == null || excludedNodes.isEmpty()) && usedRacks.isEmpty()) { return racks; @@ -425,12 +467,12 @@ private List sortRackWithExcludedNodes(List racks, .map(node -> networkTopology.getAncestor(node, RACK_LEVEL)) // Dead Nodes have been removed from the topology and so have a // null rack. 
We need to exclude those from the rack list. - .filter(node -> node != null) - .filter(node -> !usedRacks.contains(node)) + .filter(Objects::nonNull) + .filter(node -> !usedRacks.containsKey(node)) .collect(Collectors.toSet()); List result = new ArrayList<>(); for (Node rack : racks) { - if (!usedRacks.contains(rack) && !lessPreferredRacks.contains(rack)) { + if (!usedRacks.containsKey(rack) && !lessPreferredRacks.contains(rack)) { result.add(rack); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java index cdfd57d1d09b..2aa11211015c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.node.NodeManager; @@ -40,7 +41,7 @@ * can be practically used. 
*/ public final class SCMContainerPlacementRandom extends SCMCommonPlacementPolicy - implements PlacementPolicy { + implements PlacementPolicy { @VisibleForTesting public static final Logger LOG = LoggerFactory.getLogger(SCMContainerPlacementRandom.class); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/AbstractOverReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/AbstractOverReplicationHandler.java index da8d52bb1e2d..5d647593effc 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/AbstractOverReplicationHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/AbstractOverReplicationHandler.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; @@ -55,7 +56,8 @@ protected AbstractOverReplicationHandler(PlacementPolicy placementPolicy) { */ public abstract Map> processAndCreateCommands( Set replicas, List pendingOps, - ContainerHealthResult result, int remainingMaintenanceRedundancy); + ContainerHealthResult result, int remainingMaintenanceRedundancy) throws + IOException; /** * Identify whether the placement status is actually equal for a @@ -78,6 +80,20 @@ public boolean isPlacementStatusActuallyEqualAfterRemove( return isPlacementStatusActuallyEqual(currentCPS, newCPS); } + /** + * Allow the placement policy to indicate which replicas can be removed for + * an over replicated container, so that the placement policy is not violated + * by removing them. 
+ * @param replicas + * @param expectedCountPerUniqueReplica + * @return + */ + protected Set selectReplicasToRemove( + Set replicas, int expectedCountPerUniqueReplica) { + return placementPolicy.replicasToRemoveToFixOverreplication( + replicas, expectedCountPerUniqueReplica); + } + /** * Given a set of ContainerReplica, transform it to a list of DatanodeDetails * and then check if the list meets the container placement policy. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java index 5d80268ef4fc..d5b92935ce79 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java @@ -108,12 +108,21 @@ public static class UnderReplicatedHealthResult private final boolean dueToDecommission; private final boolean sufficientlyReplicatedAfterPending; private final boolean unrecoverable; + private boolean hasUnReplicatedOfflineIndexes = false; private int requeueCount = 0; public UnderReplicatedHealthResult(ContainerInfo containerInfo, int remainingRedundancy, boolean dueToDecommission, boolean replicatedOkWithPending, boolean unrecoverable) { - super(containerInfo, HealthState.UNDER_REPLICATED); + this(containerInfo, remainingRedundancy, dueToDecommission, + replicatedOkWithPending, unrecoverable, HealthState.UNDER_REPLICATED); + } + + protected UnderReplicatedHealthResult(ContainerInfo containerInfo, + int remainingRedundancy, boolean dueToDecommission, + boolean replicatedOkWithPending, boolean unrecoverable, + HealthState healthState) { + super(containerInfo, healthState); this.remainingRedundancy = remainingRedundancy; this.dueToDecommission = dueToDecommission; this.sufficientlyReplicatedAfterPending = 
replicatedOkWithPending; @@ -148,7 +157,7 @@ public int getWeightedRedundancy() { if (dueToDecommission) { result += DECOMMISSION_REDUNDANCY; } else { - result += remainingRedundancy; + result += getRemainingRedundancy(); } return result; } @@ -198,6 +207,27 @@ public boolean isReplicatedOkAfterPending() { public boolean isUnrecoverable() { return unrecoverable; } + + /** + * Pass true if a container has some indexes which are only on nodes + * which are DECOMMISSIONING or ENTERING_MAINTENANCE. These containers may + * need to be processed even if they are unrecoverable. + * @param val pass true if the container has indexes on nodes going offline + * or false otherwise. + */ + public void setHasUnReplicatedOfflineIndexes(boolean val) { + hasUnReplicatedOfflineIndexes = val; + } + /** + * Indicates whether a container has some indexes which are only on nodes + * which are DECOMMISSIONING or ENTERING_MAINTENANCE. These containers may + * need to be processed even if they are unrecoverable. + * @return True if the container has some decommission or maintenance only + * indexes. + */ + public boolean hasUnreplicatedOfflineIndexes() { + return hasUnReplicatedOfflineIndexes; + } } /** @@ -207,19 +237,28 @@ public boolean isUnrecoverable() { * containers are not spread across enough racks. */ public static class MisReplicatedHealthResult - extends ContainerHealthResult { + extends UnderReplicatedHealthResult { - private final boolean replicatedOkAfterPending; + /** + * In UnderReplicatedHealthState, DECOMMISSION_REDUNDANCY is defined as + * 5 so that containers which are really under replicated get fixed as a + * priority over decommissioning hosts. We have defined that a container + * can only be mis replicated if it is not over or under replicated. Fixing + * mis replication is arguably less important than completing a decommission. 
+ * So that a lot of mis replicated containers do not block decommission, we + * set the redundancy of mis replicated containers to 6 so they sort after + * under / over replicated and decommissioning replicas in the under + * replication queue. + */ + private static final int MIS_REP_REDUNDANCY = 6; public MisReplicatedHealthResult(ContainerInfo containerInfo, boolean replicatedOkAfterPending) { - super(containerInfo, HealthState.MIS_REPLICATED); - this.replicatedOkAfterPending = replicatedOkAfterPending; + super(containerInfo, MIS_REP_REDUNDANCY, false, + replicatedOkAfterPending, false, + HealthState.MIS_REPLICATED); } - public boolean isReplicatedOkAfterPending() { - return replicatedOkAfterPending; - } } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java index a3160208e33e..9c47ac244e61 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java @@ -17,11 +17,12 @@ */ package org.apache.hadoop.hdds.scm.container.replication; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; -import java.util.Set; +import java.util.List; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; @@ -32,10 +33,12 @@ public interface ContainerReplicaCount { ContainerInfo getContainer(); - Set getReplicas(); + List getReplicas(); boolean isSufficientlyReplicated(); + boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode); + boolean isOverReplicated(); int getDecommissionCount(); 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaPendingOps.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaPendingOps.java index 6a329af97fa4..7add67d80921 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaPendingOps.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaPendingOps.java @@ -52,6 +52,8 @@ public class ContainerReplicaPendingOps { private final ConcurrentHashMap pendingOpCount = new ConcurrentHashMap<>(); private ReplicationManagerMetrics replicationMetrics = null; + private List subscribers = + new ArrayList<>(); public ContainerReplicaPendingOps(final ConfigurationSource conf, Clock clock) { @@ -119,7 +121,7 @@ public boolean completeAddReplica(ContainerID containerID, boolean completed = completeOp(ADD, containerID, target, replicaIndex); if (isMetricsNotNull() && completed) { if (replicaIndex > 0) { - replicationMetrics.incrEcReplicationCmdsCompletedTotal(); + replicationMetrics.incrEcReplicasCreatedTotal(); } else if (replicaIndex == 0) { replicationMetrics.incrNumReplicationCmdsCompleted(); } @@ -141,7 +143,7 @@ public boolean completeDeleteReplica(ContainerID containerID, boolean completed = completeOp(DELETE, containerID, target, replicaIndex); if (isMetricsNotNull() && completed) { if (replicaIndex > 0) { - replicationMetrics.incrEcDeletionCmdsCompletedTotal(); + replicationMetrics.incrEcReplicasDeletedTotal(); } else if (replicaIndex == 0) { replicationMetrics.incrNumDeletionCmdsCompleted(); } @@ -168,6 +170,9 @@ public boolean removeOp(ContainerID containerID, */ public void removeExpiredEntries(long expiryMilliSeconds) { for (ContainerID containerID : pendingOps.keySet()) { + // List of expired ops that subscribers will be notified about + List expiredOps = new ArrayList<>(); + // Rather than 
use an entry set, we get the map entry again. This is // to protect against another thread modifying the value after this // iterator started. Once we lock on the ContainerID object, no other @@ -187,6 +192,7 @@ public void removeExpiredEntries(long expiryMilliSeconds) { if (op.getScheduledEpochMillis() + expiryMilliSeconds < clock.millis()) { iterator.remove(); + expiredOps.add(op); pendingOpCount.get(op.getOpType()).decrementAndGet(); updateTimeoutMetrics(op); } @@ -197,19 +203,24 @@ public void removeExpiredEntries(long expiryMilliSeconds) { } finally { lock.unlock(); } + + // notify if there are expired ops + if (!expiredOps.isEmpty()) { + notifySubscribers(expiredOps, containerID, true); + } } } private void updateTimeoutMetrics(ContainerReplicaOp op) { if (op.getOpType() == ADD && isMetricsNotNull()) { if (op.getReplicaIndex() > 0) { - replicationMetrics.incrEcReplicationCmdsTimeoutTotal(); + replicationMetrics.incrEcReplicaCreateTimeoutTotal(); } else if (op.getReplicaIndex() == 0) { replicationMetrics.incrNumReplicationCmdsTimeout(); } } else if (op.getOpType() == DELETE && isMetricsNotNull()) { if (op.getReplicaIndex() > 0) { - replicationMetrics.incrEcDeletionCmdsTimeoutTotal(); + replicationMetrics.incrEcReplicaDeleteTimeoutTotal(); } else if (op.getReplicaIndex() == 0) { replicationMetrics.incrNumDeletionCmdsTimeout(); } @@ -234,6 +245,8 @@ private void addReplica(ContainerReplicaOp.PendingOpType opType, private boolean completeOp(ContainerReplicaOp.PendingOpType opType, ContainerID containerID, DatanodeDetails target, int replicaIndex) { boolean found = false; + // List of completed ops that subscribers will be notified about + List completedOps = new ArrayList<>(); Lock lock = writeLock(containerID); lock.lock(); try { @@ -246,6 +259,7 @@ private boolean completeOp(ContainerReplicaOp.PendingOpType opType, && op.getTarget().equals(target) && op.getReplicaIndex() == replicaIndex) { found = true; + completedOps.add(op); iterator.remove(); 
pendingOpCount.get(op.getOpType()).decrementAndGet(); } @@ -257,9 +271,40 @@ private boolean completeOp(ContainerReplicaOp.PendingOpType opType, } finally { lock.unlock(); } + + if (found) { + notifySubscribers(completedOps, containerID, false); + } return found; } + /** + * Notifies subscribers about the specified ops by calling + * ContainerReplicaPendingOpsSubscriber#opCompleted. + * + * @param ops the ops to send notifications for + * @param containerID the container that ops belong to + * @param timedOut true if the ops (each one) expired, false if they completed + */ + private void notifySubscribers(List ops, + ContainerID containerID, boolean timedOut) { + for (ContainerReplicaOp op : ops) { + for (ContainerReplicaPendingOpsSubscriber subscriber : subscribers) { + subscriber.opCompleted(op, containerID, timedOut); + } + } + } + + /** + * Registers a subscriber that will be notified about completed ops. + * + * @param subscriber object that wants to subscribe + */ + public void registerSubscriber( + ContainerReplicaPendingOpsSubscriber subscriber) { + subscribers.add(subscriber); + } + private Lock writeLock(ContainerID containerID) { return stripedLock.get(containerID).writeLock(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaPendingOpsSubscriber.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaPendingOpsSubscriber.java new file mode 100644 index 000000000000..96d4d3adf57b --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaPendingOpsSubscriber.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import org.apache.hadoop.hdds.scm.container.ContainerID; + +/** + * A subscriber can register with ContainerReplicaPendingOps to receive + * updates on pending ops. + */ +public interface ContainerReplicaPendingOpsSubscriber { + + /** + * Notifies that the specified op has been completed for the specified + * containerID. Might have completed normally or timed out. + * + * @param op Add or Delete op + * @param containerID container on which the operation is being performed + * @param timedOut true if the op timed out, else false + */ + void opCompleted(ContainerReplicaOp op, ContainerID containerID, + boolean timedOut); +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java index 1360c8401653..15108131d7ec 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.container.replication; import org.apache.hadoop.hdds.client.ECReplicationConfig; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.container.ContainerInfo; @@ -25,6 +26,7 @@ import 
java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -36,6 +38,7 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; /** * This class provides a set of methods to test for over / under replication of @@ -74,25 +77,44 @@ public class ECContainerReplicaCount implements ContainerReplicaCount { private final Map healthyIndexes = new HashMap<>(); private final Map decommissionIndexes = new HashMap<>(); private final Map maintenanceIndexes = new HashMap<>(); - private final Set replicas; + private final List replicas; public ECContainerReplicaCount(ContainerInfo containerInfo, Set replicas, List replicaPendingOps, int remainingMaintenanceRedundancy) { this.containerInfo = containerInfo; - this.replicas = replicas; + // Iterate replicas in deterministic order to avoid potential data loss + // on delete. + // See https://issues.apache.org/jira/browse/HDDS-4589. + // N.B., sort replicas by (containerID, datanodeDetails). 
+ this.replicas = replicas.stream() + .sorted(Comparator.comparingLong(ContainerReplica::hashCode)) + .collect(Collectors.toList()); this.repConfig = (ECReplicationConfig)containerInfo.getReplicationConfig(); this.pendingAdd = new ArrayList<>(); this.pendingDelete = new ArrayList<>(); this.remainingMaintenanceRedundancy = Math.min(repConfig.getParity(), remainingMaintenanceRedundancy); + Set unhealthyReplicaDNs = new HashSet<>(); + for (ContainerReplica r : replicas) { + if (r.getState() == ContainerReplicaProto.State.UNHEALTHY) { + unhealthyReplicaDNs.add(r.getDatanodeDetails()); + } + } + for (ContainerReplicaOp op : replicaPendingOps) { if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) { pendingAdd.add(op.getReplicaIndex()); } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) { - pendingDelete.add(op.getReplicaIndex()); + if (!unhealthyReplicaDNs.contains(op.getTarget())) { + // We ignore unhealthy replicas later in this method, so we also + // need to ignore pending deletes on those unhealthy replicas, + // otherwise the pending delete will decrement the healthy count and + // make the container appear under-replicated when it is not. + pendingDelete.add(op.getReplicaIndex()); + } } } @@ -158,7 +180,7 @@ public ContainerInfo getContainer() { } @Override - public Set getReplicas() { + public List getReplicas() { return replicas; } @@ -421,6 +443,49 @@ public boolean isSufficientlyReplicated(boolean includePendingAdd) { >= repConfig.getData() + remainingMaintenanceRedundancy; } + /** + * If we are checking a container for sufficient replication for "offline", + * ie decommission or maintenance, then it is not really a requirement that + * all replicas for the container are present. Instead, we can ensure the + * replica on the node going offline has a copy elsewhere on another + * IN_SERVICE node, and if so that replica is sufficiently replicated. + * @param datanode The datanode being checked to go offline. 
+ * @return True if the container is sufficiently replicated or if this replica + * on the passed node is present elsewhere on an IN_SERVICE node. + */ + @Override + public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode) { + boolean sufficientlyReplicated = isSufficientlyReplicated(false); + if (sufficientlyReplicated) { + return true; + } + // If it is not sufficiently replicated (ie the container does not have + // all replicas) then we need to check if the replica that is on this node + // is available + // on another ONLINE node, ie in the healthy set. This means we avoid + // blocking decommission or maintenance caused by un-recoverable EC + // containers. + if (datanode.getPersistedOpState() == IN_SERVICE) { + // The node passed into this method must be a node going offline, so it + // cannot be IN_SERVICE. If an IN_SERVICE node is passed, just return + // false. + return false; + } + ContainerReplica thisReplica = null; + for (ContainerReplica r : replicas) { + if (r.getDatanodeDetails().equals(datanode)) { + thisReplica = r; + break; + } + } + if (thisReplica == null) { + // From the set of replicas, none are on the passed datanode. + // This should not happen in practice but if it does we cannot indicate + // the container is sufficiently replicated. 
+ return false; + } + return healthyIndexes.containsKey(thisReplica.getReplicaIndex()); + } + @Override public boolean isSufficientlyReplicated() { return isSufficientlyReplicated(false); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECMisReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECMisReplicationHandler.java new file mode 100644 index 000000000000..5998c93134ba --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECMisReplicationHandler.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdds.scm.container.replication; + +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +/** + * Handles the EC Mis replication processing and forming the respective SCM + * commands. + */ +public class ECMisReplicationHandler extends MisReplicationHandler { + public ECMisReplicationHandler( + PlacementPolicy containerPlacement, + ConfigurationSource conf, NodeManager nodeManager) { + super(containerPlacement, conf, nodeManager); + } + + @Override + protected ContainerReplicaCount getContainerReplicaCount( + ContainerInfo containerInfo, Set replicas, + List pendingOps, int remainingMaintenanceRedundancy) + throws IOException { + if (containerInfo.getReplicationType() != HddsProtos.ReplicationType.EC) { + throw new IOException(String.format("Invalid Container Replication Type :" + + " %s.Expected Container Replication Type : EC", + containerInfo.getReplicationType().toString())); + } + return new ECContainerReplicaCount(containerInfo, replicas, pendingOps, + remainingMaintenanceRedundancy); + } + + @Override + protected ReplicateContainerCommand getReplicateCommand( + ContainerInfo containerInfo, ContainerReplica replica) { + final ReplicateContainerCommand replicateCommand = + new ReplicateContainerCommand(containerInfo.getContainerID(), + Collections.singletonList(replica.getDatanodeDetails())); + // For EC containers, we need to track the replica index which is + // to be replicated, so add it to the command. 
+ replicateCommand.setReplicaIndex(replica.getReplicaIndex()); + return replicateCommand; + } + + +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECOverReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECOverReplicationHandler.java index 85295c974961..00c1a264af23 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECOverReplicationHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECOverReplicationHandler.java @@ -18,11 +18,13 @@ package org.apache.hadoop.hdds.scm.container.replication; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.slf4j.Logger; @@ -30,12 +32,10 @@ import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import static java.util.Collections.emptyMap; @@ -74,12 +74,34 @@ public Map> processAndCreateCommands( ContainerHealthResult result, int remainingMaintenanceRedundancy) { ContainerInfo container = result.getContainerInfo(); + // We are going to check for over replication, so we should filter out any + // replicas that are not in a HEALTHY state. 
This is because a replica can + be healthy, stale or dead. If it is dead it will be quickly removed from + scm. If it is stale, there is a good chance the DN is offline and the + replica will go away soon. So, if we have a container that is over + replicated with a HEALTHY and STALE replica, and we decide to delete the + HEALTHY one, and then the STALE one goes away, we will lose them both. + To avoid this, we will filter out any non-healthy replicas first. + EcContainerReplicaCount will ignore nodes which are not IN_SERVICE for + over replication checks, but we need to filter these out later in this + method anyway, so it makes sense to filter them here too, to avoid a + second lookup of the NodeStatus + Set healthyReplicas = replicas.stream() + .filter(r -> { + NodeStatus ns = ReplicationManager.getNodeStatus( + r.getDatanodeDetails(), nodeManager); + return ns.isHealthy() && ns.getOperationalState() == + HddsProtos.NodeOperationalState.IN_SERVICE; + }) + .collect(Collectors.toSet()); + final ECContainerReplicaCount replicaCount = - new ECContainerReplicaCount(container, replicas, pendingOps, + new ECContainerReplicaCount(container, healthyReplicas, pendingOps, remainingMaintenanceRedundancy); if (!replicaCount.isOverReplicated()) { - LOG.info("The container {} state changed and it's not in over" - + " replication any more", container.getContainerID()); + LOG.info("The container {} state changed and it is no longer over" + " replication. 
Replica count: {}, healthy replica count: {}", + container.getContainerID(), replicas.size(), healthyReplicas.size()); return emptyMap(); } @@ -93,10 +115,9 @@ public Map> processAndCreateCommands( replicaCount.overReplicatedIndexes(true); //sanity check if (overReplicatedIndexes.size() == 0) { - LOG.warn("The container {} with replicas {} is found over replicated " + - "by ContainerHealthCheck, but found not over replicated by " + - "ECContainerReplicaCount", - container.getContainerID(), replicas); + LOG.warn("The container {} with replicas {} was found over replicated " + + "by EcContainerReplicaCount, but there are no over replicated " + + "indexes returned", container.getContainerID(), replicas); return emptyMap(); } @@ -106,48 +127,51 @@ public Map> processAndCreateCommands( deletionInFlight.add(op.getTarget()); } } - Map> index2replicas = new HashMap<>(); - replicas.stream() - .filter(r -> overReplicatedIndexes.contains(r.getReplicaIndex())) - .filter(r -> r - .getState() == StorageContainerDatanodeProtocolProtos - .ContainerReplicaProto.State.CLOSED) - .filter(r -> ReplicationManager - .getNodeStatus(r.getDatanodeDetails(), nodeManager).isHealthy()) + + Set candidates = healthyReplicas.stream() .filter(r -> !deletionInFlight.contains(r.getDatanodeDetails())) - .forEach(r -> { - int index = r.getReplicaIndex(); - index2replicas.computeIfAbsent(index, k -> new LinkedList<>()); - index2replicas.get(index).add(r); - }); - - if (index2replicas.size() > 0) { - final Map> commands = new HashMap<>(); - final int replicationFactor = - container.getReplicationConfig().getRequiredNodes(); - index2replicas.values().forEach(l -> { - Iterator it = l.iterator(); - Set tempReplicaSet = new HashSet<>(replicas); - while (it.hasNext() && l.size() > 1) { - ContainerReplica r = it.next(); - if (isPlacementStatusActuallyEqualAfterRemove( - tempReplicaSet, r, replicationFactor)) { - DeleteContainerCommand deleteCommand = - new DeleteContainerCommand(container.getContainerID(), 
true); - deleteCommand.setReplicaIndex(r.getReplicaIndex()); - commands.put(r.getDatanodeDetails(), deleteCommand); - it.remove(); - tempReplicaSet.remove(r); - } - } - }); - if (commands.size() == 0) { - LOG.info("With the current state of avilable replicas {}, no" + - " commands to process due to over replication.", replicas); + .filter(r -> r.getState() == StorageContainerDatanodeProtocolProtos + .ContainerReplicaProto.State.CLOSED) + .collect(Collectors.toSet()); + + Set replicasToRemove = + selectReplicasToRemove(candidates, 1); + + if (replicasToRemove.size() == 0) { + LOG.warn("The container {} is over replicated, but no replicas were " + + "selected to remove by the placement policy. Replicas: {}", + container, replicas); + return emptyMap(); + } + + final Map> commands = new HashMap<>(); + // As a sanity check, sum up the current counts of each replica index. When + // processing replicasToRemove, ensure that removing the replica would not + // drop the count of that index to zero. + Map replicaIndexCounts = new HashMap<>(); + for (ContainerReplica r : candidates) { + replicaIndexCounts.put(r.getReplicaIndex(), + replicaIndexCounts.getOrDefault(r.getReplicaIndex(), 0) + 1); + } + for (ContainerReplica r : replicasToRemove) { + int currentCount = replicaIndexCounts.getOrDefault( + r.getReplicaIndex(), 0); + if (currentCount < 2) { + LOG.warn("The replica {} selected to remove would reduce the count " + + "for that index to zero. 
Candidate Replicas: {}", r, candidates); + continue; } - return commands; + replicaIndexCounts.put(r.getReplicaIndex(), currentCount - 1); + DeleteContainerCommand deleteCommand = + new DeleteContainerCommand(container.getContainerID(), true); + deleteCommand.setReplicaIndex(r.getReplicaIndex()); + commands.put(r.getDatanodeDetails(), deleteCommand); } - return emptyMap(); + if (commands.size() == 0) { + LOG.warn("With the current state of available replicas {}, no" + + " commands were created to remove excess replicas.", replicas); + } + return commands; } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java index 3402f9927bdf..a1e7f1a73f1b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java @@ -136,17 +136,20 @@ public Map> processAndCreateCommands( container.getContainerID(), replicaCount.getReplicas()); return emptyMap(); } - if (replicaCount.isUnrecoverable()) { - LOG.warn("The container {} is unrecoverable. 
The available replicas" + - " are: {}.", container.containerID(), replicaCount.getReplicas()); - return emptyMap(); - } // don't place reconstructed replicas on exclude nodes, since they already // have replicas List excludedNodes = replicas.stream() .map(ContainerReplica::getDatanodeDetails) .collect(Collectors.toList()); + // DNs that are already waiting to receive replicas cannot be targets + excludedNodes.addAll( + pendingOps.stream() + .filter(containerReplicaOp -> containerReplicaOp.getOpType() == + ContainerReplicaOp.PendingOpType.ADD) + .map(ContainerReplicaOp::getTarget) + .collect(Collectors.toList())); + final ContainerID id = container.containerID(); final Map> commands = new HashMap<>(); try { @@ -392,6 +395,9 @@ private void processMaintenanceOnlyIndexes( // this many maintenance replicas need another copy int additionalMaintenanceCopiesNeeded = replicaCount.additionalMaintenanceCopiesNeeded(true); + if (additionalMaintenanceCopiesNeeded == 0) { + return; + } List targets = getTargetDatanodes(excludedNodes, container, additionalMaintenanceCopiesNeeded); excludedNodes.addAll(targets); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java new file mode 100644 index 000000000000..a2139707ca24 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm.container.replication; + +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; + +import java.util.Set; + +/** + * When HDDS-6447 was done to improve the LegacyReplicationManager, work on + * the new replication manager had already started. When this class was added, + * the LegacyReplicationManager needed separate handling for healthy and + * unhealthy container replicas, but the new replication manager did not yet + * have this functionality. This class is used by the + * LegacyReplicationManager to allow {@link RatisContainerReplicaCount} to + * function for both use cases. 
When the new replication manager is finished + * and LegacyReplicationManager is removed, this class should be deleted and + * all necessary functionality consolidated to + * {@link RatisContainerReplicaCount} + */ +public class LegacyRatisContainerReplicaCount extends + RatisContainerReplicaCount { + public LegacyRatisContainerReplicaCount(ContainerInfo container, + Set replicas, + int inFlightAdd, + int inFlightDelete, int replicationFactor, + int minHealthyForMaintenance) { + super(container, replicas, inFlightAdd, inFlightDelete, replicationFactor, + minHealthyForMaintenance); + } + + @Override + protected int healthyReplicaCountAdapter() { + return 0; + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java index 7e86455357c4..80a61ee5db31 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java @@ -72,11 +72,11 @@ import java.lang.reflect.Proxy; import java.time.Clock; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -93,6 +93,7 @@ import static org.apache.hadoop.hdds.conf.ConfigTag.OZONE; import static org.apache.hadoop.hdds.conf.ConfigTag.SCM; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; import static org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.RequestType.MOVE; /** @@ -522,24 +523,32 @@ protected void processContainer(ContainerInfo container, */ boolean sufficientlyReplicated = 
replicaSet.isSufficientlyReplicated(); boolean placementSatisfied = placementStatus.isPolicySatisfied(); + ContainerID containerID = container.containerID(); + if (!placementStatus.isPolicySatisfied()) { + report.incrementAndSample(HealthState.MIS_REPLICATED, containerID); + } + if (!replicaSet.isHealthy()) { + report.incrementAndSample(HealthState.UNHEALTHY, containerID); + } if (!sufficientlyReplicated || !placementSatisfied) { - if (!sufficientlyReplicated) { - report.incrementAndSample( - HealthState.UNDER_REPLICATED, container.containerID()); + // Replicate container if needed. + if (!inflightReplication.isFull() || !inflightDeletion.isFull()) { if (replicaSet.isUnrecoverable()) { - report.incrementAndSample(HealthState.MISSING, - container.containerID()); + // There are no healthy or unhealthy replicas. + report.incrementAndSample(HealthState.MISSING, containerID); + report.incrementAndSample(HealthState.UNDER_REPLICATED, + containerID); + } else { + if (replicaSet.getHealthyReplicaCount() == 0 && + replicaSet.getUnhealthyReplicaCount() != 0) { + handleAllReplicasUnhealthy(container, replicaSet, + placementStatus, report); + } else { + handleUnderReplicatedHealthy(container, + replicaSet, placementStatus, report); + } } } - if (!placementSatisfied) { - report.incrementAndSample(HealthState.MIS_REPLICATED, - container.containerID()); - - } - if (!inflightReplication.isFull() || !inflightDeletion.isFull()) { - handleUnderReplicatedContainer(container, - replicaSet, placementStatus); - } return; } @@ -548,9 +557,7 @@ protected void processContainer(ContainerInfo container, * action. */ if (replicaSet.isOverReplicated()) { - report.incrementAndSample(HealthState.OVER_REPLICATED, - container.containerID()); - handleOverReplicatedContainer(container, replicaSet); + handleOverReplicatedHealthy(container, replicaSet, report); return; } @@ -560,9 +567,7 @@ protected void processContainer(ContainerInfo container, are not in the same state as the container itself. 
*/ if (!replicaSet.isHealthy()) { - report.incrementAndSample(HealthState.UNHEALTHY, - container.containerID()); - handleUnstableContainer(container, replicas); + handleOverReplicatedExcessUnhealthy(container, replicaSet, report); } } } catch (ContainerNotFoundException ex) { @@ -997,7 +1002,7 @@ public ContainerReplicaCount getContainerReplicaCount(ContainerInfo container) */ private RatisContainerReplicaCount getContainerReplicaCount( ContainerInfo container, Set replica) { - return new RatisContainerReplicaCount( + return new LegacyRatisContainerReplicaCount( container, replica, getInflightAdd(container.containerID()), @@ -1126,180 +1131,209 @@ private void forceCloseContainer(final ContainerInfo container, * @param replicaSet An instance of ContainerReplicaCount, containing the * current replica count and inflight adds and deletes */ - private void handleUnderReplicatedContainer(final ContainerInfo container, + private void handleUnderReplicatedHealthy(final ContainerInfo container, final RatisContainerReplicaCount replicaSet, - final ContainerPlacementStatus placementStatus) { + final ContainerPlacementStatus placementStatus, + ReplicationManagerReport report) { LOG.debug("Handling under-replicated container: {}", container); - Set replicas = replicaSet.getReplicas(); - try { + if (replicaSet.isSufficientlyReplicated() + && placementStatus.isPolicySatisfied()) { + LOG.info("The container {} with replicas {} is sufficiently " + + "replicated and is not mis-replicated", + container.getContainerID(), replicaSet); + return; + } - if (replicaSet.isSufficientlyReplicated() - && placementStatus.isPolicySatisfied()) { - LOG.info("The container {} with replicas {} is sufficiently " + - "replicated and is not mis-replicated", - container.getContainerID(), replicaSet); - return; - } - int repDelta = replicaSet.additionalReplicaNeeded(); - final ContainerID id = container.containerID(); - final List deletionInFlight - = inflightDeletion.getDatanodeDetails(id); - final 
List replicationInFlight - = inflightReplication.getDatanodeDetails(id); - final List source = replicas.stream() - .filter(r -> - r.getState() == State.QUASI_CLOSED || - r.getState() == State.CLOSED) - // Exclude stale and dead nodes. This is particularly important for - // maintenance nodes, as the replicas will remain present in the - // container manager, even when they go dead. - .filter(r -> - getNodeStatus(r.getDatanodeDetails()).isHealthy()) - .filter(r -> !deletionInFlight.contains(r.getDatanodeDetails())) - .sorted((r1, r2) -> r2.getSequenceId().compareTo(r1.getSequenceId())) - .map(ContainerReplica::getDatanodeDetails) - .collect(Collectors.toList()); - if (source.size() > 0) { - final int replicationFactor = container - .getReplicationConfig().getRequiredNodes(); - // Want to check if the container is mis-replicated after considering - // inflight add and delete. - // Create a new list from source (healthy replicas minus pending delete) - List targetReplicas = new ArrayList<>(source); - // Then add any pending additions - targetReplicas.addAll(replicationInFlight); - final ContainerPlacementStatus inFlightplacementStatus = - containerPlacement.validateContainerPlacement( - targetReplicas, replicationFactor); - final int misRepDelta = inFlightplacementStatus.misReplicationCount(); - final int replicasNeeded - = repDelta < misRepDelta ? misRepDelta : repDelta; - if (replicasNeeded <= 0) { - LOG.debug("Container {} meets replication requirement with " + - "inflight replicas", id); - return; - } + List allReplicas = replicaSet.getReplicas(); + int numCloseCommandsSent = closeReplicasIfPossible(container, allReplicas); + int replicasNeeded = + replicaSet.additionalReplicaNeeded() - numCloseCommandsSent; - // We should ensure that the target datanode has enough space - // for a complete container to be created, but since the container - // size may be changed smaller than origin, we should be defensive. 
- final long dataSizeRequired = Math.max(container.getUsedBytes(), - currentContainerSize); - final List excludeList = replicas.stream() - .map(ContainerReplica::getDatanodeDetails) - .collect(Collectors.toList()); - excludeList.addAll(replicationInFlight); - final List selectedDatanodes = containerPlacement - .chooseDatanodes(excludeList, null, replicasNeeded, - 0, dataSizeRequired); - if (repDelta > 0) { - LOG.info("Container {} is under replicated. Expected replica count" + - " is {}, but found {}.", id, replicationFactor, - replicationFactor - repDelta); - } - int newMisRepDelta = misRepDelta; - if (misRepDelta > 0) { - LOG.info("Container: {}. {}", - id, placementStatus.misReplicatedReason()); - // Check if the new target nodes (original plus newly selected nodes) - // makes the placement policy valid. - targetReplicas.addAll(selectedDatanodes); - newMisRepDelta = containerPlacement.validateContainerPlacement( - targetReplicas, replicationFactor).misReplicationCount(); - } - if (repDelta > 0 || newMisRepDelta < misRepDelta) { - // Only create new replicas if we are missing a replicas or - // the number of pending mis-replication has improved. No point in - // creating new replicas for mis-replicated containers unless it - // improves things. - for (DatanodeDetails datanode : selectedDatanodes) { - sendReplicateCommand(container, datanode, source); - } - } else { - LOG.warn("Container {} is mis-replicated, requiring {} additional " + - "replicas. After selecting new nodes, mis-replication has not " + - "improved. 
No additional replicas will be scheduled", - id, misRepDelta); - } - } else { - LOG.warn("Cannot replicate container {}, no healthy replica found.", - container.containerID()); - } - } catch (IOException | IllegalStateException ex) { - LOG.warn("Exception while replicating container {}.", - container.getContainerID(), ex); + if (replicasNeeded > 0) { + report.incrementAndSample(HealthState.UNDER_REPLICATED, + container.containerID()); } + + List replicationSources = getReplicationSources(container, + replicaSet.getReplicas(), State.CLOSED, State.QUASI_CLOSED); + // This method will handle topology even if replicasNeeded <= 0. + replicateAnyWithTopology(container, replicationSources, + placementStatus, replicasNeeded); } /** * If the given container is over replicated, identify the datanode(s) * to delete the container and send delete container command to the - * identified datanode(s). + * identified datanode(s). This method ignores unhealthy containers. * * @param container ContainerInfo * @param replicaSet An instance of ContainerReplicaCount, containing the * current replica count and inflight adds and deletes */ - private void handleOverReplicatedContainer(final ContainerInfo container, - final RatisContainerReplicaCount replicaSet) { + private void handleOverReplicatedHealthy(final ContainerInfo container, + final RatisContainerReplicaCount replicaSet, + ReplicationManagerReport report) { - final Set replicas = replicaSet.getReplicas(); final ContainerID id = container.containerID(); final int replicationFactor = container.getReplicationConfig().getRequiredNodes(); int excess = replicaSet.additionalReplicaNeeded() * -1; if (excess > 0) { - LOG.info("Container {} is over replicated. 
Expected replica count" + - " is {}, but found {}.", id, replicationFactor, - replicationFactor + excess); + " is {}, but found {}.", id, replicationFactor, + replicationFactor + excess); + + report.incrementAndSample(HealthState.OVER_REPLICATED, + container.containerID()); + + // The list of replicas that we can potentially delete to fix the over + // replicated state. This method is only concerned with healthy replicas. + final List deleteCandidates = + getHealthyDeletionCandidates(container, replicaSet.getReplicas()); + + if (container.getState() == LifeCycleState.CLOSED) { + // Container is closed, so all healthy replicas are equal. + // We can choose which ones to delete based on topology. + // TODO Legacy RM implementation can only handle topology when all + // container replicas are closed and equal. + deleteExcessWithTopology(excess, container, deleteCandidates); + } else { + // Container is not yet closed. Choose which healthy replicas to + // delete so that we do not lose any origin node IDs. + deleteExcessWithNonUniqueOriginNodeIDs(container, + replicaSet.getReplicas(), + deleteCandidates, excess); + } + } + } - final List eligibleReplicas = new ArrayList<>(replicas); + /** + * Processes replicas of the container when all replicas are unhealthy (in + * a state that does not match the container state). + * + * Unhealthy replicas will first be checked to see if they can be closed. + * If there are more unhealthy replicas than required, some may be deleted. + * If there are fewer unhealthy replicas than required, some may be + * replicated. + */ + private void handleAllReplicasUnhealthy(ContainerInfo container, + RatisContainerReplicaCount replicaSet, + ContainerPlacementStatus placementStatus, + ReplicationManagerReport report) { - // Iterate replicas in deterministic order to avoid potential data loss. - // See https://issues.apache.org/jira/browse/HDDS-4589. - // N.B., sort replicas by (containerID, datanodeDetails). 
- eligibleReplicas.sort( - Comparator.comparingLong(ContainerReplica::hashCode)); + List replicas = replicaSet.getReplicas(); + int excessReplicas = replicas.size() - + container.getReplicationConfig().getRequiredNodes(); + int missingReplicas = excessReplicas * -1; + + if (missingReplicas > 0) { + handleUnderReplicatedAllUnhealthy(container, replicas, + placementStatus, missingReplicas, report); + } else if (excessReplicas > 0) { + handleOverReplicatedAllUnhealthy(container, replicas, + excessReplicas, report); + } else { + // We have the correct number of unhealthy replicas. See if any of them + // can be closed. + closeReplicasIfPossible(container, replicas); + } + } - final Map uniqueReplicas = - new LinkedHashMap<>(); + /** + * Handles a container which has the correct number of healthy replicas, + * but an excess of unhealthy replicas. + * + * If the container is closed, the unhealthy replicas can be deleted. If the + * container is not yet closed, the unhealthy replicas with non-unique + * origin node IDs can be deleted. + */ + private void handleOverReplicatedExcessUnhealthy( + final ContainerInfo container, + final RatisContainerReplicaCount replicaSet, + ReplicationManagerReport report) { + // Note - ReplicationManager would reach here only if the + // following conditions are met: + // 1. Container is in either CLOSED or QUASI-CLOSED state + // 2. We have adequate healthy replicas with extra unhealthy + // replicas. + + List replicas = replicaSet.getReplicas(); + List unhealthyReplicas = + getUnhealthyDeletionCandidates(container, replicas); + + // Only unhealthy replicas which cannot be closed will remain eligible + // for deletion, since this method is deleting unhealthy containers only. + closeReplicasIfPossible(container, unhealthyReplicas); + if (!unhealthyReplicas.isEmpty()) { + LOG.info("Container {} has {} excess unhealthy replicas. 
Excess " + + "unhealthy replicas will be deleted.", + container.getContainerID(), unhealthyReplicas.size()); + + report.incrementAndSample(HealthState.OVER_REPLICATED, + container.containerID()); + + int excessReplicaCount = replicas.size() - + container.getReplicationConfig().getRequiredNodes(); + if (container.getState() == LifeCycleState.CLOSED) { + // The container is already closed. The unhealthy replicas are extras + // and unnecessary. + deleteExcess(container, unhealthyReplicas, excessReplicaCount); + } else { + // Container is not yet closed. + // We only need to save the unhealthy replicas if they + // represent unique origin node IDs. If recovering these replicas is + // possible in the future they could be used to close the container. + deleteExcessWithNonUniqueOriginNodeIDs(container, + replicaSet.getReplicas(), unhealthyReplicas, excessReplicaCount); + } + } + } - if (container.getState() != LifeCycleState.CLOSED) { - replicas.stream() - .filter(r -> compareState(container.getState(), r.getState())) - .forEach(r -> uniqueReplicas - .putIfAbsent(r.getOriginDatanodeId(), r)); + /** + * Returns the replicas from {@code replicas} that: + * - Do not have in flight deletions + * - Exist on healthy datanodes + * - Have a replica state matching one of {@code validReplicaStates}. If + * this parameter is empty, any replica state is valid. + */ + private List getReplicationSources(ContainerInfo container, + List replicas, State... validReplicaStates) { + final List deletionInFlight + = inflightDeletion.getDatanodeDetails(container.containerID()); + final Set validReplicaStateSet = Arrays.stream(validReplicaStates) + .collect(Collectors.toSet()); + return replicas.stream() + // Exclude stale and dead nodes. This is particularly important for + // maintenance nodes, as the replicas will remain present in the + // container manager, even when they go dead. 
+ .filter(r -> getNodeStatus(r.getDatanodeDetails()).isHealthy() + && !deletionInFlight.contains(r.getDatanodeDetails()) + && (validReplicaStateSet.isEmpty() || + validReplicaStateSet.contains(r.getState()))) + .collect(Collectors.toList()); + } - eligibleReplicas.removeAll(uniqueReplicas.values()); - } - // Replica which are maintenance or decommissioned are not eligible to - // be removed, as they do not count toward over-replication and they - // also many not be available - eligibleReplicas.removeIf(r -> - r.getDatanodeDetails().getPersistedOpState() != - NodeOperationalState.IN_SERVICE); - - final List unhealthyReplicas = eligibleReplicas - .stream() - .filter(r -> !compareState(container.getState(), r.getState())) - .collect(Collectors.toList()); + private List getHealthyDeletionCandidates( + ContainerInfo container, List replicas) { + return getDeletionCandidates(container, replicas, true); + } - // If there are unhealthy replicas, then we should remove them even if it - // makes the container violate the placement policy, as excess unhealthy - // containers are not really useful. It will be corrected later as a - // mis-replicated container will be seen as under-replicated. - for (ContainerReplica r : unhealthyReplicas) { - if (excess > 0) { - sendDeleteCommand(container, r.getDatanodeDetails(), true); - excess -= 1; - } else { - break; - } - } - eligibleReplicas.removeAll(unhealthyReplicas); - removeExcessReplicasIfNeeded(excess, container, eligibleReplicas); - } + private List getUnhealthyDeletionCandidates( + ContainerInfo container, List replicas) { + return getDeletionCandidates(container, replicas, false); + } + + /** + * A replica is eligible for deletion if its datanode is healthy and + * IN_SERVICE. 
+ */ + private List getDeletionCandidates(ContainerInfo container, + List replicas, boolean healthy) { + return replicas.stream() + .filter(r -> getNodeStatus(r.getDatanodeDetails()).isHealthy() + && compareState(container.getState(), r.getState()) == healthy + && r.getDatanodeDetails().getPersistedOpState() == IN_SERVICE) + .collect(Collectors.toList()); } /** @@ -1355,57 +1389,6 @@ private void deleteSrcDnForMove(final ContainerInfo cif, } } - /** - * remove execess replicas if needed, replicationFactor and placement policy - * will be take into consideration. - * - * @param excess the excess number after subtracting replicationFactor - * @param container ContainerInfo - * @param eligibleReplicas An list of replicas, which may have excess replicas - */ - private void removeExcessReplicasIfNeeded(int excess, - final ContainerInfo container, - final List eligibleReplicas) { - // After removing all unhealthy replicas, if the container is still over - // replicated then we need to check if it is already mis-replicated. - // If it is, we do no harm by removing excess replicas. However, if it is - // not mis-replicated, then we can only remove replicas if they don't - // make the container become mis-replicated. - if (excess > 0) { - Set eligibleSet = new HashSet<>(eligibleReplicas); - final int replicationFactor = - container.getReplicationConfig().getRequiredNodes(); - ContainerPlacementStatus ps = - getPlacementStatus(eligibleSet, replicationFactor); - - for (ContainerReplica r : eligibleReplicas) { - if (excess <= 0) { - break; - } - // First remove the replica we are working on from the set, and then - // check if the set is now mis-replicated. - eligibleSet.remove(r); - ContainerPlacementStatus nowPS = - getPlacementStatus(eligibleSet, replicationFactor); - if (isPlacementStatusActuallyEqual(ps, nowPS)) { - // Remove the replica if the container was already unsatisfied - // and losing this replica keep actual placement count unchanged. 
- // OR if losing this replica still keep satisfied - sendDeleteCommand(container, r.getDatanodeDetails(), true); - excess -= 1; - continue; - } - // If we decided not to remove this replica, put it back into the set - eligibleSet.add(r); - } - if (excess > 0) { - LOG.info("The container {} is over replicated with {} excess " + - "replica. The excess replicas cannot be removed without " + - "violating the placement policy", container, excess); - } - } - } - /** * whether the given two ContainerPlacementStatus are actually equal. * @@ -1436,65 +1419,6 @@ private ContainerPlacementStatus getPlacementStatus( replicaDns, replicationFactor); } - /** - * Handles unstable container. - * A container is inconsistent if any of the replica state doesn't - * match the container state. We have to take appropriate action - * based on state of the replica. - * - * @param container ContainerInfo - * @param replicas Set of ContainerReplicas - */ - private void handleUnstableContainer(final ContainerInfo container, - final Set replicas) { - // Find unhealthy replicas - List unhealthyReplicas = replicas.stream() - .filter(r -> !compareState(container.getState(), r.getState())) - .collect(Collectors.toList()); - - Iterator iterator = unhealthyReplicas.iterator(); - while (iterator.hasNext()) { - final ContainerReplica replica = iterator.next(); - final State state = replica.getState(); - if (state == State.OPEN || state == State.CLOSING) { - sendCloseCommand(container, replica.getDatanodeDetails(), false); - iterator.remove(); - } - - if (state == State.QUASI_CLOSED) { - // Send force close command if the BCSID matches - if (container.getSequenceId() == replica.getSequenceId()) { - sendCloseCommand(container, replica.getDatanodeDetails(), true); - iterator.remove(); - } - } - } - - // Now we are left with the replicas which are either unhealthy or - // the BCSID doesn't match. These replicas should be deleted. 
- - /* - * If we have unhealthy replicas we go under replicated and then - * replicate the healthy copy. - * - * We also make sure that we delete only one unhealthy replica at a time. - * - * If there are two unhealthy replica: - * - Delete first unhealthy replica - * - Re-replicate the healthy copy - * - Delete second unhealthy replica - * - Re-replicate the healthy copy - * - * Note: Only one action will be executed in a single ReplicationMonitor - * iteration. So to complete all the above actions we need four - * ReplicationMonitor iterations. - */ - - unhealthyReplicas.stream().findFirst().ifPresent(replica -> - sendDeleteCommand(container, replica.getDatanodeDetails(), true)); - - } - /** * Sends close container command for the given container to the given * datanode. @@ -2009,5 +1933,325 @@ private void compleleteMoveFutureWithResult(ContainerID cid, MoveResult mr) { inflightMoveFuture.remove(cid); } } -} + private int closeReplicasIfPossible(ContainerInfo container, + List replicas) { + // This method should not be used on open containers. + if (container.getState() == LifeCycleState.OPEN) { + return 0; + } + + int numCloseCmdsSent = 0; + Iterator iterator = replicas.iterator(); + while (iterator.hasNext()) { + final ContainerReplica replica = iterator.next(); + final State state = replica.getState(); + if (state == State.OPEN || state == State.CLOSING) { + sendCloseCommand(container, replica.getDatanodeDetails(), false); + numCloseCmdsSent++; + iterator.remove(); + } else if (state == State.QUASI_CLOSED) { + // Send force close command if the BCSID matches + if (container.getSequenceId() == replica.getSequenceId()) { + sendCloseCommand(container, replica.getDatanodeDetails(), true); + numCloseCmdsSent++; + iterator.remove(); + } + } + } + + return numCloseCmdsSent; + } + + /* HELPER METHODS FOR UNHEALTHY OVER AND UNDER REPLICATED CONTAINERS */ + + /** + * Process a container with more replicas than required where all replicas + * are unhealthy. 
+ * + * First try to close any replicas that are unhealthy due to pending + * closure. Replicas that can be closed will become healthy and will not be + * processed by this method. + * If the container is closed, delete replicas with lower BCSIDs first. + * If the container is not yet closed, delete replicas with origin node IDs + * already represented by other replicas. + */ + private void handleOverReplicatedAllUnhealthy(ContainerInfo container, + List replicas, int excess, + ReplicationManagerReport report) { + List deleteCandidates = + getUnhealthyDeletionCandidates(container, replicas); + + // Only unhealthy replicas which cannot be closed will remain eligible + // for deletion, since this method is deleting unhealthy containers only. + closeReplicasIfPossible(container, deleteCandidates); + if (deleteCandidates.isEmpty()) { + return; + } + + if (excess > 0) { + report.incrementAndSample(HealthState.OVER_REPLICATED, + container.containerID()); + int replicationFactor = container.getReplicationFactor().getNumber(); + LOG.info("Container {} has all unhealthy replicas and is over " + + "replicated. Expected replica count" + + " is {}, but found {}.", container.getContainerID(), + replicationFactor, replicationFactor + excess); + } + + if (container.getState() == LifeCycleState.CLOSED) { + // Prefer to delete unhealthy replicas with lower BCS IDs. + // If the replica became unhealthy after the container was closed but + // before the replica could be closed, it may have a smaller BCSID. + deleteExcessLowestBcsIDs(container, deleteCandidates, excess); + } else { + // Container is not yet closed. + // We only need to save the unhealthy replicas if they + // represent unique origin node IDs. If recovering these replicas is + // possible in the future they could be used to close the container. 
+ deleteExcessWithNonUniqueOriginNodeIDs(container, + replicas, deleteCandidates, excess); + } + } + + /** + * Processes container replicas when all replicas are unhealthy and there + * are fewer than the required number of replicas. + * + * If any of these replicas unhealthy because they are pending closure and + * they can be closed, close them to create a healthy replica that can be + * replicated. + * If none of the replicas can be closed, use one of the unhealthy replicas + * to restore replica count while satisfying topology requirements. + */ + private void handleUnderReplicatedAllUnhealthy(ContainerInfo container, + List replicas, ContainerPlacementStatus placementStatus, + int additionalReplicasNeeded, ReplicationManagerReport report) { + + report.incrementAndSample(HealthState.UNDER_REPLICATED, + container.containerID()); + int numCloseCmdsSent = closeReplicasIfPossible(container, replicas); + // Only replicate unhealthy containers if none of the unhealthy replicas + // could be closed. If we sent a close command to an unhealthy replica, + // we should wait for that to complete and replicate it when it becomes + // healthy on a future iteration. + if (numCloseCmdsSent == 0) { + LOG.info("Container {} is under replicated missing {} replicas with all" + + " replicas unhealthy. Copying unhealthy replicas.", + container.getContainerID(), additionalReplicasNeeded); + // TODO Datanodes currently shuffle sources, so we cannot prioritize + // some replicas based on BCSID or origin node ID. + replicateAnyWithTopology(container, + getReplicationSources(container, replicas), placementStatus, + additionalReplicasNeeded); + } + } + + /* HELPER METHODS FOR ALL OVER AND UNDER REPLICATED CONTAINERS */ + + /** + * Deletes the first {@code excess} replicas from {@code deleteCandidates}. + * Replicas whose datanode operation state is not IN_SERVICE will be skipped. 
+ */ + private void deleteExcess(ContainerInfo container, + List deleteCandidates, int excess) { + // Replica which are maintenance or decommissioned are not eligible to + // be removed, as they do not count toward over-replication and they + // also may not be available + deleteCandidates.removeIf(r -> + r.getDatanodeDetails().getPersistedOpState() != + NodeOperationalState.IN_SERVICE); + + deleteCandidates.stream().limit(excess).forEach(r -> + sendDeleteCommand(container, r.getDatanodeDetails(), true)); + } + + /** + * remove execess replicas if needed, replicationFactor and placement policy + * will be take into consideration. + * + * @param excess the excess number after subtracting replicationFactor + * @param container ContainerInfo + * @param eligibleReplicas An list of replicas, which may have excess replicas + */ + private void deleteExcessWithTopology(int excess, + final ContainerInfo container, + final List eligibleReplicas) { + // After removing all unhealthy replicas, if the container is still over + // replicated then we need to check if it is already mis-replicated. + // If it is, we do no harm by removing excess replicas. However, if it is + // not mis-replicated, then we can only remove replicas if they don't + // make the container become mis-replicated. + if (excess > 0) { + Set eligibleSet = new HashSet<>(eligibleReplicas); + final int replicationFactor = + container.getReplicationConfig().getRequiredNodes(); + ContainerPlacementStatus ps = + getPlacementStatus(eligibleSet, replicationFactor); + + for (ContainerReplica r : eligibleReplicas) { + if (excess <= 0) { + break; + } + // First remove the replica we are working on from the set, and then + // check if the set is now mis-replicated. 
+ eligibleSet.remove(r); + ContainerPlacementStatus nowPS = + getPlacementStatus(eligibleSet, replicationFactor); + if (isPlacementStatusActuallyEqual(ps, nowPS)) { + // Remove the replica if the container was already unsatisfied + // and losing this replica keep actual placement count unchanged. + // OR if losing this replica still keep satisfied + sendDeleteCommand(container, r.getDatanodeDetails(), true); + excess -= 1; + continue; + } + // If we decided not to remove this replica, put it back into the set + eligibleSet.add(r); + } + if (excess > 0) { + LOG.info("The container {} is over replicated with {} excess " + + "replica. The excess replicas cannot be removed without " + + "violating the placement policy", container, excess); + } + } + } + + private void deleteExcessWithNonUniqueOriginNodeIDs(ContainerInfo container, + List allReplicas, + List deleteCandidates, int excess) { + // Remove delete candidates whose origin node ID is not already covered + // by an existing replica. + // TODO topology handling must be improved to make an optimal + // choice as to which replica to keep. + + // Gather the origin node IDs of replicas which are not candidates for + // deletion. + Set existingOriginNodeIDs = allReplicas.stream() + .filter(r -> !deleteCandidates.contains(r)) + .map(ContainerReplica::getOriginDatanodeId) + .collect(Collectors.toSet()); + + List nonUniqueDeleteCandidates = new ArrayList<>(); + for (ContainerReplica replica: deleteCandidates) { + if (existingOriginNodeIDs.contains(replica.getOriginDatanodeId())) { + nonUniqueDeleteCandidates.add(replica); + } else { + // Spare this replica with this new origin node ID from deletion. + // delete candidates seen later in the loop with this same origin + // node ID can be deleted. + existingOriginNodeIDs.add(replica.getOriginDatanodeId()); + } + } + + if (LOG.isDebugEnabled() && nonUniqueDeleteCandidates.size() < excess) { + LOG.debug("Unable to delete {} excess replicas of container {}. 
Only {}" + + " replicas can be deleted to preserve unique origin node IDs for " + + "this unclosed container.", excess, container.getContainerID(), + nonUniqueDeleteCandidates.size()); + } + deleteExcess(container, nonUniqueDeleteCandidates, excess); + } + + /** + * Delete {@code excess} replicas from {@code deleteCandidates}, deleting + * those with lowest BCSIDs first. + */ + private void deleteExcessLowestBcsIDs(ContainerInfo container, + List deleteCandidates, int excess) { + // Sort containers with lowest BCSID first. These will be the first ones + // deleted. + deleteCandidates.sort( + Comparator.comparingLong(ContainerReplica::getSequenceId).reversed()); + deleteExcess(container, deleteCandidates, excess); + } + + /** + * Choose {@code additionalReplicasNeeded} datanodes to make copies of some + * of the container replicas to restore replication factor or satisfy + * topology requirements. + */ + private void replicateAnyWithTopology(ContainerInfo container, + List replicas, + ContainerPlacementStatus placementStatus, int additionalReplicasNeeded) { + try { + final ContainerID id = container.containerID(); + + final List sourceDNs = replicas.stream() + .map(ContainerReplica::getDatanodeDetails) + .collect(Collectors.toList()); + final List replicationInFlight + = inflightReplication.getDatanodeDetails(id); + if (sourceDNs.size() > 0) { + final int replicationFactor = container + .getReplicationConfig().getRequiredNodes(); + // Want to check if the container is mis-replicated after considering + // inflight add and delete. 
+ // Create a new list from source (healthy replicas minus pending delete) + List targetReplicas = new ArrayList<>(sourceDNs); + // Then add any pending additions + targetReplicas.addAll(replicationInFlight); + final ContainerPlacementStatus inFlightplacementStatus = + containerPlacement.validateContainerPlacement( + targetReplicas, replicationFactor); + final int misRepDelta = inFlightplacementStatus.misReplicationCount(); + final int replicasNeeded = + Math.max(additionalReplicasNeeded, misRepDelta); + if (replicasNeeded <= 0) { + LOG.debug("Container {} meets replication requirement with " + + "inflight replicas", id); + return; + } + + // We should ensure that the target datanode has enough space + // for a complete container to be created, but since the container + // size may be changed smaller than origin, we should be defensive. + final long dataSizeRequired = Math.max(container.getUsedBytes(), + currentContainerSize); + final List excludeList = replicas.stream() + .map(ContainerReplica::getDatanodeDetails) + .collect(Collectors.toList()); + excludeList.addAll(replicationInFlight); + final List selectedDatanodes = containerPlacement + .chooseDatanodes(excludeList, null, replicasNeeded, + 0, dataSizeRequired); + if (additionalReplicasNeeded > 0) { + LOG.info("Container {} is under replicated. Expected replica count" + + " is {}, but found {}.", id, replicationFactor, + replicationFactor - additionalReplicasNeeded); + } + int newMisRepDelta = misRepDelta; + if (misRepDelta > 0) { + LOG.info("Container: {}. {}", + id, placementStatus.misReplicatedReason()); + // Check if the new target nodes (original plus newly selected nodes) + // makes the placement policy valid. 
+ targetReplicas.addAll(selectedDatanodes); + newMisRepDelta = containerPlacement.validateContainerPlacement( + targetReplicas, replicationFactor).misReplicationCount(); + } + if (additionalReplicasNeeded > 0 || newMisRepDelta < misRepDelta) { + // Only create new replicas if we are missing a replicas or + // the number of pending mis-replication has improved. No point in + // creating new replicas for mis-replicated containers unless it + // improves things. + for (DatanodeDetails datanode : selectedDatanodes) { + sendReplicateCommand(container, datanode, sourceDNs); + } + } else { + LOG.warn("Container {} is mis-replicated, requiring {} additional " + + "replicas. After selecting new nodes, mis-replication has" + + "not improved. No additional replicas will be scheduled", + id, misRepDelta); + } + } else { + LOG.warn("Cannot replicate container {}, no healthy datanodes with " + + "replica found.", + container.containerID()); + } + } catch (IOException | IllegalStateException ex) { + LOG.warn("Exception while replicating container {}.", + container.getContainerID(), ex); + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/MisReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/MisReplicationHandler.java new file mode 100644 index 000000000000..b418b9236eab --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/MisReplicationHandler.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.container.replication; + +import com.google.common.collect.Maps; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.StorageUnit; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; +import org.apache.hadoop.ozone.protocol.commands.SCMCommand; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Handles the Mis replication processing and forming the respective SCM + * commands. + * Mis-replication: State of replicas where containers are neither + * under-replicated or over-replicated, but the existing placement + * of containers do not conform to the configured placement policy + * of the container. 
+ */ +public abstract class MisReplicationHandler implements + UnhealthyReplicationHandler { + + public static final Logger LOG = + LoggerFactory.getLogger(MisReplicationHandler.class); + private final PlacementPolicy containerPlacement; + private final long currentContainerSize; + private final NodeManager nodeManager; + + public MisReplicationHandler( + final PlacementPolicy containerPlacement, + final ConfigurationSource conf, NodeManager nodeManager) { + this.containerPlacement = containerPlacement; + this.currentContainerSize = (long) conf.getStorageSize( + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE, + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES); + this.nodeManager = nodeManager; + } + + protected abstract ContainerReplicaCount getContainerReplicaCount( + ContainerInfo containerInfo, Set replicas, + List pendingOps, int remainingMaintenanceRedundancy) + throws IOException; + + private List getTargetDatanodes( + List usedNodes, List excludedNodes, + ContainerInfo container, int requiredNodes) throws IOException { + final long dataSizeRequired = + Math.max(container.getUsedBytes(), currentContainerSize); + while (requiredNodes > 0) { + try { + return containerPlacement.chooseDatanodes(usedNodes, excludedNodes, + null, requiredNodes, 0, dataSizeRequired); + } catch (IOException e) { + requiredNodes -= 1; + } + } + throw new SCMException(String.format("Placement Policy: %s did not return" + + " any number of nodes. 
Number of required " + + "Nodes %d, Datasize Required: %d", + containerPlacement.getClass(), requiredNodes, dataSizeRequired), + SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE); + } + + private Set filterSources(Set replicas) { + return replicas.stream() + .filter(r -> r.getState() == StorageContainerDatanodeProtocolProtos + .ContainerReplicaProto.State.CLOSED || r.getState() == + StorageContainerDatanodeProtocolProtos + .ContainerReplicaProto.State.QUASI_CLOSED + ) + .filter(r -> ReplicationManager.getNodeStatus( + r.getDatanodeDetails(), nodeManager).isHealthy()) + .filter(r -> r.getDatanodeDetails().getPersistedOpState() + == HddsProtos.NodeOperationalState.IN_SERVICE) + .collect(Collectors.toSet()); + } + + protected abstract ReplicateContainerCommand getReplicateCommand( + ContainerInfo containerInfo, ContainerReplica replica); + + private Map> getReplicateCommands( + ContainerInfo containerInfo, + Set replicasToBeReplicated, + List targetDns) { + Map> commandMap = Maps.newHashMap(); + int datanodeIdx = 0; + for (ContainerReplica replica : replicasToBeReplicated) { + if (datanodeIdx == targetDns.size()) { + break; + } + commandMap.put(targetDns.get(datanodeIdx), + getReplicateCommand(containerInfo, replica)); + datanodeIdx += 1; + } + return commandMap; + + } + @Override + public Map> processAndCreateCommands( + Set replicas, List pendingOps, + ContainerHealthResult result, int remainingMaintenanceRedundancy) + throws IOException { + ContainerInfo container = result.getContainerInfo(); + if (!pendingOps.isEmpty()) { + LOG.info("Skipping Mis-Replication for Container {}, " + + "as there are still some pending ops for the container: {}", + container, pendingOps); + return Collections.emptyMap(); + } + ContainerReplicaCount replicaCount = getContainerReplicaCount(container, + replicas, Collections.emptyList(), remainingMaintenanceRedundancy); + + if (!replicaCount.isSufficientlyReplicated() || + replicaCount.isOverReplicated()) { + LOG.info("Container {} 
state should be neither under replicated " + + "nor over replicated before resolving misreplication." + + "Container UnderReplication status: {}," + + "Container OverReplication status: {}", + container.getContainerID(), + !replicaCount.isSufficientlyReplicated(), + replicaCount.isOverReplicated()); + return Collections.emptyMap(); + } + + List usedDns = replicas.stream() + .map(ContainerReplica::getDatanodeDetails) + .collect(Collectors.toList()); + if (containerPlacement.validateContainerPlacement(usedDns, + usedDns.size()).isPolicySatisfied()) { + LOG.info("Container {} is currently not misreplicated", + container.getContainerID()); + return Collections.emptyMap(); + } + + Set sources = filterSources(replicas); + Set replicasToBeReplicated = containerPlacement + .replicasToCopyToFixMisreplication(replicas.stream() + .collect(Collectors.toMap(Function.identity(), sources::contains))); + usedDns = replicas.stream().filter(r -> !replicasToBeReplicated.contains(r)) + .map(ContainerReplica::getDatanodeDetails) + .collect(Collectors.toList()); + List excludedDns = replicasToBeReplicated.stream() + .map(ContainerReplica::getDatanodeDetails) + .collect(Collectors.toList()); + List targetDatanodes = getTargetDatanodes(usedDns, + excludedDns, container, replicasToBeReplicated.size()); + if (targetDatanodes.size() < replicasToBeReplicated.size()) { + LOG.warn("Placement Policy {} found only {} nodes for Container: {}," + + " number of required nodes: {}, usedNodes : {}", + containerPlacement.getClass(), targetDatanodes.size(), + container.getContainerID(), replicasToBeReplicated.size(), + usedDns); + } + return getReplicateCommands(container, replicasToBeReplicated, + targetDatanodes); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/OverReplicatedProcessor.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/OverReplicatedProcessor.java index 78e625d34a57..93f536a6442d 100644 
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/OverReplicatedProcessor.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/OverReplicatedProcessor.java @@ -17,11 +17,8 @@ */ package org.apache.hadoop.hdds.scm.container.replication; -import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.Map; @@ -31,89 +28,32 @@ * queue, calculate the delete commands and assign to the datanodes * via the eventQueue. */ -public class OverReplicatedProcessor implements Runnable { - - private static final Logger LOG = LoggerFactory - .getLogger(OverReplicatedProcessor.class); - private final ReplicationManager replicationManager; - private volatile boolean runImmediately = false; - private final long intervalInMillis; +public class OverReplicatedProcessor extends UnhealthyReplicationProcessor + { public OverReplicatedProcessor(ReplicationManager replicationManager, long intervalInMillis) { - this.replicationManager = replicationManager; - this.intervalInMillis = intervalInMillis; - } + super(replicationManager, intervalInMillis); - /** - * Read messages from the ReplicationManager over replicated queue and, - * form commands to correct the over replication. The commands are added - * to the event queue and the PendingReplicaOps are adjusted. - * - * Note: this is a temporary implementation of this feature. A future - * version will need to limit the amount of messages assigned to each - * datanode, so they are not assigned too much work. 
- */ - public void processAll() { - int processed = 0; - int failed = 0; - while (true) { - if (!replicationManager.shouldRun()) { - break; - } - ContainerHealthResult.OverReplicatedHealthResult overRep = - replicationManager.dequeueOverReplicatedContainer(); - if (overRep == null) { - break; - } - try { - processContainer(overRep); - processed++; - } catch (Exception e) { - LOG.error("Error processing over replicated container {}", - overRep.getContainerInfo(), e); - failed++; - replicationManager.requeueOverReplicatedContainer(overRep); - } - } - LOG.info("Processed {} over replicated containers, failed processing {}", - processed, failed); } - protected void processContainer(ContainerHealthResult - .OverReplicatedHealthResult overRep) throws IOException { - Map> cmds = replicationManager - .processOverReplicatedContainer(overRep); - for (Map.Entry> cmd : cmds.entrySet()) { - SCMCommand scmCmd = cmd.getValue(); - replicationManager.sendDatanodeCommand(scmCmd, overRep.getContainerInfo(), - cmd.getKey()); - } + @Override + protected ContainerHealthResult.OverReplicatedHealthResult + dequeueHealthResultFromQueue(ReplicationManager replicationManager) { + return replicationManager.dequeueOverReplicatedContainer(); } @Override - public void run() { - try { - while (!Thread.currentThread().isInterrupted()) { - if (replicationManager.shouldRun()) { - processAll(); - } - synchronized (this) { - if (!runImmediately) { - wait(intervalInMillis); - } - runImmediately = false; - } - } - } catch (InterruptedException e) { - LOG.warn("{} interrupted. 
Exiting...", Thread.currentThread().getName()); - Thread.currentThread().interrupt(); - } + protected void requeueHealthResultFromQueue( + ReplicationManager replicationManager, + ContainerHealthResult.OverReplicatedHealthResult healthResult) { + replicationManager.requeueOverReplicatedContainer(healthResult); } - - @VisibleForTesting - synchronized void runImmediately() { - runImmediately = true; - notify(); + @Override + protected Map> getDatanodeCommands( + ReplicationManager replicationManager, + ContainerHealthResult.OverReplicatedHealthResult healthResult) + throws IOException { + return replicationManager.processOverReplicatedContainer(healthResult); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java index 577fc6004d0c..ca30d2d5b86a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java @@ -17,11 +17,16 @@ */ package org.apache.hadoop.hdds.scm.container.replication; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; import java.util.Set; +import java.util.stream.Collectors; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; @@ -37,7 +42,8 @@ */ public class RatisContainerReplicaCount implements ContainerReplicaCount { - private int healthyCount; + 
private int healthyReplicaCount; + private int unhealthyReplicaCount; private int decommissionCount; private int maintenanceCount; private final int inFlightAdd; @@ -45,24 +51,32 @@ public class RatisContainerReplicaCount implements ContainerReplicaCount { private final int repFactor; private final int minHealthyForMaintenance; private final ContainerInfo container; - private final Set replica; + private final List replicas; public RatisContainerReplicaCount(ContainerInfo container, - Set replica, int inFlightAdd, + Set replicas, + int inFlightAdd, int inFlightDelete, int replicationFactor, int minHealthyForMaintenance) { - this.healthyCount = 0; + this.unhealthyReplicaCount = 0; + this.healthyReplicaCount = 0; this.decommissionCount = 0; this.maintenanceCount = 0; this.inFlightAdd = inFlightAdd; this.inFlightDel = inFlightDelete; this.repFactor = replicationFactor; - this.replica = replica; + // Iterate replicas in deterministic order to avoid potential data loss + // on delete. + // See https://issues.apache.org/jira/browse/HDDS-4589. + // N.B., sort replicas by (containerID, datanodeDetails). 
+ this.replicas = replicas.stream() + .sorted(Comparator.comparingLong(ContainerReplica::hashCode)) + .collect(Collectors.toList()); this.minHealthyForMaintenance = Math.min(this.repFactor, minHealthyForMaintenance); this.container = container; - for (ContainerReplica cr : this.replica) { + for (ContainerReplica cr : this.replicas) { HddsProtos.NodeOperationalState state = cr.getDatanodeDetails().getPersistedOpState(); if (state == DECOMMISSIONED || state == DECOMMISSIONING) { @@ -70,13 +84,33 @@ public RatisContainerReplicaCount(ContainerInfo container, } else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) { maintenanceCount++; } else { - healthyCount++; + if (LegacyReplicationManager.compareState(container.getState(), + cr.getState())) { + healthyReplicaCount++; + } else { + unhealthyReplicaCount++; + } } } } - public int getHealthyCount() { - return healthyCount; + public int getHealthyReplicaCount() { + return healthyReplicaCount + healthyReplicaCountAdapter(); + } + + public int getUnhealthyReplicaCount() { + return unhealthyReplicaCount; + } + + /** + * The new replication manager currently counts unhealthy and healthy + * replicas together. This should be updated when changes from HDDS-6447 + * are integrated into the new replication manager. See + * {@link LegacyRatisContainerReplicaCount}, which overrides this method, for + * details. 
+ */ + protected int healthyReplicaCountAdapter() { + return getUnhealthyReplicaCount(); } @Override @@ -99,15 +133,16 @@ public ContainerInfo getContainer() { } @Override - public Set getReplicas() { - return replica; + public List getReplicas() { + return new ArrayList<>(replicas); } @Override public String toString() { return "Container State: " + container.getState() + - " Replica Count: " + replica.size() + - " Healthy Count: " + healthyCount + + " Replica Count: " + replicas.size() + + " Healthy Count: " + healthyReplicaCount + + " Unhealthy Count: " + unhealthyReplicaCount + " Decommission Count: " + decommissionCount + " Maintenance Count: " + maintenanceCount + " inFlightAdd Count: " + inFlightAdd + @@ -210,7 +245,7 @@ public int additionalReplicaNeeded() { * for under replicated and a negative value for over replicated. */ private int missingReplicas() { - int delta = repFactor - healthyCount; + int delta = repFactor - getHealthyReplicaCount(); if (delta < 0) { // Over replicated, so may need to remove a container. @@ -219,7 +254,7 @@ private int missingReplicas() { // May be under-replicated, depending on maintenance. delta = Math.max(0, delta - maintenanceCount); int neededHealthy = - Math.max(0, minHealthyForMaintenance - healthyCount); + Math.max(0, minHealthyForMaintenance - getHealthyReplicaCount()); delta = Math.max(neededHealthy, delta); return delta; } else { // delta == 0 @@ -247,6 +282,17 @@ public boolean isSufficientlyReplicated() { return isSufficientlyReplicated(false); } + /** + * For Ratis, this method is the same as isSufficientlyReplicated. + * @param datanode Not used in this implementation + * @return True if the container is sufficiently replicated and False + * otherwise. + */ + @Override + public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode) { + return isSufficientlyReplicated(); + } + /** * Return true if the container is sufficiently replicated. 
Decommissioning * and Decommissioned containers are ignored in this check, assuming they will @@ -334,7 +380,8 @@ private int redundancyDelta(boolean includePendingDelete, */ public int getRemainingRedundancy() { return Math.max(0, - healthyCount + decommissionCount + maintenanceCount - inFlightDel - 1); + getHealthyReplicaCount() + decommissionCount + maintenanceCount + - inFlightDel - 1); } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisMisReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisMisReplicationHandler.java new file mode 100644 index 000000000000..7dfe50e955b6 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisMisReplicationHandler.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.hadoop.hdds.scm.container.replication;

import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.PlacementPolicy;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Set;

/**
 * Handles the Ratis mis replication processing and forming the respective SCM
 * commands.
 */
public class RatisMisReplicationHandler extends MisReplicationHandler {

  public RatisMisReplicationHandler(
      PlacementPolicy containerPlacement,
      ConfigurationSource conf, NodeManager nodeManager) {
    super(containerPlacement, conf, nodeManager);
  }

  /**
   * Builds the replica count object used to decide how this Ratis
   * container should be re-replicated.
   *
   * @param containerInfo the container being processed; must use RATIS
   *                      replication
   * @param replicas current replicas of the container
   * @param pendingOps in-flight replica adds and deletes for this container
   * @param minHealthyForMaintenance number of healthy replicas that must
   *                                 remain for a DN to enter maintenance
   * @return a RatisContainerReplicaCount reflecting current and pending state
   * @throws IOException if the container's replication type is not RATIS
   */
  @Override
  protected ContainerReplicaCount getContainerReplicaCount(
      ContainerInfo containerInfo, Set<ContainerReplica> replicas,
      List<ContainerReplicaOp> pendingOps, int minHealthyForMaintenance)
      throws IOException {
    if (containerInfo.getReplicationType() !=
        HddsProtos.ReplicationType.RATIS) {
      // Note: message previously read "%s.Expected" with no space after the
      // period; fixed for readability.
      throw new IOException(String.format("Invalid Container Replication Type :" +
          " %s. Expected Container Replication Type : RATIS",
          containerInfo.getReplicationType().toString()));
    }
    // Count pending adds and deletes so the replica count accounts for
    // operations that are already in flight.
    int pendingAdd = 0;
    int pendingDelete = 0;
    for (ContainerReplicaOp op : pendingOps) {
      if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
        pendingAdd++;
      } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
        pendingDelete++;
      }
    }
    return new RatisContainerReplicaCount(
        containerInfo, replicas, pendingAdd,
        pendingDelete, containerInfo.getReplicationFactor().getNumber(),
        minHealthyForMaintenance);
  }

  /**
   * Creates a command to replicate this container from the single datanode
   * that currently holds the given replica.
   */
  @Override
  protected ReplicateContainerCommand getReplicateCommand(
      ContainerInfo containerInfo, ContainerReplica replica) {
    return new ReplicateContainerCommand(containerInfo.getContainerID(),
        Collections.singletonList(replica.getDatanodeDetails()));
  }
}
package org.apache.hadoop.hdds.scm.container.replication;

import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.PlacementPolicy;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand;
import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;

/**
 * This class handles Ratis containers that are over replicated. It should
 * be used to obtain SCMCommands that can be sent to datanodes to solve
 * over replication.
 */
public class RatisOverReplicationHandler
    extends AbstractOverReplicationHandler {
  public static final Logger LOG =
      LoggerFactory.getLogger(RatisOverReplicationHandler.class);

  public RatisOverReplicationHandler(PlacementPolicy placementPolicy) {
    super(placementPolicy);
  }

  /**
   * Identifies datanodes where the specified container's replicas can be
   * deleted. Creates the SCMCommands to be sent to datanodes.
   *
   * @param replicas Set of container replicas.
   * @param pendingOps Pending (in flight) replications or deletions for this
   *                   container.
   * @param result Health check result indicating over replication
   * @param minHealthyForMaintenance Number of healthy replicas that must be
   *                                 available for a DN to enter maintenance
   * @return Returns a map of Datanodes and SCMCommands that can be sent to
   * delete replicas on those datanodes.
   */
  @Override
  public Map<DatanodeDetails, SCMCommand<?>> processAndCreateCommands(
      Set<ContainerReplica> replicas, List<ContainerReplicaOp> pendingOps,
      ContainerHealthResult result, int minHealthyForMaintenance) throws
      IOException {
    ContainerInfo containerInfo = result.getContainerInfo();
    LOG.debug("Handling container {}.", containerInfo);

    // Count pending adds and deletes so the replica count reflects
    // operations already in flight.
    int pendingAdd = 0;
    int pendingDelete = 0;
    for (ContainerReplicaOp op : pendingOps) {
      if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
        pendingAdd++;
      } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
        pendingDelete++;
      }
    }
    RatisContainerReplicaCount replicaCount =
        new RatisContainerReplicaCount(containerInfo, replicas, pendingAdd,
            pendingDelete, containerInfo.getReplicationFactor().getNumber(),
            minHealthyForMaintenance);

    // Verify that this container is actually over replicated; the queue
    // entry may be stale by the time it is processed.
    if (!verifyOverReplication(replicaCount)) {
      return Collections.emptyMap();
    }

    // Number of replicas that may be removed, considering pending deletes.
    int excess = replicaCount.getExcessRedundancy(true);
    LOG.info("Container {} is over replicated. Actual replica count is {}, " +
        "with {} pending delete(s). Expected replica count is {}.",
        containerInfo.containerID(),
        replicaCount.getReplicas().size(), pendingDelete,
        replicaCount.getReplicationFactor());

    // Get replicas that can be deleted, in sorted (deterministic) order.
    List<ContainerReplica> eligibleReplicas =
        getEligibleReplicas(replicaCount, pendingOps);
    if (eligibleReplicas.isEmpty()) {
      LOG.info("Did not find any replicas that are eligible to be deleted for" +
          " container {}.", containerInfo);
      return Collections.emptyMap();
    }

    return createCommands(containerInfo, eligibleReplicas, excess);
  }

  /** Returns true only if the container is still over replicated. */
  private boolean verifyOverReplication(
      RatisContainerReplicaCount replicaCount) {
    if (!replicaCount.isOverReplicated()) {
      LOG.info("Container {} is actually not over-replicated any more.",
          replicaCount.getContainer().containerID());
      return false;
    }
    return true;
  }

  /**
   * Finds replicas that are eligible to be deleted, sorted to avoid
   * potential data loss.
   * @see <a href="https://issues.apache.org/jira/browse/HDDS-4589">
   * HDDS-4589</a>
   * @param replicaCount ContainerReplicaCount object for the container
   * @param pendingOps Pending adds and deletes
   * @return List of ContainerReplica sorted using
   * {@link RatisOverReplicationHandler#sortReplicas(Collection)}
   */
  private List<ContainerReplica> getEligibleReplicas(
      ContainerReplicaCount replicaCount,
      List<ContainerReplicaOp> pendingOps) {
    // Sort replicas so that they can be selected in a deterministic way.
    List<ContainerReplica> eligibleReplicas =
        sortReplicas(replicaCount.getReplicas());

    // Retain one replica per unique origin datanode if the container is not
    // closed, so quasi-closed containers keep every distinct origin.
    final Map<UUID, ContainerReplica> uniqueReplicas =
        new LinkedHashMap<>();
    if (replicaCount.getContainer().getState() !=
        HddsProtos.LifeCycleState.CLOSED) {
      eligibleReplicas.stream()
          // get replicas with state that matches container state
          .filter(r -> ReplicationManager.compareState(
              replicaCount.getContainer().getState(),
              r.getState()))
          .forEach(r -> uniqueReplicas
              .putIfAbsent(r.getOriginDatanodeId(), r));

      // Note that this preserves order of the List.
      eligibleReplicas.removeAll(uniqueReplicas.values());
    }

    // Collect the DNs that are going to have their container replica deleted.
    Set<DatanodeDetails> pendingDeletion = new HashSet<>();
    for (ContainerReplicaOp op : pendingOps) {
      if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
        pendingDeletion.add(op.getTarget());
      }
    }

    // Replicas that are not on IN_SERVICE nodes or are already pending
    // delete are not eligible.
    // TODO what about nodes that are not healthy?
    eligibleReplicas.removeIf(
        replica -> replica.getDatanodeDetails().getPersistedOpState() !=
            HddsProtos.NodeOperationalState.IN_SERVICE ||
            pendingDeletion.contains(replica.getDatanodeDetails()));

    return eligibleReplicas;
  }

  /**
   * Sorts replicas using {@link ContainerReplica#hashCode()} (ContainerID and
   * DatanodeDetails).
   * @param replicas replicas to sort
   * @return sorted List
   */
  private List<ContainerReplica> sortReplicas(
      Collection<ContainerReplica> replicas) {
    return replicas.stream()
        .sorted(Comparator.comparingLong(ContainerReplica::hashCode))
        .collect(Collectors.toList());
  }

  /**
   * Builds delete commands for up to {@code excess} replicas, preferring
   * unhealthy replicas first, then healthy replicas whose removal does not
   * worsen placement.
   */
  private Map<DatanodeDetails, SCMCommand<?>> createCommands(
      ContainerInfo containerInfo, List<ContainerReplica> replicas,
      int excess) {
    Map<DatanodeDetails, SCMCommand<?>> commands = new HashMap<>();

    /*
    Over replication means we have enough healthy replicas, so unhealthy
    replicas can be deleted. This might make the container violate placement
    policy.
    */
    List<ContainerReplica> unhealthyReplicas = new ArrayList<>();
    for (ContainerReplica replica : replicas) {
      if (excess == 0) {
        return commands;
      }
      if (!ReplicationManager.compareState(
          containerInfo.getState(), replica.getState())) {
        commands.put(replica.getDatanodeDetails(),
            createDeleteCommand(containerInfo));
        unhealthyReplicas.add(replica);
        excess--;
      }
    }
    replicas.removeAll(unhealthyReplicas);

    /*
    Remove excess replicas if that does not make the container mis replicated.
    If the container was already mis replicated, then remove replicas if that
    does not change the placement count.
    */
    Set<ContainerReplica> replicaSet = new HashSet<>(replicas);
    // Iterate through replicas in deterministic order.
    for (ContainerReplica replica : replicas) {
      if (excess == 0) {
        return commands;
      }

      if (super.isPlacementStatusActuallyEqualAfterRemove(replicaSet, replica,
          containerInfo.getReplicationFactor().getNumber())) {
        commands.put(replica.getDatanodeDetails(),
            createDeleteCommand(containerInfo));
        excess--;
      }
    }
    return commands;
  }

  /** Creates a forced delete command for the given container. */
  private DeleteContainerCommand createDeleteCommand(ContainerInfo container) {
    return new DeleteContainerCommand(container.containerID(), true);
  }
}
package org.apache.hadoop.hdds.scm.container.replication;

import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.conf.StorageUnit;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State;
import org.apache.hadoop.hdds.scm.PlacementPolicy;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand;
import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * This class handles Ratis containers that are under replicated. It should
 * be used to obtain SCMCommands that can be sent to datanodes to solve
 * under replication.
 */
public class RatisUnderReplicationHandler
    implements UnhealthyReplicationHandler {
  public static final Logger LOG =
      LoggerFactory.getLogger(RatisUnderReplicationHandler.class);
  private final PlacementPolicy placementPolicy;
  private final NodeManager nodeManager;
  // Configured container size; used as a lower bound on the space a target
  // datanode must have free.
  private final long currentContainerSize;

  public RatisUnderReplicationHandler(final PlacementPolicy placementPolicy,
      final ConfigurationSource conf, final NodeManager nodeManager) {
    this.placementPolicy = placementPolicy;
    this.currentContainerSize = (long) conf
        .getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
            ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
    this.nodeManager = nodeManager;
  }

  /**
   * Identifies new set of datanodes as targets for container replication.
   * Forms the SCMCommands to be sent to these datanodes.
   *
   * @param replicas Set of container replicas.
   * @param pendingOps Pending ContainerReplicaOp including adds and deletes
   *                   for this container.
   * @param result Health check result indicating under replication.
   * @param minHealthyForMaintenance Number of healthy replicas that must be
   *                                 available for a DN to enter maintenance
   * @return Returns the key value pair of destination dn where the command
   * gets executed and the command itself. If an empty map is returned, it
   * indicates the container is no longer unhealthy and can be removed from
   * the unhealthy queue. Any exception indicates that the container is still
   * unhealthy and should be retried later.
   */
  @Override
  public Map<DatanodeDetails, SCMCommand<?>> processAndCreateCommands(
      Set<ContainerReplica> replicas, List<ContainerReplicaOp> pendingOps,
      ContainerHealthResult result, int minHealthyForMaintenance)
      throws IOException {
    ContainerInfo containerInfo = result.getContainerInfo();
    LOG.debug("Handling under replicated Ratis container {}", containerInfo);

    // Count pending adds and deletes so the replica count reflects
    // operations already in flight.
    int pendingAdd = 0;
    int pendingDelete = 0;
    for (ContainerReplicaOp op : pendingOps) {
      if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
        pendingAdd++;
      } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
        pendingDelete++;
      }
    }
    RatisContainerReplicaCount replicaCount =
        new RatisContainerReplicaCount(containerInfo, replicas, pendingAdd,
            pendingDelete, containerInfo.getReplicationFactor().getNumber(),
            minHealthyForMaintenance);

    // Verify that this container is still under replicated and we don't have
    // sufficient replication after considering pending adds.
    if (!verifyUnderReplication(replicaCount)) {
      return Collections.emptyMap();
    }

    // Find sources that can provide replicas.
    List<DatanodeDetails> sourceDatanodes =
        getSources(replicaCount, pendingOps);
    if (sourceDatanodes.isEmpty()) {
      LOG.warn("Cannot replicate container {} because no healthy replicas " +
          "were found.", containerInfo);
      return Collections.emptyMap();
    }

    // Find targets to send replicas to.
    List<DatanodeDetails> targetDatanodes =
        getTargets(replicaCount, pendingOps);
    if (targetDatanodes.isEmpty()) {
      LOG.warn("Cannot replicate container {} because no eligible targets " +
          "were found.", containerInfo);
      return Collections.emptyMap();
    }

    return createReplicationCommands(containerInfo.getContainerID(),
        sourceDatanodes, targetDatanodes);
  }

  /**
   * Verify that this container is under replicated, even after considering
   * pending adds. Note that the container might be under replicated but
   * unrecoverable (no replicas), in which case this returns false.
   *
   * @param replicaCount RatisContainerReplicaCount object to check
   * @return true if the container is under replicated, false if the
   * container is sufficiently replicated or unrecoverable.
   */
  private boolean verifyUnderReplication(
      RatisContainerReplicaCount replicaCount) {
    if (replicaCount.isSufficientlyReplicated()) {
      LOG.info("The container {} state changed and it's not under " +
          "replicated any more.", replicaCount.getContainer().containerID());
      return false;
    }
    if (replicaCount.isSufficientlyReplicated(true)) {
      LOG.info("Container {} with replicas {} will be sufficiently " +
          "replicated after pending replicas are created.",
          replicaCount.getContainer().getContainerID(),
          replicaCount.getReplicas());
      return false;
    }
    if (replicaCount.getReplicas().isEmpty()) {
      LOG.warn("Container {} does not have any replicas and is unrecoverable" +
          ".", replicaCount.getContainer());
      return false;
    }
    return true;
  }

  /**
   * Returns a list of datanodes that can be used as sources for replication
   * for the container specified in replicaCount.
   *
   * @param replicaCount RatisContainerReplicaCount object for this container
   * @param pendingOps List of pending ContainerReplicaOp
   * @return List of healthy datanodes that have closed/quasi-closed replicas
   * and are not pending replica deletion. Sorted in descending order of
   * sequence id.
   */
  private List<DatanodeDetails> getSources(
      RatisContainerReplicaCount replicaCount,
      List<ContainerReplicaOp> pendingOps) {
    // Collect the DNs that are going to have their container replica deleted.
    Set<DatanodeDetails> pendingDeletion = new HashSet<>();
    for (ContainerReplicaOp op : pendingOps) {
      if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
        pendingDeletion.add(op.getTarget());
      }
    }

    /*
     * Return healthy datanodes that have closed/quasi-closed replicas and
     * are not pending replica deletion. Sorted in descending order of
     * sequence id.
     */
    return replicaCount.getReplicas().stream()
        .filter(r -> r.getState() == State.QUASI_CLOSED ||
            r.getState() == State.CLOSED)
        .filter(r -> ReplicationManager.getNodeStatus(r.getDatanodeDetails(),
            nodeManager).isHealthy())
        .filter(r -> !pendingDeletion.contains(r.getDatanodeDetails()))
        .sorted((r1, r2) -> r2.getSequenceId().compareTo(r1.getSequenceId()))
        .map(ContainerReplica::getDatanodeDetails)
        .collect(Collectors.toList());
  }

  /**
   * Asks the placement policy for enough new datanodes to restore full
   * replication, excluding nodes that already hold or are about to receive
   * a replica.
   */
  private List<DatanodeDetails> getTargets(
      RatisContainerReplicaCount replicaCount,
      List<ContainerReplicaOp> pendingOps) throws IOException {
    // DNs that already have replicas cannot be targets and should be excluded.
    final List<DatanodeDetails> excludeList =
        replicaCount.getReplicas().stream()
            .map(ContainerReplica::getDatanodeDetails)
            .collect(Collectors.toList());

    // DNs that are already waiting to receive replicas cannot be targets.
    final List<DatanodeDetails> pendingReplication =
        pendingOps.stream()
            .filter(containerReplicaOp -> containerReplicaOp.getOpType() ==
                ContainerReplicaOp.PendingOpType.ADD)
            .map(ContainerReplicaOp::getTarget)
            .collect(Collectors.toList());
    excludeList.addAll(pendingReplication);

    /*
    Ensure that target datanodes have enough space to hold a complete
    container.
    */
    final long dataSizeRequired =
        Math.max(replicaCount.getContainer().getUsedBytes(),
            currentContainerSize);
    return placementPolicy.chooseDatanodes(excludeList, null,
        replicaCount.additionalReplicaNeeded(), 0, dataSizeRequired);
  }

  /**
   * Builds one ReplicateContainerCommand per target datanode; each command
   * lists every source that can serve the replica.
   */
  private Map<DatanodeDetails, SCMCommand<?>> createReplicationCommands(
      long containerID, List<DatanodeDetails> sources,
      List<DatanodeDetails> targets) {
    Map<DatanodeDetails, SCMCommand<?>> commands = new HashMap<>();
    for (DatanodeDetails target : targets) {
      ReplicateContainerCommand command =
          new ReplicateContainerCommand(containerID, sources);
      commands.put(target, command);
    }

    return commands;
  }
}
final int maintenanceRedundancy; private final int ratisMaintenanceMinReplicas; private Thread underReplicatedProcessorThread; @@ -220,6 +224,12 @@ public ReplicationManager(final ConfigurationSource conf, ecContainerPlacement, conf, nodeManager, this); ecOverReplicationHandler = new ECOverReplicationHandler(ecContainerPlacement, nodeManager); + ecMisReplicationHandler = new ECMisReplicationHandler(ecContainerPlacement, + conf, nodeManager); + ratisUnderReplicationHandler = new RatisUnderReplicationHandler( + ratisContainerPlacement, conf, nodeManager); + ratisOverReplicationHandler = + new RatisOverReplicationHandler(ratisContainerPlacement); underReplicatedProcessor = new UnderReplicatedProcessor(this, rmConf.getUnderReplicatedInterval()); @@ -398,12 +408,15 @@ public void sendCloseContainerEvent(ContainerID containerID) { * @param container Container to be deleted * @param replicaIndex Index of the container replica to be deleted * @param datanode The datanode on which the replica should be deleted + * @param force true to force delete a container that is open or not empty * @throws NotLeaderException when this SCM is not the leader */ public void sendDeleteCommand(final ContainerInfo container, int replicaIndex, - final DatanodeDetails datanode) throws NotLeaderException { + final DatanodeDetails datanode, boolean force) throws NotLeaderException { + LOG.debug("Sending delete command for container {} and index {} on {}", + container, replicaIndex, datanode); final DeleteContainerCommand deleteCommand = - new DeleteContainerCommand(container.containerID(), false); + new DeleteContainerCommand(container.containerID(), force); deleteCommand.setReplicaIndex(replicaIndex); sendDatanodeCommand(deleteCommand, container, datanode); } @@ -411,9 +424,11 @@ public void sendDeleteCommand(final ContainerInfo container, int replicaIndex, public void sendDatanodeCommand(SCMCommand command, ContainerInfo containerInfo, DatanodeDetails target) throws NotLeaderException { - 
LOG.info("Sending command of type {} for container {} to {}", - command.getType(), containerInfo, target); + LOG.info("Sending command [{}] for container {} to {}", + command, containerInfo, target); command.setTerm(getScmTerm()); + command.setDeadline(clock.millis() + + Math.round(rmConf.eventTimeout * rmConf.commandDeadlineFactor)); final CommandForDatanode datanodeCommand = new CommandForDatanode<>(target.getUuid(), command); eventPublisher.fireEvent(SCMEvents.DATANODE_COMMAND, datanodeCommand); @@ -513,8 +528,22 @@ public Map> processUnderReplicatedContainer( containerID); List pendingOps = containerReplicaPendingOps.getPendingOps(containerID); - return ecUnderReplicationHandler.processAndCreateCommands(replicas, - pendingOps, result, maintenanceRedundancy); + if (result.getContainerInfo().getReplicationType() == EC) { + if (result.getHealthState() + == ContainerHealthResult.HealthState.UNDER_REPLICATED) { + return ecUnderReplicationHandler.processAndCreateCommands(replicas, + pendingOps, result, maintenanceRedundancy); + } else if (result.getHealthState() + == ContainerHealthResult.HealthState.MIS_REPLICATED) { + return ecMisReplicationHandler.processAndCreateCommands(replicas, + pendingOps, result, maintenanceRedundancy); + } else { + throw new IllegalArgumentException("Unexpected health state: " + + result.getHealthState()); + } + } + return ratisUnderReplicationHandler.processAndCreateCommands(replicas, + pendingOps, result, ratisMaintenanceMinReplicas); } public Map> processOverReplicatedContainer( @@ -524,8 +553,12 @@ public Map> processOverReplicatedContainer( containerID); List pendingOps = containerReplicaPendingOps.getPendingOps(containerID); - return ecOverReplicationHandler.processAndCreateCommands(replicas, - pendingOps, result, maintenanceRedundancy); + if (result.getContainerInfo().getReplicationType() == EC) { + return ecOverReplicationHandler.processAndCreateCommands(replicas, + pendingOps, result, maintenanceRedundancy); + } + return 
ratisOverReplicationHandler.processAndCreateCommands(replicas, + pendingOps, result, ratisMaintenanceMinReplicas); } public long getScmTerm() throws NotLeaderException { @@ -758,6 +791,30 @@ public void setEventTimeout(Duration timeout) { this.eventTimeout = timeout.toMillis(); } + /** + * Deadline which should be set on commands sent from ReplicationManager + * to the datanodes, as a percentage of the event.timeout. If the command + * has not been processed on the datanode by this time, it will be dropped + * by the datanode and Replication Manager will need to resend it. + */ + @Config(key = "command.deadline.factor", + type = ConfigType.DOUBLE, + defaultValue = "0.9", + tags = {SCM, OZONE}, + description = "Fraction of the hdds.scm.replication.event.timeout " + + "from the current time which should be set as a deadline for " + + "commands sent from ReplicationManager to datanodes. " + + "Commands which are not processed before this deadline will be " + + "dropped by the datanodes. Should be a value > 0 and <= 1.") + private double commandDeadlineFactor = 0.9; + public double getCommandDeadlineFactor() { + return commandDeadlineFactor; + } + + public void setCommandDeadlineFactor(double val) { + commandDeadlineFactor = val; + } + /** * The number of container replica which must be available for a node to * enter maintenance. 
@@ -804,6 +861,15 @@ public void setMaintenanceReplicaMinimum(int replicaCount) { ) private int maintenanceRemainingRedundancy = 1; + @PostConstruct + public void validate() { + if (!(commandDeadlineFactor > 0) || (commandDeadlineFactor > 1)) { + throw new IllegalArgumentException("command.deadline.factor is set to " + + commandDeadlineFactor + + " and must be greater than 0 and less than equal to 1"); + } + } + public void setMaintenanceRemainingRedundancy(int redundancy) { this.maintenanceRemainingRedundancy = redundancy; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java index 9fcdc9c10d8f..e4f4e10053fe 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java @@ -153,24 +153,23 @@ public final class ReplicationManagerMetrics implements MetricsSource { @Metric("Number of EC Replication commands sent.") private MutableCounterLong ecReplicationCmdsSentTotal; - @Metric("Number of EC Deletion commands timeout.") + @Metric("Number of EC Replica Deletion commands sent.") private MutableCounterLong ecDeletionCmdsSentTotal; - @Metric("Number of EC Replication commands completed.") - private MutableCounterLong ecReplicationCmdsCompletedTotal; - - @Metric("Number of EC Deletion commands completed.") - private MutableCounterLong ecDeletionCmdsCompletedTotal; - - @Metric("Number of EC Deletion commands completed.") + @Metric("Number of EC Reconstruction commands sent.") private MutableCounterLong ecReconstructionCmdsSentTotal; - @Metric("Number of EC Replication commands timeout.") - private MutableCounterLong ecReplicationCmdsTimeoutTotal; + @Metric("Number of EC replicas successfully 
created by Replication Manager.") + private MutableCounterLong ecReplicasCreatedTotal; + + @Metric("Number of EC replicas successfully deleted by Replication Manager.") + private MutableCounterLong ecReplicasDeletedTotal; - @Metric("Number of EC Deletion commands timeout.") - private MutableCounterLong ecDeletionCmdsTimeoutTotal; + @Metric("Number of EC replicas scheduled to be created which timed out.") + private MutableCounterLong ecReplicaCreateTimeoutTotal; + @Metric("Number of EC replicas scheduled for delete which timed out.") + private MutableCounterLong ecReplicaDeleteTimeoutTotal; public ReplicationManagerMetrics(ReplicationManager manager) { this.registry = new MetricsRegistry(METRICS_SOURCE_NAME); @@ -219,11 +218,11 @@ public void getMetrics(MetricsCollector collector, boolean all) { deletionTime.snapshot(builder, all); ecReplicationCmdsSentTotal.snapshot(builder, all); ecDeletionCmdsSentTotal.snapshot(builder, all); - ecReplicationCmdsCompletedTotal.snapshot(builder, all); - ecDeletionCmdsCompletedTotal.snapshot(builder, all); + ecReplicasCreatedTotal.snapshot(builder, all); + ecReplicasDeletedTotal.snapshot(builder, all); ecReconstructionCmdsSentTotal.snapshot(builder, all); - ecReplicationCmdsTimeoutTotal.snapshot(builder, all); - ecDeletionCmdsTimeoutTotal.snapshot(builder, all); + ecReplicaCreateTimeoutTotal.snapshot(builder, all); + ecReplicasDeletedTotal.snapshot(builder, all); } public void unRegister() { @@ -361,12 +360,12 @@ public void incrEcDeletionCmdsSentTotal() { this.ecDeletionCmdsSentTotal.incr(); } - public void incrEcReplicationCmdsCompletedTotal() { - this.ecReplicationCmdsCompletedTotal.incr(); + public void incrEcReplicasCreatedTotal() { + this.ecReplicasCreatedTotal.incr(); } - public void incrEcDeletionCmdsCompletedTotal() { - this.ecDeletionCmdsCompletedTotal.incr(); + public void incrEcReplicasDeletedTotal() { + this.ecReplicasDeletedTotal.incr(); } public void incrEcReconstructionCmdsSentTotal() { @@ -383,8 +382,8 @@ public 
long getEcDeletion() { .getPendingOpCount(ContainerReplicaOp.PendingOpType.DELETE); } - public void incrEcReplicationCmdsTimeoutTotal() { - this.ecReplicationCmdsTimeoutTotal.incr(); + public void incrEcReplicaCreateTimeoutTotal() { + this.ecReplicaCreateTimeoutTotal.incr(); } public long getEcDeletionCmdsSentTotal() { @@ -399,23 +398,23 @@ public long getEcReplicationCmdsSentTotal() { return ecReplicationCmdsSentTotal.value(); } - public void incrEcDeletionCmdsTimeoutTotal() { - this.ecDeletionCmdsTimeoutTotal.incr(); + public void incrEcReplicaDeleteTimeoutTotal() { + this.ecReplicaDeleteTimeoutTotal.incr(); } - public long getEcReplicationCmdsTimeoutTotal() { - return ecReplicationCmdsTimeoutTotal.value(); + public long getEcReplicaCreateTimeoutTotal() { + return ecReplicaCreateTimeoutTotal.value(); } - public long getEcDeletionCmdsTimeoutTotal() { - return ecDeletionCmdsTimeoutTotal.value(); + public long getEcReplicaDeleteTimeoutTotal() { + return ecReplicaDeleteTimeoutTotal.value(); } - public long getEcReplicationCmdsCompletedTotal() { - return ecReplicationCmdsCompletedTotal.value(); + public long getEcReplicasCreatedTotal() { + return ecReplicasCreatedTotal.value(); } - public long getEcDeletionCmdsCompletedTotal() { - return ecDeletionCmdsCompletedTotal.value(); + public long getEcReplicasDeletedTotal() { + return ecReplicasDeletedTotal.value(); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java index a14d21c76c31..d27c1d9c6106 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java @@ -32,8 +32,6 @@ public class ReplicationQueue { underRepQueue; private final Queue overRepQueue; - private final 
Queue - misRepQueue; public ReplicationQueue() { underRepQueue = new PriorityQueue<>( @@ -42,7 +40,6 @@ public ReplicationQueue() { .thenComparing(ContainerHealthResult .UnderReplicatedHealthResult::getRequeueCount)); overRepQueue = new LinkedList<>(); - misRepQueue = new LinkedList<>(); } public void enqueue(ContainerHealthResult.UnderReplicatedHealthResult @@ -55,11 +52,6 @@ public void enqueue(ContainerHealthResult.OverReplicatedHealthResult overRepQueue.add(overReplicatedHealthResult); } - public void enqueue(ContainerHealthResult.MisReplicatedHealthResult - misReplicatedHealthResult) { - misRepQueue.add(misReplicatedHealthResult); - } - public ContainerHealthResult.UnderReplicatedHealthResult dequeueUnderReplicatedContainer() { return underRepQueue.poll(); @@ -70,11 +62,6 @@ public void enqueue(ContainerHealthResult.MisReplicatedHealthResult return overRepQueue.poll(); } - public ContainerHealthResult.MisReplicatedHealthResult - dequeueMisReplicatedContainer() { - return misRepQueue.poll(); - } - public int underReplicatedQueueSize() { return underRepQueue.size(); } @@ -83,8 +70,4 @@ public int overReplicatedQueueSize() { return overRepQueue.size(); } - public int misReplicatedQueueSize() { - return misRepQueue.size(); - } - } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/UnderReplicatedProcessor.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/UnderReplicatedProcessor.java index aa7e28069934..429c0e14eb0c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/UnderReplicatedProcessor.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/UnderReplicatedProcessor.java @@ -17,11 +17,8 @@ */ package org.apache.hadoop.hdds.scm.container.replication; -import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import 
org.apache.hadoop.ozone.protocol.commands.SCMCommand; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.Map; @@ -31,88 +28,32 @@ * queue, calculate the reconstruction commands and assign to the datanodes * via the eventQueue. */ -public class UnderReplicatedProcessor implements Runnable { - - private static final Logger LOG = LoggerFactory - .getLogger(UnderReplicatedProcessor.class); - private final ReplicationManager replicationManager; - private volatile boolean runImmediately = false; - private final long intervalInMillis; +public class UnderReplicatedProcessor extends UnhealthyReplicationProcessor + { public UnderReplicatedProcessor(ReplicationManager replicationManager, - long intervalInMillis) { - this.replicationManager = replicationManager; - this.intervalInMillis = intervalInMillis; - } - - /** - * Read messages from the ReplicationManager under replicated queue and, - * form commands to correct the under replication. The commands are added - * to the event queue and the PendingReplicaOps are adjusted. - * - * Note: this is a temporary implementation of this feature. A future - * version will need to limit the amount of messages assigned to each - * datanode, so they are not assigned too much work. 
- */ - public void processAll() { - int processed = 0; - int failed = 0; - while (true) { - if (!replicationManager.shouldRun()) { - break; - } - ContainerHealthResult.UnderReplicatedHealthResult underRep = - replicationManager.dequeueUnderReplicatedContainer(); - if (underRep == null) { - break; - } - try { - processContainer(underRep); - processed++; - } catch (Exception e) { - LOG.error("Error processing under replicated container {}", - underRep.getContainerInfo(), e); - failed++; - replicationManager.requeueUnderReplicatedContainer(underRep); - } - } - LOG.info("Processed {} under replicated containers, failed processing {}", - processed, failed); + long intervalInMillis) { + super(replicationManager, intervalInMillis); } - protected void processContainer(ContainerHealthResult - .UnderReplicatedHealthResult underRep) throws IOException { - Map> cmds = replicationManager - .processUnderReplicatedContainer(underRep); - for (Map.Entry> cmd : cmds.entrySet()) { - replicationManager.sendDatanodeCommand(cmd.getValue(), - underRep.getContainerInfo(), cmd.getKey()); - } + @Override + protected ContainerHealthResult.UnderReplicatedHealthResult + dequeueHealthResultFromQueue(ReplicationManager replicationManager) { + return replicationManager.dequeueUnderReplicatedContainer(); } @Override - public void run() { - try { - while (!Thread.currentThread().isInterrupted()) { - if (replicationManager.shouldRun()) { - processAll(); - } - synchronized (this) { - if (!runImmediately) { - wait(intervalInMillis); - } - runImmediately = false; - } - } - } catch (InterruptedException e) { - LOG.warn("{} interrupted. 
Exiting...", Thread.currentThread().getName()); - Thread.currentThread().interrupt(); - } + protected void requeueHealthResultFromQueue( + ReplicationManager replicationManager, + ContainerHealthResult.UnderReplicatedHealthResult healthResult) { + replicationManager.requeueUnderReplicatedContainer(healthResult); } - @VisibleForTesting - synchronized void runImmediately() { - runImmediately = true; - notify(); + @Override + protected Map> getDatanodeCommands( + ReplicationManager replicationManager, + ContainerHealthResult.UnderReplicatedHealthResult healthResult) + throws IOException { + return replicationManager.processUnderReplicatedContainer(healthResult); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/UnhealthyReplicationProcessor.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/UnhealthyReplicationProcessor.java new file mode 100644 index 000000000000..9623222f0531 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/UnhealthyReplicationProcessor.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Maps; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.ozone.protocol.commands.SCMCommand; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Map; + +/** + * Class used to pick messages from the respective ReplicationManager + * unhealthy replicated queue, + * calculate the delete commands and assign to the datanodes via the eventQueue. + * + */ +public abstract class UnhealthyReplicationProcessor implements Runnable { + private static final Logger LOG = LoggerFactory + .getLogger(UnhealthyReplicationProcessor.class); + private final ReplicationManager replicationManager; + private volatile boolean runImmediately = false; + private final long intervalInMillis; + + public UnhealthyReplicationProcessor(ReplicationManager replicationManager, + long intervalInMillis) { + this.replicationManager = replicationManager; + this.intervalInMillis = intervalInMillis; + } + + /** + * Read messages from the respective queue from ReplicationManager + * for processing the health result. + * @return next HealthResult from the replication manager + */ + protected abstract HealthResult dequeueHealthResultFromQueue( + ReplicationManager rm); + + /** + * Requeue HealthResult to ReplicationManager + * for reprocessing the health result. + * @return next HealthResult from the replication manager + */ + protected abstract void requeueHealthResultFromQueue( + ReplicationManager rm, HealthResult healthResult); + + /** + * Read messages from the ReplicationManager under replicated queue and, + * form commands to correct replication. The commands are added + * to the event queue and the PendingReplicaOps are adjusted. + * + * Note: this is a temporary implementation of this feature. 
A future + * version will need to limit the amount of messages assigned to each + * datanode, so they are not assigned too much work. + */ + public void processAll() { + int processed = 0; + int failed = 0; + Map healthStateCntMap = + Maps.newHashMap(); + while (true) { + if (!replicationManager.shouldRun()) { + break; + } + HealthResult healthResult = + dequeueHealthResultFromQueue(replicationManager); + if (healthResult == null) { + break; + } + try { + processContainer(healthResult); + processed++; + healthStateCntMap.compute(healthResult.getHealthState(), + (healthState, cnt) -> cnt == null ? 1 : (cnt + 1)); + } catch (Exception e) { + LOG.error("Error processing Health result of class: {} for " + + "container {}", healthResult.getClass(), + healthResult.getContainerInfo(), e); + failed++; + requeueHealthResultFromQueue(replicationManager, healthResult); + } + } + LOG.info("Processed {} containers with health state counts {}," + + "failed processing {}", processed, healthStateCntMap, failed); + } + + /** + * Gets the commands to be run datanode to process the + * container health result. + * @param rm + * @param healthResult + * @return Commands to be run on Datanodes + */ + protected abstract Map> getDatanodeCommands( + ReplicationManager rm, HealthResult healthResult) + throws IOException; + private void processContainer(HealthResult healthResult) throws IOException { + Map> cmds = getDatanodeCommands( + replicationManager, healthResult); + for (Map.Entry> cmd : cmds.entrySet()) { + replicationManager.sendDatanodeCommand(cmd.getValue(), + healthResult.getContainerInfo(), cmd.getKey()); + } + } + + @Override + public void run() { + try { + while (!Thread.currentThread().isInterrupted()) { + if (replicationManager.shouldRun()) { + processAll(); + } + synchronized (this) { + if (!runImmediately) { + wait(intervalInMillis); + } + runImmediately = false; + } + } + } catch (InterruptedException e) { + LOG.warn("{} interrupted. 
Exiting...", Thread.currentThread().getName()); + Thread.currentThread().interrupt(); + } + } + + @VisibleForTesting + synchronized void runImmediately() { + runImmediately = true; + notify(); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java index f5bb19323a86..4428428d17c6 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java @@ -23,11 +23,10 @@ import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest; import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.util.Iterator; -import java.util.List; import java.util.Set; -import java.util.stream.Collectors; /** * Handler to process containers which are closed, but some replicas are still @@ -36,6 +35,9 @@ */ public class ClosedWithMismatchedReplicasHandler extends AbstractCheck { + public static final Logger LOG = + LoggerFactory.getLogger(ClosedWithMismatchedReplicasHandler.class); + private ReplicationManager replicationManager; public ClosedWithMismatchedReplicasHandler( @@ -43,6 +45,13 @@ public ClosedWithMismatchedReplicasHandler( this.replicationManager = replicationManager; } + /** + * Handles CLOSED EC or RATIS container. If some replicas are CLOSING or + * OPEN, this sends a force-close command for them. 
+ * @param request ContainerCheckRequest object representing the container + * @return always returns true so that other handlers in the chain can fix + * issues such as under replication + */ @Override public boolean handle(ContainerCheckRequest request) { ContainerInfo containerInfo = request.getContainerInfo(); @@ -51,39 +60,34 @@ public boolean handle(ContainerCheckRequest request) { // Handler is only relevant for CLOSED containers. return false; } - List unhealthyReplicas = replicas.stream() - .filter(r -> !ReplicationManager - .compareState(containerInfo.getState(), r.getState())) - .collect(Collectors.toList()); + LOG.debug("Checking container {} in ClosedWithMismatchedReplicasHandler", + containerInfo); - if (unhealthyReplicas.size() > 0) { - handleUnhealthyReplicas(containerInfo, unhealthyReplicas); - return true; + // close replica if its state is OPEN or CLOSING + for (ContainerReplica replica : replicas) { + if (isMismatched(replica)) { + LOG.debug("Sending close command for mismatched replica {} of " + + "container {}.", replica, containerInfo); + replicationManager.sendCloseContainerReplicaCommand( + containerInfo, replica.getDatanodeDetails(), true); + } } + + /* + This handler is unique because it always returns false. This allows + handlers further in the chain to fix issues such as under replication. + */ return false; } /** - * Handles unhealthy container. - * A container is inconsistent if any of the replica state doesn't - * match the container state. We have to take appropriate action - * based on state of the replica. - * - * @param container ContainerInfo - * @param unhealthyReplicas List of ContainerReplica + * If a CLOSED container has an OPEN or CLOSING replica, there is a state + * mismatch. 
+ * @param replica replica to check for mismatch + * @return true if the replica is in CLOSING or OPEN state, else false */ - private void handleUnhealthyReplicas(final ContainerInfo container, - List unhealthyReplicas) { - Iterator iterator = unhealthyReplicas.iterator(); - while (iterator.hasNext()) { - final ContainerReplica replica = iterator.next(); - final ContainerReplicaProto.State state = replica.getState(); - if (state == ContainerReplicaProto.State.OPEN - || state == ContainerReplicaProto.State.CLOSING) { - replicationManager.sendCloseContainerReplicaCommand( - container, replica.getDatanodeDetails(), true); - iterator.remove(); - } - } + private boolean isMismatched(ContainerReplica replica) { + return replica.getState() == ContainerReplicaProto.State.OPEN || + replica.getState() == ContainerReplicaProto.State.CLOSING; } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithUnhealthyReplicasHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithUnhealthyReplicasHandler.java index fd4e3005ca76..205d41923cf7 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithUnhealthyReplicasHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithUnhealthyReplicasHandler.java @@ -77,7 +77,8 @@ public boolean handle(ContainerCheckRequest request) { if (containerInfo.getState() != HddsProtos.LifeCycleState.CLOSED) { return false; } - + LOG.debug("Checking container {} in ClosedWithUnhealthyReplicasHandler", + containerInfo); Set replicas = request.getContainerReplicas(); // create a set of indexes that are closed Set closedIndexes = replicas.stream() @@ -111,6 +112,7 @@ public boolean handle(ContainerCheckRequest request) { ReplicationManagerReport.HealthState.UNHEALTHY, containerInfo.containerID()); } + 
LOG.debug("Returning {} for container {}", foundUnhealthy, containerInfo); return foundUnhealthy; } @@ -119,7 +121,7 @@ private void sendDeleteCommand(ContainerInfo containerInfo, LOG.debug("Trying to delete UNHEALTHY replica [{}]", replica); try { replicationManager.sendDeleteCommand(containerInfo, - replica.getReplicaIndex(), replica.getDatanodeDetails()); + replica.getReplicaIndex(), replica.getDatanodeDetails(), true); } catch (NotLeaderException e) { LOG.warn("Failed to delete UNHEALTHY replica [{}]", replica, e); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java index 103f7d66463d..c06581bf2a8b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java @@ -24,11 +24,16 @@ import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest; import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Class used in Replication Manager to close replicas of CLOSING containers. 
*/ public class ClosingContainerHandler extends AbstractCheck { + private static final Logger LOG = + LoggerFactory.getLogger(ClosingContainerHandler.class); + private final ReplicationManager replicationManager; public ClosingContainerHandler(ReplicationManager replicationManager) { @@ -50,6 +55,8 @@ public boolean handle(ContainerCheckRequest request) { if (containerInfo.getState() != HddsProtos.LifeCycleState.CLOSING) { return false; } + LOG.debug("Checking container {} in ClosingContainerHandler", + containerInfo); boolean forceClose = request.getContainerInfo().getReplicationConfig() .getReplicationType() != HddsProtos.ReplicationType.RATIS; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/DeletingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/DeletingContainerHandler.java index a31e5ecb161f..6da647b70d45 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/DeletingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/DeletingContainerHandler.java @@ -60,14 +60,19 @@ public boolean handle(ContainerCheckRequest request) { HddsProtos.LifeCycleState containerState = containerInfo.getState(); if (containerState == HddsProtos.LifeCycleState.DELETED) { + LOG.debug("Container {} is DELETED so returning true", containerInfo); return true; } if (containerState != HddsProtos.LifeCycleState.DELETING) { return false; } + LOG.debug("Checking container {} in DeletingContainerHandler", + containerInfo); if (request.getContainerReplicas().size() == 0) { + LOG.debug("Deleting Container {} has no replicas so marking for cleanup" + + " and returning true", containerInfo); replicationManager.updateContainerState( cID, HddsProtos.LifeCycleEvent.CLEANUP); return true; @@ -82,7 +87,8 @@ public boolean handle(ContainerCheckRequest request) { 
.forEach(rp -> { try { replicationManager.sendDeleteCommand( - containerInfo, rp.getReplicaIndex(), rp.getDatanodeDetails()); + containerInfo, rp.getReplicaIndex(), rp.getDatanodeDetails(), + false); } catch (NotLeaderException e) { LOG.warn("Failed to delete empty replica with index {} for " + "container {} on datanode {}", rp.getReplicaIndex(), diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java index 2aba554498d8..af1287521916 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java @@ -28,6 +28,8 @@ import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult; import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp; import org.apache.hadoop.hdds.scm.container.replication.ECContainerReplicaCount; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Collections; @@ -44,6 +46,9 @@ */ public class ECReplicationCheckHandler extends AbstractCheck { + private static final Logger LOG = + LoggerFactory.getLogger(ECReplicationCheckHandler.class); + private final PlacementPolicy placementPolicy; public ECReplicationCheckHandler(PlacementPolicy placementPolicy) { @@ -60,6 +65,7 @@ public boolean handle(ContainerCheckRequest request) { ContainerInfo container = request.getContainerInfo(); ContainerID containerID = container.containerID(); ContainerHealthResult health = checkHealth(request); + LOG.debug("Checking container {} in ECReplicationCheckHandler", container); if (health.getHealthState() == ContainerHealthResult.HealthState.HEALTHY) { // If the container is healthy, 
there is nothing else to do in this // handler so return as unhandled so any further handlers will be tried. @@ -79,9 +85,15 @@ public boolean handle(ContainerCheckRequest request) { ReplicationManagerReport.HealthState.MISSING, containerID); } if (!underHealth.isReplicatedOkAfterPending() && - !underHealth.isUnrecoverable()) { + (!underHealth.isUnrecoverable() + || underHealth.hasUnreplicatedOfflineIndexes())) { request.getReplicationQueue().enqueue(underHealth); } + LOG.debug("Container {} is Under Replicated. isReplicatedOkAfterPending " + + "is [{}]. isUnrecoverable is [{}]. hasUnreplicatedOfflineIndexes " + + "is [{}]", container, underHealth.isReplicatedOkAfterPending(), + underHealth.isUnrecoverable(), + underHealth.hasUnreplicatedOfflineIndexes()); return true; } else if (health.getHealthState() == ContainerHealthResult.HealthState.OVER_REPLICATED) { @@ -92,6 +104,8 @@ public boolean handle(ContainerCheckRequest request) { if (!overHealth.isReplicatedOkAfterPending()) { request.getReplicationQueue().enqueue(overHealth); } + LOG.debug("Container {} is Over Replicated. isReplicatedOkAfterPending " + + "is [{}]", container, overHealth.isReplicatedOkAfterPending()); return true; } else if (health.getHealthState() == ContainerHealthResult.HealthState.MIS_REPLICATED) { @@ -102,10 +116,14 @@ public boolean handle(ContainerCheckRequest request) { if (!misRepHealth.isReplicatedOkAfterPending()) { request.getReplicationQueue().enqueue(misRepHealth); } + LOG.debug("Container {} is Mis Replicated. isReplicatedOkAfterPending " + + "is [{}]", container, misRepHealth.isReplicatedOkAfterPending()); return true; } - // Should not get here, but incase it does the container is not healthy, + // Should not get here, but in case it does the container is not healthy, // but is also not under or over replicated. + LOG.warn("Container {} is not healthy but is not under, over or " + + " mis-replicated. 
Should not happen.", container); return false; } @@ -133,10 +151,16 @@ public ContainerHealthResult checkHealth(ContainerCheckRequest request) { dueToDecommission = false; remainingRedundancy = repConfig.getParity() - missingIndexes.size(); } - return new ContainerHealthResult.UnderReplicatedHealthResult( - container, remainingRedundancy, dueToDecommission, - replicaCount.isSufficientlyReplicated(true), - replicaCount.isUnrecoverable()); + ContainerHealthResult.UnderReplicatedHealthResult result = + new ContainerHealthResult.UnderReplicatedHealthResult( + container, remainingRedundancy, dueToDecommission, + replicaCount.isSufficientlyReplicated(true), + replicaCount.isUnrecoverable()); + if (replicaCount.decommissioningOnlyIndexes(true).size() > 0 + || replicaCount.maintenanceOnlyIndexes(true).size() > 0) { + result.setHasUnReplicatedOfflineIndexes(true); + } + return result; } if (replicaCount.isOverReplicated(false)) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/EmptyContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/EmptyContainerHandler.java index 19c8d0a93aaa..30f2cb4bb6b2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/EmptyContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/EmptyContainerHandler.java @@ -60,7 +60,8 @@ public boolean handle(ContainerCheckRequest request) { request.getReport() .incrementAndSample(ReplicationManagerReport.HealthState.EMPTY, containerInfo.containerID()); - + LOG.debug("Container {} is empty and closed, marking as DELETING", + containerInfo); // delete replicas if they are closed and empty deleteContainerReplicas(containerInfo, replicas); @@ -109,12 +110,9 @@ private void deleteContainerReplicas(final ContainerInfo containerInfo, rp.getState() == 
ContainerReplicaProto.State.CLOSED); Preconditions.assertTrue(rp.getKeyCount() == 0); - LOG.debug("Trying to delete empty replica with index {} for container " + - "{} on datanode {}", rp.getReplicaIndex(), - containerInfo.containerID(), rp.getDatanodeDetails().getUuidString()); try { replicationManager.sendDeleteCommand(containerInfo, - rp.getReplicaIndex(), rp.getDatanodeDetails()); + rp.getReplicaIndex(), rp.getDatanodeDetails(), false); } catch (NotLeaderException e) { LOG.warn("Failed to delete empty replica with index {} for container" + " {} on datanode {}", diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java index 666b1a22877f..a644f5e8341a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest; import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.Set; @@ -36,7 +38,10 @@ */ public class OpenContainerHandler extends AbstractCheck { - private ReplicationManager replicationManager; + private static final Logger LOG = + LoggerFactory.getLogger(OpenContainerHandler.class); + + private final ReplicationManager replicationManager; public OpenContainerHandler(ReplicationManager replicationManager) { this.replicationManager = replicationManager; @@ -46,10 +51,14 @@ public OpenContainerHandler(ReplicationManager replicationManager) { public boolean handle(ContainerCheckRequest request) { ContainerInfo 
containerInfo = request.getContainerInfo(); if (containerInfo.getState() == HddsProtos.LifeCycleState.OPEN) { + LOG.debug("Checking open container {} in OpenContainerHandler", + containerInfo); if (!isOpenContainerHealthy( containerInfo, request.getContainerReplicas())) { // This is an unhealthy open container, so we need to trigger the // close process on it. + LOG.debug("Container {} is open but unhealthy. Triggering close.", + containerInfo); request.getReport().incrementAndSample( ReplicationManagerReport.HealthState.OPEN_UNHEALTHY, containerInfo.containerID()); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java index 449d0776e1d3..01f6a05d77d0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java @@ -64,12 +64,16 @@ public boolean handle(ContainerCheckRequest request) { if (containerInfo.getState() != HddsProtos.LifeCycleState.QUASI_CLOSED) { return false; } + LOG.debug("Checking container {} in QuasiClosedContainerHandler", + containerInfo); Set replicas = request.getContainerReplicas(); if (canForceCloseContainer(containerInfo, replicas)) { forceCloseContainer(containerInfo, replicas); return true; } else { + LOG.debug("Container {} cannot be force closed and is stuck in " + + "QUASI_CLOSED", containerInfo); request.getReport().incrementAndSample( ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK, containerInfo.containerID()); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java index 9d33498ddade..91dd51a6073c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java @@ -66,6 +66,8 @@ public boolean handle(ContainerCheckRequest request) { ReplicationManagerReport report = request.getReport(); ContainerInfo container = request.getContainerInfo(); ContainerHealthResult health = checkHealth(request); + LOG.debug("Checking container {} in RatisReplicationCheckHandler", + container); if (health.getHealthState() == ContainerHealthResult.HealthState.HEALTHY) { // If the container is healthy, there is nothing else to do in this // handler so return as unhandled so any further handlers will be tried. @@ -88,6 +90,10 @@ public boolean handle(ContainerCheckRequest request) { !underHealth.isReplicatedOkAfterPending()) { request.getReplicationQueue().enqueue(underHealth); } + LOG.debug("Container {} is Under Replicated. isReplicatedOkAfterPending" + + " is [{}]. isUnrecoverable is [{}]", container, + underHealth.isReplicatedOkAfterPending(), + underHealth.isUnrecoverable()); return true; } @@ -101,6 +107,8 @@ public boolean handle(ContainerCheckRequest request) { if (!overHealth.isReplicatedOkAfterPending()) { request.getReplicationQueue().enqueue(overHealth); } + LOG.debug("Container {} is Over Replicated. isReplicatedOkAfterPending" + + " is [{}]", container, overHealth.isReplicatedOkAfterPending()); return true; } if (health.getHealthState() == @@ -113,8 +121,14 @@ public boolean handle(ContainerCheckRequest request) { if (!misRepHealth.isReplicatedOkAfterPending()) { request.getReplicationQueue().enqueue(misRepHealth); } + LOG.debug("Container {} is Mid Replicated. 
isReplicatedOkAfterPending" + + " is [{}]", container, misRepHealth.isReplicatedOkAfterPending()); return true; } + // Should not get here, but in case it does the container is not healthy, + // but is also not under, over or mis replicated. + LOG.warn("Container {} is not healthy but is not under, over or " + + " mis-replicated. Should not happen.", container); return false; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java index 4c446acce7ff..528d2dfef3a9 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java @@ -37,6 +37,7 @@ import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.HashMap; @@ -345,7 +346,7 @@ private boolean checkContainersReplicatedOnNode(DatanodeDetails dn) try { ContainerReplicaCount replicaSet = replicationManager.getContainerReplicaCount(cid); - if (replicaSet.isSufficientlyReplicated()) { + if (replicaSet.isSufficientlyReplicatedForOffline(dn)) { sufficientlyReplicated++; } else { if (LOG.isDebugEnabled()) { @@ -398,7 +399,7 @@ private boolean checkContainersReplicatedOnNode(DatanodeDetails dn) return underReplicated == 0 && unhealthy == 0; } - private String replicaDetails(Set replicas) { + private String replicaDetails(Collection replicas) { StringBuilder sb = new StringBuilder(); sb.append("Replicas{"); sb.append(replicas.stream() diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index b9bc13d5a528..e96313df78ac 100644 --- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -394,16 +394,16 @@ public RegisteredCommand register( addEntryToDnsToUuidMap(dnsName, datanodeDetails.getUuidString()); // Updating Node Report, as registration is successful processNodeReport(datanodeDetails, nodeReport); - LOG.info("Registered Data node : {}", datanodeDetails); + LOG.info("Registered Data node : {}", datanodeDetails.toDebugString()); scmNodeEventPublisher.fireEvent(SCMEvents.NEW_NODE, datanodeDetails); } catch (NodeAlreadyExistsException e) { if (LOG.isTraceEnabled()) { LOG.trace("Datanode is already registered. Datanode: {}", - datanodeDetails.toString()); + datanodeDetails); } } catch (NodeNotFoundException e) { LOG.error("Cannot find datanode {} from nodeStateManager", - datanodeDetails.toString()); + datanodeDetails); } } else { // Update datanode if it is registered but the ip or hostname changes @@ -543,9 +543,11 @@ protected void updateDatanodeOpState(DatanodeDetails reportedDn) if (opStateDiffers(reportedDn, scmStatus)) { if (scmContext.isLeader()) { LOG.info("Scheduling a command to update the operationalState " + - "persisted on {} as the reported value does not " + + "persisted on {} as the reported value ({}, {}) does not " + "match the value stored in SCM ({}, {})", reportedDn, + reportedDn.getPersistedOpState(), + reportedDn.getPersistedOpStateExpiryEpochSec(), scmStatus.getOperationalState(), scmStatus.getOpStateExpiryEpochSeconds()); @@ -563,9 +565,11 @@ protected void updateDatanodeOpState(DatanodeDetails reportedDn) } } else { LOG.info("Update the operationalState saved in follower SCM " + - "for {} as the reported value does not " + + "for {} as the reported value ({}, {}) does not " + "match the value stored in SCM ({}, {})", reportedDn, + reportedDn.getPersistedOpState(), + reportedDn.getPersistedOpStateExpiryEpochSec(), 
scmStatus.getOperationalState(), scmStatus.getOpStateExpiryEpochSeconds()); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java index befc0543a357..80374299b637 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java @@ -83,7 +83,10 @@ public PipelinePlacementPolicy(final NodeManager nodeManager, this.heavyNodeCriteria = dnLimit == null ? 0 : Integer.parseInt(dnLimit); } - int currentRatisThreePipelineCount(DatanodeDetails datanodeDetails) { + public static int currentRatisThreePipelineCount( + NodeManager nodeManager, + PipelineStateManager stateManager, + DatanodeDetails datanodeDetails) { // Safe to cast collection's size to int return (int) nodeManager.getPipelines(datanodeDetails).stream() .map(id -> { @@ -95,16 +98,24 @@ int currentRatisThreePipelineCount(DatanodeDetails datanodeDetails) { return null; } }) - .filter(this::isNonClosedRatisThreePipeline) + .filter(PipelinePlacementPolicy::isNonClosedRatisThreePipeline) .count(); } - private boolean isNonClosedRatisThreePipeline(Pipeline p) { - return p.getReplicationConfig() + private static boolean isNonClosedRatisThreePipeline(Pipeline p) { + return p != null && p.getReplicationConfig() .equals(RatisReplicationConfig.getInstance(ReplicationFactor.THREE)) && !p.isClosed(); } + @Override + protected int getMaxReplicasPerRack(int numReplicas, int numberOfRacks) { + if (numberOfRacks == 1) { + return numReplicas; + } + return Math.max(numReplicas - 1, 1); + } + /** * Filter out viable nodes based on * 1. nodes that are healthy @@ -155,7 +166,8 @@ List filterViableNodes( // TODO check if sorting could cause performance issue: HDDS-3466. 
List healthyList = healthyNodes.stream() .map(d -> - new DnWithPipelines(d, currentRatisThreePipelineCount(d))) + new DnWithPipelines(d, currentRatisThreePipelineCount(nodeManager, + stateManager, d))) .filter(d -> (d.getPipelines() < nodeManager.pipelineLimit(d.getDn()))) .sorted(Comparator.comparingInt(DnWithPipelines::getPipelines)) @@ -470,7 +482,11 @@ protected int getRequiredRackCount(int numReplicas) { return REQUIRED_RACKS; } - private static class DnWithPipelines { + /** + * static inner utility class for datanodes with pipeline, used for + * pipeline engagement checking. + */ + public static class DnWithPipelines { private DatanodeDetails dn; private int pipelines; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicyFactory.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicyFactory.java new file mode 100644 index 000000000000..c57448fdab38 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicyFactory.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.scm.pipeline; + +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.node.NodeManager; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY; + +/** + * Pipeline placement factor for pipeline providers to create placement instance + * based on configuration property. + * {@link ScmConfigKeys#OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY} + */ +public final class PipelinePlacementPolicyFactory { + + private PipelinePlacementPolicyFactory() { + } + + public static PlacementPolicy getPolicy(NodeManager nodeManager, + PipelineStateManager stateManager, ConfigurationSource conf) { + final Class clazz + = conf.getClass(OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY, + PipelinePlacementPolicy.class, PlacementPolicy.class); + + try { + return clazz.getDeclaredConstructor(NodeManager.class, + PipelineStateManager.class, ConfigurationSource.class) + .newInstance(nodeManager, stateManager, conf); + } catch (Exception e) { + throw new RuntimeException("Failed to getPolicy for " + clazz, e); + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java index 6b40f28fc0b1..5db887b6742d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateMap.java @@ -106,9 +106,9 @@ void addContainerToPipeline(PipelineID pipelineID, ContainerID containerID) Pipeline pipeline = getPipeline(pipelineID); if (pipeline.isClosed()) { - throw new IOException(String - .format("Cannot add container to pipeline=%s in closed state", - pipelineID)); + LOG.warn("Adding container {} to pipeline={} in CLOSED 
state." + + " This happens only for some exceptional cases." + + " Check for the previous exceptions.", containerID, pipelineID); } pipeline2container.get(pipelineID).add(containerID); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java index 43b2e01c9140..ad149fdec6b7 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/RatisPipelineProvider.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -36,7 +37,9 @@ import org.apache.hadoop.hdds.scm.ha.SCMContext; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.node.NodeStatus; +import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.pipeline.Pipeline.PipelineState; +import org.apache.hadoop.hdds.scm.pipeline.PipelinePlacementPolicy.DnWithPipelines; import org.apache.hadoop.hdds.scm.pipeline.leader.choose.algorithms.LeaderChoosePolicy; import org.apache.hadoop.hdds.scm.pipeline.leader.choose.algorithms.LeaderChoosePolicyFactory; import org.apache.hadoop.hdds.server.events.EventPublisher; @@ -59,7 +62,7 @@ public class RatisPipelineProvider private final ConfigurationSource conf; private final EventPublisher eventPublisher; - private final PipelinePlacementPolicy placementPolicy; + private final PlacementPolicy placementPolicy; private int pipelineNumberLimit; private int maxPipelinePerDatanode; private final LeaderChoosePolicy leaderChoosePolicy; @@ -77,8 +80,8 @@ public RatisPipelineProvider(NodeManager nodeManager, this.conf = conf; this.eventPublisher = eventPublisher; this.scmContext = scmContext; - 
this.placementPolicy = - new PipelinePlacementPolicy(nodeManager, stateManager, conf); + this.placementPolicy = PipelinePlacementPolicyFactory + .getPolicy(nodeManager, stateManager, conf); this.pipelineNumberLimit = conf.getInt( ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT, ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT_DEFAULT); @@ -163,6 +166,14 @@ public synchronized Pipeline create(RatisReplicationConfig replicationConfig, containerSizeBytes); break; case THREE: + List excludeDueToEngagement = filterPipelineEngagement(); + if (excludeDueToEngagement.size() > 0) { + if (excludedNodes.size() == 0) { + excludedNodes = excludeDueToEngagement; + } else { + excludedNodes.addAll(excludeDueToEngagement); + } + } dns = placementPolicy.chooseDatanodes(excludedNodes, favoredNodes, factor.getNumber(), minRatisVolumeSizeBytes, containerSizeBytes); @@ -222,6 +233,23 @@ public Pipeline createForRead( .collect(Collectors.toList())); } + private List filterPipelineEngagement() { + List healthyNodes = + getNodeManager().getNodes(NodeStatus.inServiceHealthy()); + List excluded = healthyNodes.stream() + .map(d -> + new DnWithPipelines(d, + PipelinePlacementPolicy + .currentRatisThreePipelineCount(getNodeManager(), + getPipelineStateManager(), d))) + .filter(d -> + (d.getPipelines() >= getNodeManager().pipelineLimit(d.getDn()))) + .sorted(Comparator.comparingInt(DnWithPipelines::getPipelines)) + .map(d -> d.getDn()) + .collect(Collectors.toList()); + return excluded; + } + @Override public void shutdown() { } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index e59c984174f5..617d17da1d95 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -640,6 +640,13 @@ public ScmContainerLocationResponse processRequest( .setGetContainerCountResponse(getContainerCount( request.getGetContainerCountRequest())) .build(); + case GetClosedContainerCount: + return ScmContainerLocationResponse.newBuilder() + .setCmdType(request.getCmdType()) + .setStatus(Status.OK) + .setGetContainerCountResponse(getClosedContainerCount( + request.getGetContainerCountRequest())) + .build(); case GetContainerReplicas: return ScmContainerLocationResponse.newBuilder() .setCmdType(request.getCmdType()) @@ -1149,6 +1156,16 @@ public GetContainerCountResponseProto getContainerCount( .build(); } + public GetContainerCountResponseProto getClosedContainerCount( + StorageContainerLocationProtocolProtos.GetContainerCountRequestProto + request) throws IOException { + + return GetContainerCountResponseProto.newBuilder() + .setContainerCount(impl.getContainerCount( + HddsProtos.LifeCycleState.CLOSED)) + .build(); + } + public ResetDeletedBlockRetryCountResponseProto getResetDeletedBlockRetryCount(ResetDeletedBlockRetryCountRequestProto request) throws IOException { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 21d179b59e45..0a7eeb81e5b2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -1123,6 +1123,20 @@ public long getContainerCount() throws IOException { return scm.getContainerManager().getContainers().size(); } + @Override + public long getContainerCount(HddsProtos.LifeCycleState state) + throws IOException { + return 
scm.getContainerManager().getContainers(state).size(); + } + + @Override + public List getListOfContainers( + long startContainerID, int count, HddsProtos.LifeCycleState state) + throws IOException { + return scm.getContainerManager().getContainers( + ContainerID.valueOf(startContainerID), count, state); + } + /** * Queries a list of Node that match a set of statuses. * diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java index f2654fac027b..bc00f4d142be 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java @@ -26,6 +26,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.OptionalLong; import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; @@ -48,6 +49,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto; import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.ha.SCMContext; import org.apache.hadoop.hdds.scm.ha.SCMNodeDetails; import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode; import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.ReportFromDatanode; @@ -80,6 +82,7 @@ import org.apache.hadoop.ozone.protocolPB.StorageContainerDatanodeProtocolPB; import org.apache.hadoop.ozone.protocolPB.StorageContainerDatanodeProtocolServerSideTranslatorPB; import org.apache.hadoop.security.authorize.PolicyProvider; +import org.apache.ratis.protocol.exceptions.NotLeaderException; import 
com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -104,6 +107,7 @@ import static org.apache.hadoop.hdds.scm.server.StorageContainerManager.startRpcServer; import static org.apache.hadoop.hdds.server.ServerUtils.getRemoteUserName; import static org.apache.hadoop.hdds.server.ServerUtils.updateRPCListenAddress; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -130,9 +134,12 @@ public class SCMDatanodeProtocolServer implements private final EventPublisher eventPublisher; private ProtocolMessageMetrics protocolMessageMetrics; + private final SCMContext scmContext; + public SCMDatanodeProtocolServer(final OzoneConfiguration conf, OzoneStorageContainerManager scm, - EventPublisher eventPublisher) + EventPublisher eventPublisher, + SCMContext scmContext) throws IOException { // This constructor has broken down to smaller methods so that Recon's @@ -142,6 +149,7 @@ public SCMDatanodeProtocolServer(final OzoneConfiguration conf, this.scm = scm; this.eventPublisher = eventPublisher; + this.scmContext = scmContext; heartbeatDispatcher = new SCMDatanodeHeartbeatDispatcher( scm.getScmNodeManager(), eventPublisher); @@ -272,16 +280,21 @@ public SCMHeartbeatResponseProto sendHeartbeat( SCMHeartbeatRequestProto heartbeat) throws IOException, TimeoutException { List cmdResponses = new ArrayList<>(); for (SCMCommand cmd : heartbeatDispatcher.dispatch(heartbeat)) { - cmdResponses.add(getCommandResponse(cmd)); + cmdResponses.add(getCommandResponse(cmd, scm)); } + final OptionalLong term = getTermIfLeader(); boolean auditSuccess = true; Map auditMap = Maps.newHashMap(); auditMap.put("datanodeUUID", heartbeat.getDatanodeDetails().getUuid()); auditMap.put("command", flatten(cmdResponses.toString())); + term.ifPresent(t -> auditMap.put("term", String.valueOf(t))); try { - return SCMHeartbeatResponseProto.newBuilder() - .setDatanodeUUID(heartbeat.getDatanodeDetails().getUuid()) - .addAllCommands(cmdResponses).build(); + 
SCMHeartbeatResponseProto.Builder builder = + SCMHeartbeatResponseProto.newBuilder() + .setDatanodeUUID(heartbeat.getDatanodeDetails().getUuid()) + .addAllCommands(cmdResponses); + term.ifPresent(builder::setTerm); + return builder.build(); } catch (Exception ex) { auditSuccess = false; AUDIT.logWriteFailure( @@ -297,6 +310,17 @@ public SCMHeartbeatResponseProto sendHeartbeat( } } + private OptionalLong getTermIfLeader() { + if (scmContext != null && scmContext.isLeader()) { + try { + return OptionalLong.of(scmContext.getTermOfLeader()); + } catch (NotLeaderException e) { + // only leader should distribute current term + } + } + return OptionalLong.empty(); + } + /** * Returns a SCMCommandRepose from the SCM Command. * @@ -305,14 +329,17 @@ public SCMHeartbeatResponseProto sendHeartbeat( * @throws IOException */ @VisibleForTesting - public SCMCommandProto getCommandResponse(SCMCommand cmd) - throws IOException, TimeoutException { + public static SCMCommandProto getCommandResponse(SCMCommand cmd, + OzoneStorageContainerManager scm) throws IOException, TimeoutException { SCMCommandProto.Builder builder = SCMCommandProto.newBuilder() .setEncodedToken(cmd.getEncodedToken()); // In HA mode, it is the term of current leader SCM. // In non-HA mode, it is the default value 0. builder.setTerm(cmd.getTerm()); + // The default deadline is 0, which means no deadline. Individual commands + // may have a deadline set. 
+ builder.setDeadlineMsSinceEpoch(cmd.getDeadline()); switch (cmd.getType()) { case reregisterCommand: diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 17d39be839a8..1452c7f6b3a5 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -41,7 +41,9 @@ import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerManagerImpl; import org.apache.hadoop.hdds.scm.PlacementPolicyValidateProxy; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaPendingOps; +import org.apache.hadoop.hdds.scm.container.replication.DatanodeCommandCountUpdatedHandler; import org.apache.hadoop.hdds.scm.container.replication.LegacyReplicationManager; import org.apache.hadoop.hdds.scm.crl.CRLStatusReportHandler; import org.apache.hadoop.hdds.scm.ha.BackgroundSCMService; @@ -139,7 +141,6 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.OzoneSecurityUtil; -import org.apache.hadoop.ozone.common.MonotonicClock; import org.apache.hadoop.ozone.common.Storage.StorageState; import org.apache.hadoop.ozone.upgrade.DefaultUpgradeFinalizationExecutor; import org.apache.hadoop.ozone.upgrade.UpgradeFinalizationExecutor; @@ -260,12 +261,13 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl private final OzoneConfiguration configuration; private SCMContainerMetrics scmContainerMetrics; private SCMContainerPlacementMetrics placementMetrics; - private PlacementPolicy containerPlacementPolicy; - private PlacementPolicy ecContainerPlacementPolicy; + 
private PlacementPolicy containerPlacementPolicy; + private PlacementPolicy ecContainerPlacementPolicy; private PlacementPolicyValidateProxy placementPolicyValidateProxy; private MetricsSystem ms; private final Map ratisMetricsMap = new ConcurrentHashMap<>(); + private List ratisReporterList = null; private String primaryScmNodeId; /** @@ -283,7 +285,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl // container replicas. private ContainerReplicaPendingOps containerReplicaPendingOps; private final AtomicBoolean isStopped = new AtomicBoolean(false); - + /** * Creates a new StorageContainerManager. Configuration will be * updated with information on the actual listening addresses used @@ -387,7 +389,7 @@ private StorageContainerManager(OzoneConfiguration conf, scmAdmins = new OzoneAdmins(scmAdminUsernames, scmAdminGroups); datanodeProtocolServer = new SCMDatanodeProtocolServer(conf, this, - eventQueue); + eventQueue, scmContext); blockProtocolServer = new SCMBlockProtocolServer(conf, this); clientProtocolServer = new SCMClientProtocolServer(conf, this); @@ -454,6 +456,8 @@ private void initializeEventHandlers() { eventQueue.addHandler(SCMEvents.NODE_REPORT, nodeReportHandler); eventQueue.addHandler(SCMEvents.COMMAND_QUEUE_REPORT, commandQueueReportHandler); + eventQueue.addHandler(SCMEvents.DATANODE_COMMAND_COUNT_UPDATED, + new DatanodeCommandCountUpdatedHandler(replicationManager)); // Use the same executor for both ICR and FCR. // The Executor maps the event to a thread for DN. @@ -581,7 +585,9 @@ public static StorageContainerManager createSCM(OzoneConfiguration conf) @SuppressWarnings("methodLength") private void initializeSystemManagers(OzoneConfiguration conf, SCMConfigurator configurator) throws IOException { - Clock clock = new MonotonicClock(ZoneOffset.UTC); + // Use SystemClock when data is persisted + // and used again after system restarts. 
+ Clock systemClock = Clock.system(ZoneOffset.UTC); if (configurator.getNetworkTopology() != null) { clusterMap = configurator.getNetworkTopology(); @@ -589,8 +595,8 @@ private void initializeSystemManagers(OzoneConfiguration conf, clusterMap = new NetworkTopologyImpl(conf); } // This needs to be done before initializing Ratis. - RatisDropwizardExports.registerRatisMetricReporters(ratisMetricsMap, - () -> isStopped.get()); + ratisReporterList = RatisDropwizardExports + .registerRatisMetricReporters(ratisMetricsMap, () -> isStopped.get()); if (configurator.getSCMHAManager() != null) { scmHAManager = configurator.getSCMHAManager(); } else { @@ -669,14 +675,15 @@ private void initializeSystemManagers(OzoneConfiguration conf, eventQueue, scmContext, serviceManager, - clock + systemClock ); } finalizationManager.buildUpgradeContext(scmNodeManager, pipelineManager, scmContext); - containerReplicaPendingOps = new ContainerReplicaPendingOps(conf, clock); + containerReplicaPendingOps = + new ContainerReplicaPendingOps(conf, systemClock); long containerReplicaOpScrubberIntervalMs = conf.getTimeDuration( ScmConfigKeys @@ -697,7 +704,7 @@ private void initializeSystemManagers(OzoneConfiguration conf, final String backgroundServiceName = "ExpiredContainerReplicaOpScrubber"; BackgroundSCMService expiredContainerReplicaOpScrubber = - new BackgroundSCMService.Builder().setClock(clock) + new BackgroundSCMService.Builder().setClock(systemClock) .setScmContext(scmContext) .setServiceName(backgroundServiceName) .setIntervalInMillis(containerReplicaOpScrubberIntervalMs) @@ -731,7 +738,7 @@ private void initializeSystemManagers(OzoneConfiguration conf, } else { LegacyReplicationManager legacyRM = new LegacyReplicationManager( conf, containerManager, containerPlacementPolicy, eventQueue, - scmContext, scmNodeManager, scmHAManager, clock, + scmContext, scmNodeManager, scmHAManager, systemClock, getScmMetadataStore().getMoveTable()); replicationManager = new ReplicationManager( conf, @@ 
-741,7 +748,7 @@ private void initializeSystemManagers(OzoneConfiguration conf, eventQueue, scmContext, scmNodeManager, - clock, + systemClock, legacyRM, containerReplicaPendingOps); } @@ -1616,7 +1623,7 @@ public void stop() { scmSafeModeManager.stop(); serviceManager.stop(); - RatisDropwizardExports.clear(ratisMetricsMap); + RatisDropwizardExports.clear(ratisMetricsMap, ratisReporterList); try { LOG.info("Stopping SCM MetadataStore."); @@ -1769,7 +1776,7 @@ public void checkAdminAccess(UserGroupInformation remoteUser) if (remoteUser != null && !scmAdmins.isAdmin(remoteUser)) { throw new AccessControlException( "Access denied for user " + remoteUser.getUserName() + - ". Superuser privilege is required."); + ". SCM superuser privilege is required."); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java index 99503679726f..eb0741662c60 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm; import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.client.RatisReplicationConfig; @@ -84,7 +85,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.UUID; @@ -691,11 +691,20 @@ public static Set getReplicas( } public static Set getReplicas( + final ContainerID containerId, + final ContainerReplicaProto.State state, + final long sequenceId, + final DatanodeDetails... 
datanodeDetails) { + return Sets.newHashSet(getReplicas(containerId, state, sequenceId, + Arrays.asList(datanodeDetails))); + } + + public static List getReplicas( final ContainerID containerId, final ContainerReplicaProto.State state, final long sequenceId, - final DatanodeDetails... datanodeDetails) { - Set replicas = new HashSet<>(); + final Iterable datanodeDetails) { + List replicas = new ArrayList<>(); for (DatanodeDetails datanode : datanodeDetails) { replicas.add(getReplicas(containerId, state, sequenceId, datanode.getUuid(), datanode)); @@ -744,14 +753,14 @@ public static ContainerReplica getReplicas( return builder.build(); } - public static Set getReplicasWithReplicaIndex( + public static List getReplicasWithReplicaIndex( final ContainerID containerId, final ContainerReplicaProto.State state, final long usedBytes, final long keyCount, final long sequenceId, - final DatanodeDetails... datanodeDetails) { - Set replicas = new HashSet<>(); + final Iterable datanodeDetails) { + List replicas = new ArrayList<>(); int replicaIndex = 1; for (DatanodeDetails datanode : datanodeDetails) { replicas.add(getReplicaBuilder(containerId, state, @@ -762,6 +771,17 @@ public static Set getReplicasWithReplicaIndex( return replicas; } + public static Set getReplicasWithReplicaIndex( + final ContainerID containerId, + final ContainerReplicaProto.State state, + final long usedBytes, + final long keyCount, + final long sequenceId, + final DatanodeDetails... 
datanodeDetails) { + return Sets.newHashSet(getReplicasWithReplicaIndex(containerId, state, + usedBytes, keyCount, sequenceId, Arrays.asList(datanodeDetails))); + } + public static Pipeline getRandomPipeline() { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java index d11ca91b369e..a64cc73b7840 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java @@ -18,21 +18,37 @@ package org.apache.hadoop.hdds.scm; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.MockNodeManager; import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.net.Node; import org.apache.hadoop.hdds.scm.node.NodeManager; -import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.ozone.container.common.SCMTestUtils; +import org.apache.ozone.test.GenericTestUtils; +import org.apache.ratis.thirdparty.com.google.common.collect.ImmutableMap; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import java.util.Arrays; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; +import java.util.function.Function; +import java.util.stream.Stream; /** * Test functions of SCMCommonPlacementPolicy. @@ -51,25 +67,426 @@ public void setup() { @Test public void testGetResultSet() throws SCMException { DummyPlacementPolicy dummyPlacementPolicy = - new DummyPlacementPolicy(nodeManager, conf); - List list = - nodeManager.getNodes(NodeStatus.inServiceHealthy()); + new DummyPlacementPolicy(nodeManager, conf, 5); + List list = nodeManager.getAllNodes(); List result = dummyPlacementPolicy.getResultSet(3, list); Set resultSet = new HashSet<>(result); Assertions.assertNotEquals(1, resultSet.size()); } + private Set testReplicasToFixMisreplication( + List replicas, + DummyPlacementPolicy placementPolicy, + int expectedNumberOfReplicasToCopy, + Map expectedNumberOfCopyOperationFromRack) { + return testReplicasToFixMisreplication(replicas.stream().distinct().collect( + Collectors.toMap(Function.identity(), r -> true)), placementPolicy, + expectedNumberOfReplicasToCopy, + expectedNumberOfCopyOperationFromRack); + } + + private Set testReplicasToFixMisreplication( + Map replicas, + DummyPlacementPolicy placementPolicy, + int expectedNumberOfReplicasToCopy, + Map expectedNumberOfCopyOperationFromRack) { + Set replicasToCopy = placementPolicy + .replicasToCopyToFixMisreplication(replicas); + Assertions.assertEquals(expectedNumberOfReplicasToCopy, + replicasToCopy.size()); + Map rackCopyMap = + replicasToCopy.stream().collect(Collectors.groupingBy( + replica -> placementPolicy + .getPlacementGroup(replica.getDatanodeDetails()), + Collectors.counting())); + Set racks = replicas.keySet().stream() + .map(ContainerReplica::getDatanodeDetails) + .map(placementPolicy::getPlacementGroup) + .collect(Collectors.toSet()); + for (Node rack: racks) { + Assertions.assertEquals( + expectedNumberOfCopyOperationFromRack.getOrDefault(rack, 0), + 
rackCopyMap.getOrDefault(rack, 0L).intValue()); + } + return replicasToCopy; + } + + @Test + public void testReplicasToFixMisreplicationWithOneMisreplication() { + DummyPlacementPolicy dummyPlacementPolicy = + new DummyPlacementPolicy(nodeManager, conf, 5); + List racks = dummyPlacementPolicy.racks; + List list = nodeManager.getAllNodes(); + List replicaDns = Stream.of(0, 1, 2, 3, 5) + .map(list::get).collect(Collectors.toList()); + List replicas = + HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + CLOSED, 0, 0, 0, replicaDns); + testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 1, + ImmutableMap.of(racks.get(0), 1)); + } + + @Test + public void testReplicasToFixMisreplicationWithTwoMisreplication() { + DummyPlacementPolicy dummyPlacementPolicy = new DummyPlacementPolicy( + nodeManager, conf, + GenericTestUtils.getReverseMap( + ImmutableMap.of(0, ImmutableList.of(0, 1, 5), + 1, ImmutableList.of(6), + 2, ImmutableList.of(2, 7), + 3, ImmutableList.of(3, 8), + 4, ImmutableList.of(4, 9))), 5); + List racks = dummyPlacementPolicy.racks; + List list = nodeManager.getAllNodes(); + List replicaDns = Stream.of(0, 1, 2, 3, 5) + .map(list::get).collect(Collectors.toList()); + List replicas = + HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + CLOSED, 0, 0, 0, replicaDns); + testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 2, + ImmutableMap.of(racks.get(0), 2)); + } + + @Test + public void testReplicasToFixMisreplicationWithThreeMisreplication() { + DummyPlacementPolicy dummyPlacementPolicy = new DummyPlacementPolicy( + nodeManager, conf, + GenericTestUtils.getReverseMap( + ImmutableMap.of(0, ImmutableList.of(0, 1, 2, 5), + 1, ImmutableList.of(6), + 2, ImmutableList.of(7), + 3, ImmutableList.of(3, 8), + 4, ImmutableList.of(4, 9))), 5); + List racks = dummyPlacementPolicy.racks; + List list = nodeManager.getAllNodes(); + List replicaDns = Stream.of(0, 1, 2, 3, 5) + .map(list::get).collect(Collectors.toList()); + List 
replicas = + HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + CLOSED, 0, 0, 0, replicaDns); + testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 3, + ImmutableMap.of(racks.get(0), 3)); + } + + @Test + public void + testReplicasToFixMisreplicationWithThreeMisreplicationOnDifferentRack() { + DummyPlacementPolicy dummyPlacementPolicy = new DummyPlacementPolicy( + nodeManager, conf, + GenericTestUtils.getReverseMap( + ImmutableMap.of(0, ImmutableList.of(0, 1, 2, 5), + 1, ImmutableList.of(6), + 2, ImmutableList.of(7), + 3, ImmutableList.of(3, 4, 8), + 4, ImmutableList.of(9))), 5); + List racks = dummyPlacementPolicy.racks; + List list = nodeManager.getAllNodes(); + List replicaDns = Stream.of(0, 1, 2, 3, 4) + .map(list::get).collect(Collectors.toList()); + //Creating Replicas without replica Index + List replicas = HddsTestUtils + .getReplicas(new ContainerID(1), CLOSED, 0, replicaDns); + testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 3, + ImmutableMap.of(racks.get(0), 2, racks.get(3), 1)); + } + + @Test + public void + testReplicasToFixMisreplicationWithReplicationFactorLessThanNumberOfRack( + ) { + DummyPlacementPolicy dummyPlacementPolicy = new DummyPlacementPolicy( + nodeManager, conf, + GenericTestUtils.getReverseMap( + ImmutableMap.of(0, ImmutableList.of(0, 1, 5), + 1, ImmutableList.of(6), + 2, ImmutableList.of(2, 7), + 3, ImmutableList.of(3, 4, 8), + 4, ImmutableList.of(9))), 5); + List racks = dummyPlacementPolicy.racks; + List list = nodeManager.getAllNodes(); + List replicaDns = Stream.of(0, 1, 3, 4) + .map(list::get).collect(Collectors.toList()); + //Creating Replicas without replica Index for replicas < number of racks + List replicas = HddsTestUtils + .getReplicas(new ContainerID(1), CLOSED, 0, replicaDns); + testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 2, + ImmutableMap.of(racks.get(0), 1, racks.get(3), 1)); + } + + @Test + public void + 
testReplicasToFixMisreplicationWithReplicationFactorMoreThanNumberOfRack( + ) { + DummyPlacementPolicy dummyPlacementPolicy = new DummyPlacementPolicy( + nodeManager, conf, + GenericTestUtils.getReverseMap( + ImmutableMap.of(0, ImmutableList.of(0, 1, 2, 5), + 1, ImmutableList.of(6), + 2, ImmutableList.of(7), + 3, ImmutableList.of(3, 4, 8), + 4, ImmutableList.of(9))), 5); + List racks = dummyPlacementPolicy.racks; + List list = nodeManager.getAllNodes(); + List replicaDns = Stream.of(0, 1, 2, 3, 4, 6) + .map(list::get).collect(Collectors.toList()); + //Creating Replicas without replica Index for replicas >number of racks + List replicas = HddsTestUtils + .getReplicas(new ContainerID(1), CLOSED, 0, replicaDns); + testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 2, + ImmutableMap.of(racks.get(0), 1, racks.get(3), 1)); + } + + @Test + public void testReplicasToFixMisreplicationMaxReplicaPerRack() { + DummyPlacementPolicy dummyPlacementPolicy = + new DummyPlacementPolicy(nodeManager, conf, 2); + List racks = dummyPlacementPolicy.racks; + List list = nodeManager.getAllNodes(); + List replicaDns = Stream.of(0, 2, 4, 6, 8) + .map(list::get).collect(Collectors.toList()); + List replicas = + HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + CLOSED, 0, 0, 0, replicaDns); + testReplicasToFixMisreplication(replicas, dummyPlacementPolicy, 2, + ImmutableMap.of(racks.get(0), 2)); + } + + @Test + public void + testReplicasToFixMisreplicationMaxReplicaPerRackWithUncopyableReplicas() { + DummyPlacementPolicy dummyPlacementPolicy = + new DummyPlacementPolicy(nodeManager, conf, 2); + List racks = dummyPlacementPolicy.racks; + List list = nodeManager.getAllNodes(); + List replicaDns = Stream.of(0, 2, 4, 6, 8) + .map(list::get).collect(Collectors.toList()); + List replicas = + HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + CLOSED, 0, 0, 0, replicaDns); + Map replicaMap = replicas.stream().distinct() + 
.collect(Collectors.toMap(Function.identity(), r -> false)); + replicaMap.put(replicas.get(0), true); + Assertions.assertEquals(testReplicasToFixMisreplication(replicaMap, + dummyPlacementPolicy, 1, + ImmutableMap.of(racks.get(0), 1)), + Sets.newHashSet(replicas.get(0))); + } + + @Test + public void testReplicasWithoutMisreplication() { + DummyPlacementPolicy dummyPlacementPolicy = + new DummyPlacementPolicy(nodeManager, conf, 5); + List list = nodeManager.getAllNodes(); + List replicaDns = Stream.of(0, 1, 2, 3, 4) + .map(list::get).collect(Collectors.toList()); + Map replicas = + HddsTestUtils.getReplicasWithReplicaIndex(new ContainerID(1), + CLOSED, 0, 0, 0, replicaDns) + .stream() + .collect(Collectors.toMap(Function.identity(), r -> true)); + Set replicasToCopy = dummyPlacementPolicy + .replicasToCopyToFixMisreplication(replicas); + Assertions.assertEquals(0, replicasToCopy.size()); + } + + @Test + public void testReplicasToRemoveWithOneOverreplication() { + DummyPlacementPolicy dummyPlacementPolicy = + new DummyPlacementPolicy(nodeManager, conf, 5); + List list = nodeManager.getAllNodes(); + Set replicas = Sets.newHashSet( + HddsTestUtils.getReplicasWithReplicaIndex( + new ContainerID(1), CLOSED, 0, 0, 0, list.subList(1, 6))); + ContainerReplica replica = ContainerReplica.newBuilder() + .setContainerID(new ContainerID(1)) + .setContainerState(CLOSED) + .setReplicaIndex(1) + .setDatanodeDetails(list.get(7)).build(); + replicas.add(replica); + + Set replicasToRemove = dummyPlacementPolicy + .replicasToRemoveToFixOverreplication(replicas, 1); + Assertions.assertEquals(replicasToRemove.size(), 1); + Assertions.assertEquals(replicasToRemove.toArray()[0], replica); + } + + @Test + public void testReplicasToRemoveWithTwoOverreplication() { + DummyPlacementPolicy dummyPlacementPolicy = + new DummyPlacementPolicy(nodeManager, conf, 5); + List list = nodeManager.getAllNodes(); + + Set replicas = Sets.newHashSet( + HddsTestUtils.getReplicasWithReplicaIndex( + new 
ContainerID(1), CLOSED, 0, 0, 0, list.subList(1, 6))); + + Set replicasToBeRemoved = Sets.newHashSet( + HddsTestUtils.getReplicasWithReplicaIndex( + new ContainerID(1), CLOSED, 0, 0, 0, list.subList(7, 9))); + replicas.addAll(replicasToBeRemoved); + + Set replicasToRemove = dummyPlacementPolicy + .replicasToRemoveToFixOverreplication(replicas, 1); + Assertions.assertEquals(replicasToRemove.size(), 2); + Assertions.assertEquals(replicasToRemove, replicasToBeRemoved); + } + + @Test + public void testReplicasToRemoveWith2CountPerUniqueReplica() { + DummyPlacementPolicy dummyPlacementPolicy = + new DummyPlacementPolicy(nodeManager, conf, 3); + List list = nodeManager.getAllNodes(); + + Set replicas = Sets.newHashSet( + HddsTestUtils.getReplicasWithReplicaIndex( + new ContainerID(1), CLOSED, 0, 0, 0, list.subList(0, 3))); + replicas.addAll(HddsTestUtils.getReplicasWithReplicaIndex( + new ContainerID(1), CLOSED, 0, 0, 0, list.subList(3, 6))); + Set replicasToBeRemoved = Sets.newHashSet( + HddsTestUtils.getReplicaBuilder(new ContainerID(1), CLOSED, 0, 0, 0, + list.get(7).getUuid(), list.get(7)) + .setReplicaIndex(1).build(), + HddsTestUtils.getReplicaBuilder(new ContainerID(1), CLOSED, 0, 0, 0, + list.get(8).getUuid(), list.get(8)).setReplicaIndex(1) + .build()); + replicas.addAll(replicasToBeRemoved); + + Set replicasToRemove = dummyPlacementPolicy + .replicasToRemoveToFixOverreplication(replicas, 2); + Assertions.assertEquals(replicasToRemove.size(), 2); + Assertions.assertEquals(replicasToRemove, replicasToBeRemoved); + } + + @Test + public void testReplicasToRemoveWithoutReplicaIndex() { + DummyPlacementPolicy dummyPlacementPolicy = + new DummyPlacementPolicy(nodeManager, conf, 3); + List list = nodeManager.getAllNodes(); + + Set replicas = Sets.newHashSet(HddsTestUtils.getReplicas( + new ContainerID(1), CLOSED, 0, list.subList(0, 5))); + + Set replicasToRemove = dummyPlacementPolicy + .replicasToRemoveToFixOverreplication(replicas, 3); + 
Assertions.assertEquals(replicasToRemove.size(), 2); + Set racksToBeRemoved = Arrays.asList(0, 1).stream() + .map(dummyPlacementPolicy.racks::get).collect(Collectors.toSet()); + Assertions.assertEquals(replicasToRemove.stream() + .map(ContainerReplica::getDatanodeDetails) + .map(dummyPlacementPolicy::getPlacementGroup) + .collect(Collectors.toSet()), racksToBeRemoved); + } + + @Test + public void testReplicasToRemoveWithOverreplicationWithinSameRack() { + DummyPlacementPolicy dummyPlacementPolicy = + new DummyPlacementPolicy(nodeManager, conf, 3); + List list = nodeManager.getAllNodes(); + + Set replicas = Sets.newHashSet( + HddsTestUtils.getReplicasWithReplicaIndex( + new ContainerID(1), CLOSED, 0, 0, 0, list.subList(1, 6))); + + ContainerReplica replica1 = ContainerReplica.newBuilder() + .setContainerID(new ContainerID(1)) + .setContainerState(CLOSED) + .setReplicaIndex(1) + .setDatanodeDetails(list.get(6)).build(); + replicas.add(replica1); + ContainerReplica replica2 = ContainerReplica.newBuilder() + .setContainerID(new ContainerID(1)) + .setContainerState(CLOSED) + .setReplicaIndex(1) + .setDatanodeDetails(list.get(0)).build(); + replicas.add(replica2); + + Set replicasToRemove = dummyPlacementPolicy + .replicasToRemoveToFixOverreplication(replicas, 1); + Map removedReplicasRackCntMap = replicasToRemove.stream() + .map(ContainerReplica::getDatanodeDetails) + .map(dummyPlacementPolicy::getPlacementGroup) + .collect(Collectors.groupingBy(Function.identity(), + Collectors.counting())); + Assertions.assertEquals(replicasToRemove.size(), 2); + Assertions.assertTrue(Sets.newHashSet(1L, 2L).contains( + removedReplicasRackCntMap.get(dummyPlacementPolicy.racks.get(0)))); + Assertions.assertEquals( + removedReplicasRackCntMap.get(dummyPlacementPolicy.racks.get(1)), + removedReplicasRackCntMap.get(dummyPlacementPolicy.racks.get(0)) + == 2 ? 
0 : 1); + } + + @Test + public void testReplicasToRemoveWithNoOverreplication() { + DummyPlacementPolicy dummyPlacementPolicy = + new DummyPlacementPolicy(nodeManager, conf, 5); + List list = nodeManager.getAllNodes(); + Set replicas = Sets.newHashSet( + HddsTestUtils.getReplicasWithReplicaIndex( + new ContainerID(1), CLOSED, 0, 0, 0, list.subList(1, 6))); + + Set replicasToRemove = dummyPlacementPolicy + .replicasToRemoveToFixOverreplication(replicas, 1); + Assertions.assertEquals(replicasToRemove.size(), 0); + } + + + private static class DummyPlacementPolicy extends SCMCommonPlacementPolicy { + private Map rackMap; + private List racks; + private int rackCnt; + - DummyPlacementPolicy( - NodeManager nodeManager, - ConfigurationSource conf) { + /** + * Creates Dummy Placement Policy with dn index to rack Mapping + * in round robin fashion (rack Index = dn Index % total number of racks). + * @param nodeManager + * @param conf + * @param rackCnt + */ + DummyPlacementPolicy(NodeManager nodeManager, ConfigurationSource conf, + int rackCnt) { + this(nodeManager, conf, + IntStream.range(0, nodeManager.getAllNodes().size()).boxed() + .collect(Collectors.toMap(Function.identity(), + idx -> idx % rackCnt)), rackCnt); + } + + /** + * Creates Dummy Placement Policy with dn index -> rack index mapping. 
+ * @param nodeManager + * @param conf + * @param rackCnt + */ + DummyPlacementPolicy(NodeManager nodeManager, ConfigurationSource conf, + Map datanodeRackMap, int rackCnt) { super(nodeManager, conf); + this.rackCnt = rackCnt; + this.racks = IntStream.range(0, rackCnt) + .mapToObj(i -> Mockito.mock(Node.class)).collect(Collectors.toList()); + List datanodeDetails = nodeManager.getAllNodes(); + rackMap = datanodeRackMap.entrySet().stream() + .collect(Collectors.toMap( + entry -> datanodeDetails.get(entry.getKey()), + entry -> racks.get(entry.getValue()))); } + + @Override public DatanodeDetails chooseNode(List healthyNodes) { return healthyNodes.get(0); } + + @Override + protected Node getPlacementGroup(DatanodeDetails dn) { + return rackMap.get(dn); + } + + @Override + protected int getRequiredRackCount(int numReplicas) { + return Math.min(numReplicas, rackCnt); + } } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java index 908a21e162d0..5ac797d11c6c 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.block; import java.io.IOException; +import java.time.Clock; import java.time.ZoneId; import java.nio.file.Path; import java.time.ZoneOffset; @@ -70,7 +71,6 @@ import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.common.MonotonicClock; import org.apache.hadoop.ozone.container.common.SCMTestUtils; import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.apache.hadoop.ozone.protocol.commands.CreatePipelineCommand; @@ -143,7 +143,7 @@ public void setUp(@TempDir Path 
tempDir) throws Exception { eventQueue, scmContext, serviceManager, - new MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); PipelineProvider mockRatisProvider = new MockRatisPipelineProvider(nodeManager, @@ -157,7 +157,7 @@ public void setUp(@TempDir Path tempDir) throws Exception { pipelineManager, scmMetadataStore.getContainerTable(), new ContainerReplicaPendingOps(conf, - new MonotonicClock(ZoneId.systemDefault()))); + Clock.system(ZoneId.systemDefault()))); SCMSafeModeManager safeModeManager = new SCMSafeModeManager(conf, containerManager.getContainers(), containerManager, pipelineManager, eventQueue, serviceManager, scmContext) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java index c35cb2b4551f..732cf208b97d 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java @@ -19,7 +19,10 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; @@ -31,6 +34,7 @@ import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.HddsTestUtils; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl; @@ -176,7 +180,8 @@ public void testECPolicy() 
throws IOException { /** * A dummy container placement implementation for test. */ - public static class DummyImpl implements PlacementPolicy { + public static class DummyImpl implements + PlacementPolicy { @Override public List chooseDatanodes( List usedNodes, @@ -191,6 +196,19 @@ public List chooseDatanodes( validateContainerPlacement(List dns, int replicas) { return new ContainerPlacementStatusDefault(1, 1, 1); } + + @Override + public Set replicasToCopyToFixMisreplication( + Map replicas) { + return Collections.emptySet(); + } + + @Override + public Set replicasToRemoveToFixOverreplication( + Set replicas, int expectedCountPerUniqueReplica) { + return null; + } + } @Test diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementStatusDefault.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementStatusDefault.java index de69b013657a..74f083e8da47 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementStatusDefault.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementStatusDefault.java @@ -19,6 +19,9 @@ package org.apache.hadoop.hdds.scm.container.placement.algorithms; import org.junit.jupiter.api.Test; + +import java.util.Arrays; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -48,6 +51,10 @@ public void testPlacementSatisfiedCorrectly() { stat = new ContainerPlacementStatusDefault(3, 2, 3); assertTrue(stat.isPolicySatisfied()); assertEquals(0, stat.misReplicationCount()); + + stat = new ContainerPlacementStatusDefault(3, 2, 3); + assertTrue(stat.isPolicySatisfied()); + assertEquals(0, stat.misReplicationCount()); } @Test @@ -60,11 +67,24 @@ 
public void testPlacementNotSatisfied() { // Zero rack, but need 2 - shouldn't really happen in practice stat = new ContainerPlacementStatusDefault(0, 2, 1); assertFalse(stat.isPolicySatisfied()); - assertEquals(2, stat.misReplicationCount()); + assertEquals(1, stat.misReplicationCount()); stat = new ContainerPlacementStatusDefault(2, 3, 3); assertFalse(stat.isPolicySatisfied()); assertEquals(1, stat.misReplicationCount()); + + stat = new ContainerPlacementStatusDefault(2, 4, 3, 1, Arrays.asList(1, 3)); + assertFalse(stat.isPolicySatisfied()); + assertEquals(2, stat.misReplicationCount()); + + stat = new ContainerPlacementStatusDefault(1, 4, 3, 1, Arrays.asList(1, 2)); + assertFalse(stat.isPolicySatisfied()); + assertEquals(2, stat.misReplicationCount()); + + stat = new ContainerPlacementStatusDefault(2, 2, 3, 2, Arrays.asList(3, 1)); + assertFalse(stat.isPolicySatisfied()); + assertEquals(1, stat.misReplicationCount()); + } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java index 46bf8031effc..815c70ffb3d4 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java @@ -58,6 +58,8 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY; +import static 
org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes.FAILED_TO_FIND_HEALTHY_NODES; import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA; import static org.apache.hadoop.hdds.scm.net.NetConstants.RACK_SCHEMA; import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT_SCHEMA; @@ -245,20 +247,34 @@ public void chooseNodeWithNoExcludedNodes(int datanodeCount) nodeNum = 5; if (datanodeCount > nodeNum) { assumeTrue(datanodeCount >= NODE_PER_RACK); - datanodeDetails = policy.chooseDatanodes(null, null, nodeNum, 0, 15); - Assertions.assertEquals(nodeNum, datanodeDetails.size()); - Assertions.assertEquals(getRackSize(datanodeDetails), - Math.min(nodeNum, rackNum)); + if (datanodeCount == 6) { + int finalNodeNum = nodeNum; + SCMException e = assertThrows(SCMException.class, + () -> policy.chooseDatanodes(null, null, finalNodeNum, 0, 15)); + assertEquals(FAILED_TO_FIND_HEALTHY_NODES, e.getResult()); + } else { + datanodeDetails = policy.chooseDatanodes(null, null, nodeNum, 0, 15); + Assertions.assertEquals(nodeNum, datanodeDetails.size()); + Assertions.assertEquals(getRackSize(datanodeDetails), + Math.min(nodeNum, rackNum)); + } } // 10 replicas nodeNum = 10; if (datanodeCount > nodeNum) { assumeTrue(datanodeCount > 2 * NODE_PER_RACK); - datanodeDetails = policy.chooseDatanodes(null, null, nodeNum, 0, 15); - Assertions.assertEquals(nodeNum, datanodeDetails.size()); - Assertions.assertEquals(getRackSize(datanodeDetails), - Math.min(nodeNum, rackNum)); + if (datanodeCount == 11) { + int finalNodeNum = nodeNum; + SCMException e = assertThrows(SCMException.class, + () -> policy.chooseDatanodes(null, null, finalNodeNum, 0, 15)); + assertEquals(FAILED_TO_FIND_HEALTHY_NODES, e.getResult()); + } else { + datanodeDetails = policy.chooseDatanodes(null, null, nodeNum, 0, 15); + Assertions.assertEquals(nodeNum, datanodeDetails.size()); + Assertions.assertEquals(getRackSize(datanodeDetails), + Math.min(nodeNum, rackNum)); + } } } @@ -314,11 +330,20 @@ public 
void chooseNodeWithExcludedNodes(int datanodeCount) totalNum = 5; excludedNodes.clear(); excludedNodes.add(datanodes.get(0)); - datanodeDetails = policy.chooseDatanodes( - excludedNodes, null, nodeNum, 0, 15); - Assertions.assertEquals(nodeNum, datanodeDetails.size()); - Assertions.assertEquals(getRackSize(datanodeDetails, excludedNodes), - Math.min(totalNum, rackNum)); + if (datanodeCount == 6) { + int finalNodeNum = nodeNum; + SCMException e = assertThrows(SCMException.class, + () -> policy.chooseDatanodes(excludedNodes, null, + finalNodeNum, 0, 15)); + assertEquals(FAILED_TO_FIND_HEALTHY_NODES, e.getResult()); + } else { + datanodeDetails = policy.chooseDatanodes( + excludedNodes, null, nodeNum, 0, 15); + Assertions.assertEquals(nodeNum, datanodeDetails.size()); + Assertions.assertEquals(getRackSize(datanodeDetails, excludedNodes), + Math.min(totalNum, rackNum)); + } + // 5 replicas, two existing datanodes on different rack nodeNum = 3; @@ -344,7 +369,9 @@ public void chooseNodeWithExcludedNodes(int datanodeCount) SCMException e = assertThrows(SCMException.class, () -> policy.chooseDatanodes(excludedNodes, null, 3, 0, 15)); String message = e.getMessage(); - assumeTrue(message.contains("ContainerPlacementPolicy not met")); + assertTrue(message.contains("Chosen nodes size from Unique Racks: 1," + + " but required nodes to choose from Unique Racks: " + + "2 do not match.")); } else { datanodeDetails = policy.chooseDatanodes( excludedNodes, null, nodeNum, 0, 15); @@ -530,6 +557,45 @@ private void assertPlacementPolicySatisfied(List usedDns, assertEquals(misReplication, stat.misReplicationCount()); } + @Test + public void testPipelineProviderRackScatter() throws SCMException { + setup(3, 1); + conf.set(OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY, + SCMContainerPlacementRackScatter.class.getCanonicalName()); + List usedDns = new ArrayList<>(); + List excludedDns = new ArrayList<>(); + List additionalNodes = policy.chooseDatanodes(usedDns, + excludedDns, null, 3, 0, 5); + 
assertPlacementPolicySatisfied(usedDns, additionalNodes, excludedDns, 3, + true, 0); + } + + // Test for pipeline provider placement when number of racks less than + // number of node required and nodes cannot be scattered. In this case + // the placement spreads the nodes as much as possible. In one case + // 3 nodes required and 2 racks placing 2 in one 1 in another. When + // only 1 rack placing all nodes in same rack. + @Test + public void testPipelineProviderRackScatterFallback() throws SCMException { + setup(3, 2); + conf.set(OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY, + SCMContainerPlacementRackScatter.class.getCanonicalName()); + List usedDns = new ArrayList<>(); + List excludedDns = new ArrayList<>(); + List additionalNodes = policy.chooseDatanodes(usedDns, + excludedDns, null, 3, 0, 5); + assertPlacementPolicySatisfied(usedDns, additionalNodes, excludedDns, 3, + true, 0); + + setup(3, 3); + additionalNodes = policy.chooseDatanodes(usedDns, + excludedDns, null, 3, 0, 5); + assertPlacementPolicySatisfied(usedDns, additionalNodes, excludedDns, 3, + true, 0); + } + + // add test for pipeline engagement + @Test public void testValidChooseNodesWithUsedNodes() throws SCMException { setup(5, 2); @@ -567,7 +633,7 @@ public void testInValidChooseNodesWithUsedNodesWithInsufficientRacks() { assertEquals("Chosen nodes size from Unique Racks: 1, but required " + "nodes to choose from Unique Racks: 2 do not match.", exception.getMessage()); - assertEquals(SCMException.ResultCodes.FAILED_TO_FIND_HEALTHY_NODES, + assertEquals(FAILED_TO_FIND_HEALTHY_NODES, exception.getResult()); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java index f87d2851586a..4960f89d2fd0 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java +++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.net.Node; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; @@ -198,6 +199,9 @@ public static Set createReplicas( public static PlacementPolicy getSimpleTestPlacementPolicy( final NodeManager nodeManager, final OzoneConfiguration conf) { + + final Node rackNode = MockDatanodeDetails.randomDatanodeDetails(); + return new SCMCommonPlacementPolicy(nodeManager, conf) { @Override protected List chooseDatanodesInternal( @@ -216,6 +220,12 @@ protected List chooseDatanodesInternal( public DatanodeDetails chooseNode(List healthyNodes) { return null; } + + @Override + protected Node getPlacementGroup(DatanodeDetails dn) { + // Make it look like a single rack cluster + return rackNode; + } }; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java index a86eb5ac7416..562b2b13697c 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java @@ -244,8 +244,8 @@ public void testReplicationMetrics() { pendingOps.removeExpiredEntries(1000); // Two Delete and Replication command should be timeout - Assertions.assertEquals(metrics.getEcReplicationCmdsTimeoutTotal(), 2); - Assertions.assertEquals(metrics.getEcDeletionCmdsTimeoutTotal(), 2); + 
Assertions.assertEquals(metrics.getEcReplicaCreateTimeoutTotal(), 2); + Assertions.assertEquals(metrics.getEcReplicaDeleteTimeoutTotal(), 2); pendingOps.scheduleDeleteReplica(new ContainerID(3), dn1, 2); pendingOps.scheduleAddReplica(new ContainerID(3), dn1, 3); @@ -262,8 +262,8 @@ public void testReplicationMetrics() { pendingOps.completeDeleteReplica(new ContainerID(4), dn2, 2); pendingOps.completeAddReplica(new ContainerID(4), dn3, 4); - Assertions.assertEquals(metrics.getEcReplicationCmdsCompletedTotal(), 2); - Assertions.assertEquals(metrics.getEcDeletionCmdsCompletedTotal(), 2); + Assertions.assertEquals(metrics.getEcReplicasCreatedTotal(), 2); + Assertions.assertEquals(metrics.getEcReplicasDeletedTotal(), 2); pendingOps.completeDeleteReplica(new ContainerID(3), dn1, 2); pendingOps.completeAddReplica(new ContainerID(2), dn1, 3); @@ -273,4 +273,81 @@ public void testReplicationMetrics() { Assertions.assertEquals(0, pendingOps.getPendingOpCount(DELETE)); } + /** + * Tests that registered subscribers are notified about completed and expired + * ops. 
+ */ + @Test + public void testNotifySubscribers() { + // register subscribers + ContainerReplicaPendingOpsSubscriber subscriber1 = Mockito.mock( + ContainerReplicaPendingOpsSubscriber.class); + ContainerReplicaPendingOpsSubscriber subscriber2 = Mockito.mock( + ContainerReplicaPendingOpsSubscriber.class); + pendingOps.registerSubscriber(subscriber1); + pendingOps.registerSubscriber(subscriber2); + + // schedule an ADD and a DELETE + ContainerID containerID = new ContainerID(1); + pendingOps.scheduleAddReplica(containerID, dn1, 0); + ContainerReplicaOp addOp = pendingOps.getPendingOps(containerID).get(0); + pendingOps.scheduleDeleteReplica(containerID, dn1, 0); + + // complete the ADD and verify that subscribers were notified + pendingOps.completeAddReplica(containerID, dn1, 0); + Mockito.verify(subscriber1, Mockito.times(1)).opCompleted(addOp, + containerID, false); + Mockito.verify(subscriber2, Mockito.times(1)).opCompleted(addOp, + containerID, false); + + // complete the DELETE and verify subscribers were notified + ContainerReplicaOp deleteOp = pendingOps.getPendingOps(containerID).get(0); + pendingOps.completeDeleteReplica(containerID, dn1, 0); + Mockito.verify(subscriber1, Mockito.times(1)).opCompleted(deleteOp, + containerID, false); + Mockito.verify(subscriber2, Mockito.times(1)).opCompleted(deleteOp, + containerID, false); + + // now, test notification on expiration + pendingOps.scheduleDeleteReplica(containerID, dn1, 0); + pendingOps.scheduleAddReplica(containerID, dn2, 0); + for (ContainerReplicaOp op : pendingOps.getPendingOps(containerID)) { + if (op.getOpType() == ADD) { + addOp = op; + } else { + deleteOp = op; + } + } + clock.fastForward(1000); + pendingOps.removeExpiredEntries(500); + // the clock is at 1000 and commands expired at 500 + Mockito.verify(subscriber1, Mockito.times(1)).opCompleted(addOp, + containerID, true); + Mockito.verify(subscriber1, Mockito.times(1)).opCompleted(deleteOp, + containerID, true); + Mockito.verify(subscriber2, 
Mockito.times(1)).opCompleted(addOp, + containerID, true); + Mockito.verify(subscriber2, Mockito.times(1)).opCompleted(deleteOp, + containerID, true); + } + + @Test + public void subscribersShouldNotBeNotifiedWhenOpsHaveNotExpired() { + ContainerID containerID = new ContainerID(1); + + // schedule ops + pendingOps.scheduleDeleteReplica(containerID, dn1, 0); + pendingOps.scheduleAddReplica(containerID, dn2, 0); + + // register subscriber + ContainerReplicaPendingOpsSubscriber subscriber1 = Mockito.mock( + ContainerReplicaPendingOpsSubscriber.class); + pendingOps.registerSubscriber(subscriber1); + + clock.fastForward(1000); + pendingOps.removeExpiredEntries(5000); + // no entries have expired, so there should be zero interactions with the + // subscriber + Mockito.verifyZeroInteractions(subscriber1); + } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java index cf844f2845c5..8ea24af0f6e7 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java @@ -42,6 +42,8 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static 
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica; /** @@ -107,7 +109,7 @@ public void testContainerMissingReplicaDueToPendingDelete() { public void testUnderReplicationDueToUnhealthyReplica() { Set replicas = ReplicationTestUtil.createReplicas(container.containerID(), - ContainerReplicaProto.State.CLOSED, 1, 2, 3, 4); + CLOSED, 1, 2, 3, 4); ContainerReplica unhealthyIndex5 = createContainerReplica(container.containerID(), 5, IN_SERVICE, ContainerReplicaProto.State.UNHEALTHY); @@ -572,4 +574,77 @@ public void testDecommissioningOnlyIndexes() { Assertions .assertEquals(ImmutableSet.of(), rcnt.decommissioningOnlyIndexes(true)); } + + @Test + public void testSufficientlyReplicatedForOffline() { + Set replica = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 2)); + + ContainerReplica inServiceReplica = + ReplicationTestUtil.createContainerReplica(container.containerID(), + 1, IN_SERVICE, CLOSED); + replica.add(inServiceReplica); + + ContainerReplica offlineReplica = + ReplicationTestUtil.createContainerReplica(container.containerID(), + 1, DECOMMISSIONING, CLOSED); + replica.add(offlineReplica); + + ContainerReplica offlineNotReplicated = + ReplicationTestUtil.createContainerReplica(container.containerID(), + 3, DECOMMISSIONING, CLOSED); + replica.add(offlineNotReplicated); + + ECContainerReplicaCount rcnt = + new ECContainerReplicaCount(container, replica, Collections.emptyList(), + 1); + Assertions.assertFalse(rcnt.isSufficientlyReplicated(false)); + Assertions.assertTrue(rcnt.isSufficientlyReplicatedForOffline( + offlineReplica.getDatanodeDetails())); + Assertions.assertFalse(rcnt.isSufficientlyReplicatedForOffline( + offlineNotReplicated.getDatanodeDetails())); + + // A random DN not hosting a replica for this container should return false. 
+ Assertions.assertFalse(rcnt.isSufficientlyReplicatedForOffline( + MockDatanodeDetails.randomDatanodeDetails())); + + // Passing the IN_SERVICE node should return false even though the + // replica is on a healthy node + Assertions.assertFalse(rcnt.isSufficientlyReplicatedForOffline( + inServiceReplica.getDatanodeDetails())); + } + + @Test + public void testSufficientlyReplicatedWithUnhealthyAndPendingDelete() { + Set replica = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), + Pair.of(IN_SERVICE, 2), + Pair.of(IN_SERVICE, 3), + Pair.of(IN_SERVICE, 4), + Pair.of(IN_SERVICE, 5)); + + ContainerReplica unhealthyReplica = + ReplicationTestUtil.createContainerReplica(container.containerID(), + 1, IN_SERVICE, UNHEALTHY); + replica.add(unhealthyReplica); + + List pendingOps = new ArrayList<>(); + pendingOps.add(ContainerReplicaOp.create( + ContainerReplicaOp.PendingOpType.DELETE, + unhealthyReplica.getDatanodeDetails(), + unhealthyReplica.getReplicaIndex())); + + ECContainerReplicaCount rcnt = + new ECContainerReplicaCount(container, replica, pendingOps, 1); + Assertions.assertTrue(rcnt.isSufficientlyReplicated(false)); + + // Add another pending delete to an index that is not an unhealthy index + pendingOps.add(ContainerReplicaOp.create( + ContainerReplicaOp.PendingOpType.DELETE, + MockDatanodeDetails.randomDatanodeDetails(), 2)); + + rcnt = new ECContainerReplicaCount(container, replica, pendingOps, 1); + Assertions.assertFalse(rcnt.isSufficientlyReplicated(false)); + Assertions.assertEquals(2, rcnt.unavailableIndexes(false).get(0)); + } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECMisReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECMisReplicationHandler.java new file mode 100644 index 000000000000..3332fc4e004a --- /dev/null +++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECMisReplicationHandler.java @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.container.replication; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.client.ECReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.scm.ContainerPlacementStatus; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyList; + +/** + * Tests the ECMisReplicationHandling functionality. 
+ */ +public class TestECMisReplicationHandler extends TestMisReplicationHandler { + private static final int DATA = 3; + private static final int PARITY = 2; + + + @BeforeEach + public void setup() { + ECReplicationConfig repConfig = new ECReplicationConfig(DATA, PARITY); + setup(repConfig); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3, 4, 5, 6, 7}) + public void testMisReplicationWithAllNodesAvailable(int misreplicationCount) + throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2), + Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4), + Pair.of(IN_SERVICE, 5)); + testMisReplication(availableReplicas, Collections.emptyList(), + 0, misreplicationCount, Math.min(misreplicationCount, 5)); + } + + @Test + public void testMisReplicationWithNoNodesReturned() throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2), + Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4), + Pair.of(IN_SERVICE, 5)); + PlacementPolicy placementPolicy = Mockito.mock(PlacementPolicy.class); + ContainerPlacementStatus mockedContainerPlacementStatus = + Mockito.mock(ContainerPlacementStatus.class); + Mockito.when(mockedContainerPlacementStatus.isPolicySatisfied()) + .thenReturn(false); + Mockito.when(placementPolicy.validateContainerPlacement(anyList(), + anyInt())).thenReturn(mockedContainerPlacementStatus); + Mockito.when(placementPolicy.chooseDatanodes( + Mockito.any(), Mockito.any(), Mockito.any(), + Mockito.anyInt(), Mockito.anyLong(), Mockito.anyLong())) + .thenThrow(new IOException("No nodes found")); + Assertions.assertThrows(SCMException.class, () -> testMisReplication( + availableReplicas, placementPolicy, Collections.emptyList(), + 0, 2, 0)); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3, 4, 5, 6, 7}) + public void testMisReplicationWithSomeNodesNotInService( + int misreplicationCount) throws IOException { + Set 
availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2), + Pair.of(IN_MAINTENANCE, 3), Pair.of(IN_MAINTENANCE, 4), + Pair.of(IN_SERVICE, 5)); + testMisReplication(availableReplicas, Collections.emptyList(), + 0, misreplicationCount, Math.min(misreplicationCount, 3)); + } + + @Test + public void testMisReplicationWithUndereplication() throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 2), + Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4), + Pair.of(IN_SERVICE, 5)); + testMisReplication(availableReplicas, Collections.emptyList(), 0, 1, 0); + } + + @Test + public void testMisReplicationWithOvereplication() throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 1), + Pair.of(IN_SERVICE, 2), Pair.of(IN_SERVICE, 3), + Pair.of(IN_SERVICE, 4), Pair.of(IN_SERVICE, 5)); + testMisReplication(availableReplicas, Collections.emptyList(), 0, 1, 0); + } + + @Test + public void testMisReplicationWithSatisfiedPlacementPolicy() + throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2), + Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4), + Pair.of(IN_SERVICE, 5)); + PlacementPolicy placementPolicy = Mockito.mock(PlacementPolicy.class); + ContainerPlacementStatus mockedContainerPlacementStatus = + Mockito.mock(ContainerPlacementStatus.class); + Mockito.when(mockedContainerPlacementStatus.isPolicySatisfied()) + .thenReturn(true); + Mockito.when(placementPolicy.validateContainerPlacement(anyList(), + anyInt())).thenReturn(mockedContainerPlacementStatus); + testMisReplication(availableReplicas, placementPolicy, + Collections.emptyList(), 0, 1, 0); + } + + @Test + public void testMisReplicationWithPendingOps() + throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2), + 
Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4), + Pair.of(IN_SERVICE, 5)); + PlacementPolicy placementPolicy = Mockito.mock(PlacementPolicy.class); + ContainerPlacementStatus mockedContainerPlacementStatus = + Mockito.mock(ContainerPlacementStatus.class); + Mockito.when(mockedContainerPlacementStatus.isPolicySatisfied()) + .thenReturn(true); + Mockito.when(placementPolicy.validateContainerPlacement(anyList(), + anyInt())).thenReturn(mockedContainerPlacementStatus); + List pendingOp = Collections.singletonList( + ContainerReplicaOp.create(ContainerReplicaOp.PendingOpType.ADD, + MockDatanodeDetails.randomDatanodeDetails(), 1)); + testMisReplication(availableReplicas, placementPolicy, + pendingOp, 0, 1, 0); + pendingOp = Collections.singletonList(ContainerReplicaOp + .create(ContainerReplicaOp.PendingOpType.DELETE, availableReplicas + .stream().findAny().get().getDatanodeDetails(), 1)); + testMisReplication(availableReplicas, placementPolicy, + pendingOp, 0, 1, 0); + } + + @Override + protected MisReplicationHandler getMisreplicationHandler( + PlacementPolicy placementPolicy, OzoneConfiguration conf, + NodeManager nodeManager) { + return new ECMisReplicationHandler(placementPolicy, conf, nodeManager); + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECOverReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECOverReplicationHandler.java index 71eb164ec0ca..e882374a0940 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECOverReplicationHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECOverReplicationHandler.java @@ -19,16 +19,17 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import org.apache.commons.lang3.tuple.Pair; import 
org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.MockNodeManager; -import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault; import org.apache.hadoop.hdds.scm.net.NodeSchema; import org.apache.hadoop.hdds.scm.net.NodeSchemaManager; import org.apache.hadoop.hdds.scm.node.NodeManager; @@ -42,18 +43,20 @@ import org.junit.jupiter.api.Test; import org.mockito.Mockito; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; +import static org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp.PendingOpType.DELETE; import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA; import static org.apache.hadoop.hdds.scm.net.NetConstants.RACK_SCHEMA; import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT_SCHEMA; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyList; /** * Tests the ECOverReplicationHandling functionality. 
@@ -64,14 +67,18 @@ public class TestECOverReplicationHandler { private NodeManager nodeManager; private OzoneConfiguration conf; private PlacementPolicy policy; - private PlacementPolicy placementPolicy; + private DatanodeDetails staleNode; @BeforeEach public void setup() { + staleNode = null; nodeManager = new MockNodeManager(true, 10) { @Override public NodeStatus getNodeStatus(DatanodeDetails dd) throws NodeNotFoundException { + if (staleNode != null && dd.equals(staleNode)) { + return NodeStatus.inServiceStale(); + } return NodeStatus.inServiceHealthy(); } }; @@ -84,10 +91,6 @@ public NodeStatus getNodeStatus(DatanodeDetails dd) NodeSchema[] schemas = new NodeSchema[] {ROOT_SCHEMA, RACK_SCHEMA, LEAF_SCHEMA}; NodeSchemaManager.getInstance().init(schemas, true); - placementPolicy = Mockito.mock(PlacementPolicy.class); - Mockito.when(placementPolicy.validateContainerPlacement( - anyList(), anyInt())) - .thenReturn(new ContainerPlacementStatusDefault(2, 2, 3)); } @Test @@ -96,7 +99,89 @@ public void testNoOverReplication() { .createReplicas(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2), Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4), Pair.of(IN_SERVICE, 5)); - testOverReplicationWithIndexes(availableReplicas, Collections.emptyMap()); + testOverReplicationWithIndexes(availableReplicas, Collections.emptyMap(), + ImmutableList.of()); + } + + @Test + public void testOverReplicationFixedByPendingDelete() { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), + Pair.of(IN_SERVICE, 2), Pair.of(IN_SERVICE, 3), + Pair.of(IN_SERVICE, 4), Pair.of(IN_SERVICE, 5)); + ContainerReplica excess = ReplicationTestUtil.createContainerReplica( + container.containerID(), 5, IN_SERVICE, + ContainerReplicaProto.State.CLOSED); + availableReplicas.add(excess); + List pendingOps = new ArrayList(); + pendingOps.add(ContainerReplicaOp.create(DELETE, + excess.getDatanodeDetails(), 5)); + testOverReplicationWithIndexes(availableReplicas, 
Collections.emptyMap(), + pendingOps); + } + + @Test + public void testOverReplicationWithDecommissionIndexes() { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), + Pair.of(IN_SERVICE, 2), Pair.of(IN_SERVICE, 3), + Pair.of(IN_SERVICE, 4), Pair.of(IN_SERVICE, 5), + Pair.of(DECOMMISSIONING, 5)); + testOverReplicationWithIndexes(availableReplicas, Collections.emptyMap(), + ImmutableList.of()); + } + + @Test + public void testOverReplicationWithStaleIndexes() { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), + Pair.of(IN_SERVICE, 2), Pair.of(IN_SERVICE, 3), + Pair.of(IN_SERVICE, 4), Pair.of(IN_SERVICE, 5)); + ContainerReplica stale = ReplicationTestUtil.createContainerReplica( + container.containerID(), 5, IN_SERVICE, + ContainerReplicaProto.State.CLOSED); + availableReplicas.add(stale); + // By setting stale node, it makes the mocked nodeManager return a stale + // status for it when checked. + staleNode = stale.getDatanodeDetails(); + testOverReplicationWithIndexes(availableReplicas, Collections.emptyMap(), + ImmutableList.of()); + } + + @Test + public void testOverReplicationWithOpenReplica() { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), + Pair.of(IN_SERVICE, 2), Pair.of(IN_SERVICE, 3), + Pair.of(IN_SERVICE, 4), Pair.of(IN_SERVICE, 5)); + ContainerReplica open = ReplicationTestUtil.createContainerReplica( + container.containerID(), 5, IN_SERVICE, + ContainerReplicaProto.State.OPEN); + availableReplicas.add(open); + testOverReplicationWithIndexes(availableReplicas, Collections.emptyMap(), + ImmutableList.of()); + } + + /** + * This test mocks the placement policy so it returns invalid results. This + * should not happen, but it tests that commands are not sent for the wrong + * replica.
+ */ + @Test + public void testOverReplicationButPolicyReturnsWrongIndexes() { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 2), Pair.of(IN_SERVICE, 3), + Pair.of(IN_SERVICE, 4), Pair.of(IN_SERVICE, 5), + Pair.of(IN_SERVICE, 5)); + ContainerReplica toReturn = ReplicationTestUtil.createContainerReplica( + container.containerID(), 1, IN_SERVICE, + ContainerReplicaProto.State.CLOSED); + policy = Mockito.mock(PlacementPolicy.class); + Mockito.when(policy.replicasToRemoveToFixOverreplication( + Mockito.any(), Mockito.anyInt())) + .thenReturn(ImmutableSet.of(toReturn)); + testOverReplicationWithIndexes(availableReplicas, Collections.emptyMap(), + ImmutableList.of()); } @Test @@ -109,7 +194,8 @@ public void testOverReplicationWithOneSameIndexes() { testOverReplicationWithIndexes(availableReplicas, //num of index 1 is 3, but it should be 1, so 2 excess - new ImmutableMap.Builder().put(1, 2).build()); + new ImmutableMap.Builder().put(1, 2).build(), + ImmutableList.of()); } @Test @@ -127,7 +213,7 @@ public void testOverReplicationWithMultiSameIndexes() { //num of index 1 is 3, but it should be 1, so 2 excess new ImmutableMap.Builder() .put(1, 2).put(2, 2).put(3, 2).put(4, 1) - .put(5, 1).build()); + .put(5, 1).build(), ImmutableList.of()); } /** @@ -163,7 +249,8 @@ public void testOverReplicationWithUnderReplication() { private void testOverReplicationWithIndexes( Set availableReplicas, - Map index2excessNum) { + Map index2excessNum, + List pendingOps) { ECOverReplicationHandler ecORH = new ECOverReplicationHandler(policy, nodeManager); ContainerHealthResult.OverReplicatedHealthResult result = @@ -171,7 +258,7 @@ private void testOverReplicationWithIndexes( Mockito.when(result.getContainerInfo()).thenReturn(container); Map> commands = ecORH - .processAndCreateCommands(availableReplicas, ImmutableList.of(), + .processAndCreateCommands(availableReplicas, pendingOps, result, 1); // total commands send out should be equal to the sum of all 
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java index 9aa5cec7e4e0..58989daa4895 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java @@ -68,6 +68,8 @@ import static org.junit.Assert.assertThrows; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.times; /** @@ -83,6 +85,7 @@ public class TestECUnderReplicationHandler { private static final int DATA = 3; private static final int PARITY = 2; private PlacementPolicy ecPlacementPolicy; + private int remainingMaintenanceRedundancy = 1; @BeforeEach public void setup() { @@ -477,6 +480,141 @@ public void testUnderAndOverReplication() throws IOException { } } + /** + * HDDS-7683 was a case where the maintenance logic was calling the placement + * policy requesting zero nodes. This test asserts that it is never called + * with zero nodes to ensure that issue is fixed. 
+ */ + @Test + public void testMaintenanceDoesNotRequestZeroNodes() throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(DECOMMISSIONING, 1), Pair.of(IN_SERVICE, 2), + Pair.of(IN_MAINTENANCE, 3), Pair.of(IN_SERVICE, 4), + Pair.of(IN_SERVICE, 5)); + + Mockito.when(ecPlacementPolicy.chooseDatanodes(anyList(), Mockito.isNull(), + anyInt(), anyLong(), anyLong())) + .thenAnswer(invocationOnMock -> { + int numNodes = invocationOnMock.getArgument(2); + List targets = new ArrayList<>(); + for (int i = 0; i < numNodes; i++) { + targets.add(MockDatanodeDetails.randomDatanodeDetails()); + } + return targets; + }); + + ContainerHealthResult.UnderReplicatedHealthResult result = + Mockito.mock(ContainerHealthResult.UnderReplicatedHealthResult.class); + Mockito.when(result.getContainerInfo()).thenReturn(container); + ECUnderReplicationHandler handler = new ECUnderReplicationHandler( + ecPlacementPolicy, conf, nodeManager, replicationManager); + + Map> commands = + handler.processAndCreateCommands(availableReplicas, + Collections.emptyList(), result, 1); + Assertions.assertEquals(1, commands.size()); + Mockito.verify(ecPlacementPolicy, times(0)) + .chooseDatanodes(anyList(), Mockito.isNull(), eq(0), anyLong(), + anyLong()); + } + + /** + * Create 3 replicas with 1 pending ADD. This means that 1 replica needs to + * be reconstructed. The target DN selected for reconstruction should not be + * the DN pending add. + */ + @Test + public void testDatanodesPendingAddAreNotSelectedAsTargets() + throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2), + Pair.of(IN_SERVICE, 3)); + DatanodeDetails dn = MockDatanodeDetails.randomDatanodeDetails(); + List pendingOps = ImmutableList.of( + ContainerReplicaOp.create(ContainerReplicaOp.PendingOpType.ADD, dn, 4)); + + /* + Mock the placement policy. 
If the list of nodes to be excluded does not + contain the DN pending ADD, then chooseDatanodes will return a list + containing that DN. Ensures the test will fail if excludeNodes does not + contain the DN pending ADD. + */ + Mockito.when(ecPlacementPolicy.chooseDatanodes(anyList(), Mockito.isNull(), + anyInt(), anyLong(), anyLong())) + .thenAnswer(invocationOnMock -> { + List excludeList = invocationOnMock.getArgument(0); + List targets = new ArrayList<>(1); + if (excludeList.contains(dn)) { + targets.add(MockDatanodeDetails.randomDatanodeDetails()); + } else { + targets.add(dn); + } + return targets; + }); + + ContainerHealthResult.UnderReplicatedHealthResult result = + Mockito.mock(ContainerHealthResult.UnderReplicatedHealthResult.class); + Mockito.when(result.getContainerInfo()).thenReturn(container); + ECUnderReplicationHandler handler = new ECUnderReplicationHandler( + ecPlacementPolicy, conf, nodeManager, replicationManager); + + Map> commands = + handler.processAndCreateCommands(availableReplicas, pendingOps, result, + 1); + Assertions.assertEquals(1, commands.size()); + Assertions.assertFalse(commands.containsKey(dn)); + } + + @Test + public void testDecommissioningIndexCopiedWhenContainerUnRecoverable() + throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1)); + ContainerReplica decomReplica = ReplicationTestUtil.createContainerReplica( + container.containerID(), 2, DECOMMISSIONING, CLOSED); + availableReplicas.add(decomReplica); + Map> cmds = + testUnderReplicationWithMissingIndexes(Collections.emptyList(), + availableReplicas, 1, 0, policy); + Assertions.assertEquals(1, cmds.size()); + ReplicateContainerCommand cmd = + (ReplicateContainerCommand) cmds.values().iterator().next(); + + List sources = cmd.getSourceDatanodes(); + Assertions.assertEquals(1, sources.size()); + Assertions.assertEquals(decomReplica.getDatanodeDetails(), + cmd.getSourceDatanodes().get(0)); + } + + @Test + public void 
testMaintenanceIndexCopiedWhenContainerUnRecoverable() + throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 1)); + ContainerReplica maintReplica = ReplicationTestUtil.createContainerReplica( + container.containerID(), 2, ENTERING_MAINTENANCE, CLOSED); + availableReplicas.add(maintReplica); + + Map> cmds = + testUnderReplicationWithMissingIndexes(Collections.emptyList(), + availableReplicas, 0, 1, policy); + Assertions.assertEquals(0, cmds.size()); + + // Change the remaining redundancy to ensure something needs to be copied. + remainingMaintenanceRedundancy = 2; + cmds = testUnderReplicationWithMissingIndexes(Collections.emptyList(), + availableReplicas, 0, 1, policy); + + Assertions.assertEquals(1, cmds.size()); + ReplicateContainerCommand cmd = + (ReplicateContainerCommand) cmds.values().iterator().next(); + + List sources = cmd.getSourceDatanodes(); + Assertions.assertEquals(1, sources.size()); + Assertions.assertEquals(maintReplica.getDatanodeDetails(), + cmd.getSourceDatanodes().get(0)); + } + public Map> testUnderReplicationWithMissingIndexes( List missingIndexes, Set availableReplicas, @@ -490,7 +628,6 @@ public void testUnderAndOverReplication() throws IOException { Mockito.when(result.isUnrecoverable()).thenReturn(false); Mockito.when(result.getContainerInfo()).thenReturn(container); - int remainingMaintenanceRedundancy = 1; Map> datanodeDetailsSCMCommandMap = ecURH .processAndCreateCommands(availableReplicas, ImmutableList.of(), result, remainingMaintenanceRedundancy); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java index 69ed758b4042..8c46db43ac3d 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java +++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java @@ -20,7 +20,6 @@ import com.google.common.primitives.Longs; import org.apache.commons.io.FileUtils; -import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -71,7 +70,10 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import org.mockito.Mockito; import java.io.File; @@ -79,7 +81,9 @@ import java.time.Instant; import java.time.ZoneId; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; @@ -103,6 +107,8 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE; import static org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static org.apache.hadoop.hdds.scm.HddsTestUtils.CONTAINER_NUM_KEYS_DEFAULT; import static org.apache.hadoop.hdds.scm.HddsTestUtils.CONTAINER_USED_BYTES_DEFAULT; import static org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer; @@ -282,1726 +288,2132 @@ void createReplicationManager(ReplicationManagerConfiguration rmConf, } @AfterEach - public void tearDown() throws Exception { + public void teardown() throws Exception { containerStateManager.close(); + 
replicationManager.stop(); if (dbStore != null) { dbStore.close(); } - - FileUtil.fullyDelete(testDir); - } - - /** - * Checks if restarting of replication manager works. - */ - @Test - public void testReplicationManagerRestart() throws InterruptedException { - Assertions.assertTrue(replicationManager.isRunning()); - replicationManager.stop(); - // Stop is a non-blocking call, it might take sometime for the - // ReplicationManager to shutdown - Thread.sleep(500); - Assertions.assertFalse(replicationManager.isRunning()); - replicationManager.start(); - Assertions.assertTrue(replicationManager.isRunning()); - } - - /** - * Open containers are not handled by ReplicationManager. - * This test-case makes sure that ReplicationManages doesn't take - * any action on OPEN containers. - */ - @Test - public void testOpenContainer() throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.OPEN); - containerStateManager.addContainer(container.getProtobuf()); - replicationManager.processAll(); - eventQueue.processAll(1000); - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.OPEN)); - Assertions.assertEquals(0, datanodeCommandHandler.getInvocation()); + FileUtils.deleteDirectory(testDir); } - /** - * If the container is in CLOSING state we resend close container command - * to all the datanodes. 
- */ - @Test - public void testClosingContainer() throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.CLOSING); - final ContainerID id = container.containerID(); - - containerStateManager.addContainer(container.getProtobuf()); - - // Two replicas in CLOSING state - final Set replicas = getReplicas(id, State.CLOSING, - randomDatanodeDetails(), - randomDatanodeDetails()); - - // One replica in OPEN state - final DatanodeDetails datanode = randomDatanodeDetails(); - replicas.addAll(getReplicas(id, State.OPEN, datanode)); - - for (ContainerReplica replica : replicas) { - containerStateManager.updateContainerReplica(id, replica); + @Nested + class Misc { + /** + * Checks if restarting of replication manager works. + */ + @Test + public void testReplicationManagerRestart() throws InterruptedException { + Assertions.assertTrue(replicationManager.isRunning()); + replicationManager.stop(); + // Stop is a non-blocking call, it might take sometime for the + // ReplicationManager to shutdown + Thread.sleep(500); + Assertions.assertFalse(replicationManager.isRunning()); + replicationManager.start(); + Assertions.assertTrue(replicationManager.isRunning()); } - final int currentCloseCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.closeContainerCommand); + @Test + public void testGeneratedConfig() { + ReplicationManagerConfiguration rmc = OzoneConfiguration.newInstanceOf( + ReplicationManagerConfiguration.class); - replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(currentCloseCommandCount + 3, datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.closeContainerCommand)); + //default is not included in ozone-site.xml but generated from annotation + //to the ozone-site-generated.xml which should be loaded by the + // OzoneConfiguration. 
+ Assertions.assertEquals(1800000, rmc.getEventTimeout()); - // Update the OPEN to CLOSING - for (ContainerReplica replica : getReplicas(id, State.CLOSING, datanode)) { - containerStateManager.updateContainerReplica(id, replica); } - - replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(currentCloseCommandCount + 6, datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.closeContainerCommand)); - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.CLOSING)); - } - - - /** - * The container is QUASI_CLOSED but two of the replica is still in - * open state. ReplicationManager should resend close command to those - * datanodes. - */ - @Test - public void testQuasiClosedContainerWithTwoOpenReplica() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); - final ContainerID id = container.containerID(); - final UUID originNodeId = UUID.randomUUID(); - final ContainerReplica replicaOne = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaTwo = getReplicas( - id, State.OPEN, 1000L, originNodeId, randomDatanodeDetails()); - final DatanodeDetails datanodeDetails = randomDatanodeDetails(); - final ContainerReplica replicaThree = getReplicas( - id, State.OPEN, 1000L, datanodeDetails.getUuid(), datanodeDetails); - - containerStateManager.addContainer(container.getProtobuf()); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); - containerStateManager.updateContainerReplica( - id, replicaThree); - - final int currentCloseCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.closeContainerCommand); - // Two of the replicas are in OPEN state - replicationManager.processAll(); - eventQueue.processAll(1000); - 
Assertions.assertEquals(currentCloseCommandCount + 2, datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.closeContainerCommand)); - Assertions.assertTrue(datanodeCommandHandler.received( - SCMCommandProto.Type.closeContainerCommand, - replicaTwo.getDatanodeDetails())); - Assertions.assertTrue(datanodeCommandHandler.received( - SCMCommandProto.Type.closeContainerCommand, - replicaThree.getDatanodeDetails())); - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); } /** - * When the container is in QUASI_CLOSED state and all the replicas are - * also in QUASI_CLOSED state and doesn't have a quorum to force close - * the container, ReplicationManager will not do anything. + * Tests replication manager with healthy open and closed containers. No + * quasi closed or unhealthy containers are involved. */ - @Test - public void testHealthyQuasiClosedContainer() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); - final ContainerID id = container.containerID(); - final UUID originNodeId = UUID.randomUUID(); - final ContainerReplica replicaOne = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaTwo = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaThree = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - - containerStateManager.addContainer(container.getProtobuf()); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); - containerStateManager.updateContainerReplica( - id, replicaThree); - - // All the QUASI_CLOSED replicas have same originNodeId, so the - // container will not be closed. ReplicationManager should take no action. 
- replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(0, datanodeCommandHandler.getInvocation()); - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - } - - /** - * When a container is QUASI_CLOSED and we don't have quorum to force close - * the container, the container should have all the replicas in QUASI_CLOSED - * state, else ReplicationManager will take action. - * - * In this test case we make one of the replica unhealthy, replication manager - * will send delete container command to the datanode which has the unhealthy - * replica. - */ - @Test - public void testQuasiClosedContainerWithUnhealthyReplica() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); - container.setUsedBytes(100); - final ContainerID id = container.containerID(); - final UUID originNodeId = UUID.randomUUID(); - final ContainerReplica replicaOne = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaTwo = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaThree = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - - containerStateManager.addContainer(container.getProtobuf()); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); - containerStateManager.updateContainerReplica( - id, replicaThree); - - final int currentDeleteCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); - final int currentReplicateCommandCount = datanodeCommandHandler - 
.getInvocationCount(SCMCommandProto.Type.replicateContainerCommand); - - // All the QUASI_CLOSED replicas have same originNodeId, so the - // container will not be closed. ReplicationManager should take no action. - replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(0, datanodeCommandHandler.getInvocation()); - - // Make the first replica unhealthy - final ContainerReplica unhealthyReplica = getReplicas( - id, State.UNHEALTHY, 1000L, originNodeId, - replicaOne.getDatanodeDetails()); - containerStateManager.updateContainerReplica( - id, unhealthyReplica); - - replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(currentDeleteCommandCount + 1, - datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand)); - Assertions.assertTrue(datanodeCommandHandler.received( - SCMCommandProto.Type.deleteContainerCommand, - replicaOne.getDatanodeDetails())); - Assertions.assertEquals(currentDeleteCommandCount + 1, - replicationManager.getMetrics().getNumDeletionCmdsSent()); - - // Now we will delete the unhealthy replica from in-memory. 
- containerStateManager.removeContainerReplica(id, replicaOne); - - final long currentBytesToReplicate = replicationManager.getMetrics() - .getNumReplicationBytesTotal(); - - // The container is under replicated as unhealthy replica is removed - replicationManager.processAll(); - eventQueue.processAll(1000); - - // We should get replicate command - Assertions.assertEquals(currentReplicateCommandCount + 1, - datanodeCommandHandler.getInvocationCount( - SCMCommandProto.Type.replicateContainerCommand)); - Assertions.assertEquals(currentReplicateCommandCount + 1, - replicationManager.getMetrics().getNumReplicationCmdsSent()); - Assertions.assertEquals(currentBytesToReplicate + 100L, - replicationManager.getMetrics().getNumReplicationBytesTotal()); - Assertions.assertEquals(1, getInflightCount(InflightType.REPLICATION)); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getInflightReplication()); - - // We should have one under replicated and one quasi_closed_stuck - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.UNDER_REPLICATED)); - - // Now we add the missing replica back - DatanodeDetails targetDn = replicationManager.getLegacyReplicationManager() - .getFirstDatanode(InflightType.REPLICATION, id); - final ContainerReplica replicatedReplicaOne = getReplicas( - id, State.CLOSED, 1000L, originNodeId, targetDn); - containerStateManager.updateContainerReplica( - id, replicatedReplicaOne); - - final long currentReplicationCommandCompleted = replicationManager - .getMetrics().getNumReplicationCmdsCompleted(); - final long currentBytesCompleted = replicationManager.getMetrics() - .getNumReplicationBytesCompleted(); - - replicationManager.processAll(); - 
eventQueue.processAll(1000); - - Assertions.assertEquals(0, getInflightCount(InflightType.REPLICATION)); - Assertions.assertEquals(0, replicationManager.getMetrics() - .getInflightReplication()); - Assertions.assertEquals(currentReplicationCommandCompleted + 1, - replicationManager.getMetrics().getNumReplicationCmdsCompleted()); - Assertions.assertEquals(currentBytesCompleted + 100L, - replicationManager.getMetrics().getNumReplicationBytesCompleted()); - - report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - Assertions.assertEquals(0, report.getStat( - ReplicationManagerReport.HealthState.UNDER_REPLICATED)); - } - - /** - * When a QUASI_CLOSED container is over replicated, ReplicationManager - * deletes the excess replicas. - */ - @Test - public void testOverReplicatedQuasiClosedContainer() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); - container.setUsedBytes(101); - final ContainerID id = container.containerID(); - final UUID originNodeId = UUID.randomUUID(); - final ContainerReplica replicaOne = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaTwo = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaThree = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaFour = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - - containerStateManager.addContainer(container.getProtobuf()); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); - containerStateManager.updateContainerReplica( - id, replicaThree); - 
containerStateManager.updateContainerReplica(id, replicaFour); - - final int currentDeleteCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); - - replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(currentDeleteCommandCount + 1, - datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand)); - Assertions.assertEquals(currentDeleteCommandCount + 1, - replicationManager.getMetrics().getNumDeletionCmdsSent()); - Assertions.assertEquals(1, getInflightCount(InflightType.DELETION)); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getInflightDeletion()); - - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.OVER_REPLICATED)); - - // Now we remove the replica according to inflight - DatanodeDetails targetDn = replicationManager.getLegacyReplicationManager() - .getFirstDatanode(InflightType.DELETION, id); - if (targetDn.equals(replicaOne.getDatanodeDetails())) { - containerStateManager.removeContainerReplica( - id, replicaOne); - } else if (targetDn.equals(replicaTwo.getDatanodeDetails())) { - containerStateManager.removeContainerReplica( - id, replicaTwo); - } else if (targetDn.equals(replicaThree.getDatanodeDetails())) { - containerStateManager.removeContainerReplica( - id, replicaThree); - } else if (targetDn.equals(replicaFour.getDatanodeDetails())) { - containerStateManager.removeContainerReplica( - id, replicaFour); + @Nested + class StableReplicas { + /** + * Open containers are not handled by ReplicationManager. + * This test-case makes sure that ReplicationManages doesn't take + * any action on OPEN containers. 
+ */ + @Test + public void testOpenContainer() throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.OPEN); + containerStateManager.addContainer(container.getProtobuf()); + replicationManager.processAll(); + eventQueue.processAll(1000); + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.OPEN)); + Assertions.assertEquals(0, datanodeCommandHandler.getInvocation()); } - final long currentDeleteCommandCompleted = replicationManager.getMetrics() - .getNumDeletionCmdsCompleted(); - final long deleteBytesCompleted = - replicationManager.getMetrics().getNumDeletionBytesCompleted(); - - replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(0, getInflightCount(InflightType.DELETION)); - Assertions.assertEquals(0, replicationManager.getMetrics() - .getInflightDeletion()); - Assertions.assertEquals(currentDeleteCommandCompleted + 1, - replicationManager.getMetrics().getNumDeletionCmdsCompleted()); - Assertions.assertEquals(deleteBytesCompleted + 101, - replicationManager.getMetrics().getNumDeletionBytesCompleted()); - - report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - Assertions.assertEquals(0, report.getStat( - ReplicationManagerReport.HealthState.OVER_REPLICATED)); - } - - /** - * When a QUASI_CLOSED container is over replicated, ReplicationManager - * deletes the excess replicas. While choosing the replica for deletion - * ReplicationManager should prioritize unhealthy replica over QUASI_CLOSED - * replica. 
- */ - @Test - public void testOverReplicatedQuasiClosedContainerWithUnhealthyReplica() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); - final ContainerID id = container.containerID(); - final UUID originNodeId = UUID.randomUUID(); - final ContainerReplica replicaOne = getReplicas( - id, State.UNHEALTHY, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaTwo = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaThree = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaFour = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - - containerStateManager.addContainer(container.getProtobuf()); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); - containerStateManager.updateContainerReplica( - id, replicaThree); - containerStateManager.updateContainerReplica(id, replicaFour); - - final int currentDeleteCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); - - replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(currentDeleteCommandCount + 1, - datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand)); - Assertions.assertTrue(datanodeCommandHandler.received( - SCMCommandProto.Type.deleteContainerCommand, - replicaOne.getDatanodeDetails())); - Assertions.assertEquals(currentDeleteCommandCount + 1, - replicationManager.getMetrics().getNumDeletionCmdsSent()); - Assertions.assertEquals(1, getInflightCount(InflightType.DELETION)); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getInflightDeletion()); - - ReplicationManagerReport report = replicationManager.getContainerReport(); - 
Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.OVER_REPLICATED)); - - final long currentDeleteCommandCompleted = replicationManager.getMetrics() - .getNumDeletionCmdsCompleted(); - // Now we remove the replica to simulate deletion complete - containerStateManager.removeContainerReplica(id, replicaOne); - - replicationManager.processAll(); - eventQueue.processAll(1000); - - Assertions.assertEquals(currentDeleteCommandCompleted + 1, - replicationManager.getMetrics().getNumDeletionCmdsCompleted()); - Assertions.assertEquals(0, getInflightCount(InflightType.DELETION)); - Assertions.assertEquals(0, replicationManager.getMetrics() - .getInflightDeletion()); - - report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - Assertions.assertEquals(0, report.getStat( - ReplicationManagerReport.HealthState.OVER_REPLICATED)); - } - - /** - * ReplicationManager should replicate an QUASI_CLOSED replica if it is - * under replicated. 
- */ - @Test - public void testUnderReplicatedQuasiClosedContainer() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); - container.setUsedBytes(100); - final ContainerID id = container.containerID(); - final UUID originNodeId = UUID.randomUUID(); - final ContainerReplica replicaOne = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaTwo = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - - containerStateManager.addContainer(container.getProtobuf()); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); - - final int currentReplicateCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand); - final long currentBytesToReplicate = replicationManager.getMetrics() - .getNumReplicationBytesTotal(); - - replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(currentReplicateCommandCount + 1, - datanodeCommandHandler.getInvocationCount( - SCMCommandProto.Type.replicateContainerCommand)); - Assertions.assertEquals(currentReplicateCommandCount + 1, - replicationManager.getMetrics().getNumReplicationCmdsSent()); - Assertions.assertEquals(currentBytesToReplicate + 100, - replicationManager.getMetrics().getNumReplicationBytesTotal()); - Assertions.assertEquals(1, getInflightCount(InflightType.REPLICATION)); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getInflightReplication()); + /** + * 1 open replica + * 2 closing replicas + * Expectation: Close command is sent to the open replicas. 
+ */ + @Test + public void testClosingContainer() throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.CLOSING); + final ContainerID id = container.containerID(); - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + containerStateManager.addContainer(container.getProtobuf()); - final long currentReplicateCommandCompleted = replicationManager - .getMetrics().getNumReplicationCmdsCompleted(); - final long currentReplicateBytesCompleted = replicationManager - .getMetrics().getNumReplicationBytesCompleted(); + // Two replicas in CLOSING state + final Set replicas = getReplicas(id, State.CLOSING, + randomDatanodeDetails(), + randomDatanodeDetails()); - // Now we add the replicated new replica - DatanodeDetails targetDn = replicationManager.getLegacyReplicationManager() - .getFirstDatanode(InflightType.REPLICATION, id); - final ContainerReplica replicatedReplicaThree = getReplicas( - id, State.CLOSED, 1000L, originNodeId, targetDn); - containerStateManager.updateContainerReplica( - id, replicatedReplicaThree); + // One replica in OPEN state + final DatanodeDetails datanode = randomDatanodeDetails(); + replicas.addAll(getReplicas(id, State.OPEN, datanode)); - replicationManager.processAll(); - eventQueue.processAll(1000); + for (ContainerReplica replica : replicas) { + containerStateManager.updateContainerReplica(id, replica); + } - Assertions.assertEquals(currentReplicateCommandCompleted + 1, - replicationManager.getMetrics().getNumReplicationCmdsCompleted()); - Assertions.assertEquals(currentReplicateBytesCompleted + 100, - replicationManager.getMetrics().getNumReplicationBytesCompleted()); - 
Assertions.assertEquals(0, getInflightCount(InflightType.REPLICATION)); - Assertions.assertEquals(0, replicationManager.getMetrics() - .getInflightReplication()); - - report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - Assertions.assertEquals(0, report.getStat( - ReplicationManagerReport.HealthState.UNDER_REPLICATED)); - } + final int currentCloseCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.closeContainerCommand); - /** - * When a QUASI_CLOSED container is under replicated, ReplicationManager - * should re-replicate it. If there are any unhealthy replica, it has to - * be deleted. - * - * In this test case, the container is QUASI_CLOSED and is under replicated - * and also has an unhealthy replica. - * - * In the first iteration of ReplicationManager, it should re-replicate - * the container so that it has enough replicas. - * - * In the second iteration, ReplicationManager should delete the unhealthy - * replica. - * - * In the third iteration, ReplicationManager will re-replicate as the - * container has again become under replicated after the unhealthy - * replica has been deleted. 
- * - */ - @Test - public void testUnderReplicatedQuasiClosedContainerWithUnhealthyReplica() - throws IOException, InterruptedException, - TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); - container.setUsedBytes(99); - final ContainerID id = container.containerID(); - final UUID originNodeId = UUID.randomUUID(); - final ContainerReplica replicaOne = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaTwo = getReplicas( - id, State.UNHEALTHY, 1000L, originNodeId, randomDatanodeDetails()); + replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(currentCloseCommandCount + 3, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.closeContainerCommand)); - containerStateManager.addContainer(container.getProtobuf()); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); + // Update the OPEN to CLOSING + for (ContainerReplica replica: getReplicas(id, State.CLOSING, datanode)) { + containerStateManager.updateContainerReplica(id, replica); + } - final int currentReplicateCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand); - final int currentDeleteCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); - final long currentBytesToDelete = replicationManager.getMetrics() - .getNumDeletionBytesTotal(); + replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(currentCloseCommandCount + 6, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.closeContainerCommand)); + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.CLOSING)); + } - replicationManager.processAll(); - GenericTestUtils.waitFor( - () -> 
(currentReplicateCommandCount + 1) == datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand), - 50, 5000); + @Test + public void testReplicateCommandTimeout() + throws IOException, TimeoutException { + long timeout = new ReplicationManagerConfiguration().getEventTimeout(); + + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + assertReplicaScheduled(1); + + // Already a pending replica, so nothing scheduled + assertReplicaScheduled(0); + + // Advance the clock past the timeout, and there should be a replica + // scheduled + clock.fastForward(timeout + 1000); + assertReplicaScheduled(1); + Assertions.assertEquals(1, replicationManager.getMetrics() + .getNumReplicationCmdsTimeout()); + } - Optional replicateCommand = datanodeCommandHandler - .getReceivedCommands().stream() - .filter(c -> c.getCommand().getType() - .equals(SCMCommandProto.Type.replicateContainerCommand)) - .findFirst(); + @Test + public void testDeleteCommandTimeout() + throws IOException, TimeoutException { + long timeout = new ReplicationManagerConfiguration().getEventTimeout(); + + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + assertDeleteScheduled(1); + + // Already a pending replica, so nothing scheduled + assertReplicaScheduled(0); + + // Advance the clock past the timeout, and there should be a replica + // scheduled + clock.fastForward(timeout + 1000); + assertDeleteScheduled(1); + Assertions.assertEquals(1, replicationManager.getMetrics() + .getNumDeletionCmdsTimeout()); + } - 
Assertions.assertTrue(replicateCommand.isPresent()); + /** + * A closed empty container with all the replicas also closed and empty + * should be deleted. + * A container/ replica should be deemed empty when it has 0 keyCount even + * if the usedBytes is not 0 (usedBytes should not be used to determine if + * the container or replica is empty). + */ + @Test + public void testDeleteEmptyContainer() throws Exception { + runTestDeleteEmptyContainer(3); + } - DatanodeDetails newNode = createDatanodeDetails( - replicateCommand.get().getDatanodeId()); - ContainerReplica newReplica = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, newNode); - containerStateManager.updateContainerReplica(id, newReplica); + Void runTestDeleteEmptyContainer(int expectedDelete) throws Exception { + // Create container with usedBytes = 1000 and keyCount = 0 + final ContainerInfo container = createContainer( + LifeCycleState.CLOSED, 1000, 0); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + // Create a replica with usedBytes != 0 and keyCount = 0 + addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED, 100, 0); + + assertDeleteScheduled(expectedDelete); + return null; + } - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.UNDER_REPLICATED)); - Assertions.assertEquals(0, report.getStat( - ReplicationManagerReport.HealthState.UNHEALTHY)); + @Test + public void testDeletionLimit() throws Exception { + runTestLimit(0, 2, 0, 1, + () -> runTestDeleteEmptyContainer(2)); + } - /* - * We have report the replica to SCM, in the next ReplicationManager - * iteration it should delete the unhealthy 
replica. + /** + * A closed empty container with a non-empty replica should not be deleted. */ + @Test + public void testDeleteEmptyContainerNonEmptyReplica() throws Exception { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED, 0, + 0); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + // Create the 3rd replica with non-zero key count and used bytes + addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED, 100, 1); + assertDeleteScheduled(0); + } - replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(currentDeleteCommandCount + 1, - datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand)); - // ReplicaTwo should be deleted, that is the unhealthy one - Assertions.assertTrue(datanodeCommandHandler.received( - SCMCommandProto.Type.deleteContainerCommand, - replicaTwo.getDatanodeDetails())); - Assertions.assertEquals(currentDeleteCommandCount + 1, - replicationManager.getMetrics().getNumDeletionCmdsSent()); - Assertions.assertEquals(currentBytesToDelete + 99, - replicationManager.getMetrics().getNumDeletionBytesTotal()); - Assertions.assertEquals(1, getInflightCount(InflightType.DELETION)); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getInflightDeletion()); - - containerStateManager.removeContainerReplica(id, replicaTwo); - - final long currentDeleteCommandCompleted = replicationManager.getMetrics() - .getNumDeletionCmdsCompleted(); - - report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - Assertions.assertEquals(0, report.getStat( - ReplicationManagerReport.HealthState.UNDER_REPLICATED)); - Assertions.assertEquals(1, report.getStat( - 
ReplicationManagerReport.HealthState.UNHEALTHY)); - /* - * We have now removed unhealthy replica, next iteration of - * ReplicationManager should re-replicate the container as it - * is under replicated now + /** + * ReplicationManager should replicate zero replica when all copies + * are missing. */ - - replicationManager.processAll(); - eventQueue.processAll(1000); - - Assertions.assertEquals(0, getInflightCount(InflightType.DELETION)); - Assertions.assertEquals(0, replicationManager.getMetrics() - .getInflightDeletion()); - Assertions.assertEquals(currentDeleteCommandCompleted + 1, - replicationManager.getMetrics().getNumDeletionCmdsCompleted()); - - Assertions.assertEquals(currentReplicateCommandCount + 2, - datanodeCommandHandler.getInvocationCount( - SCMCommandProto.Type.replicateContainerCommand)); - Assertions.assertEquals(currentReplicateCommandCount + 2, - replicationManager.getMetrics().getNumReplicationCmdsSent()); - Assertions.assertEquals(1, getInflightCount(InflightType.REPLICATION)); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getInflightReplication()); - - report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.UNDER_REPLICATED)); - Assertions.assertEquals(0, report.getStat( - ReplicationManagerReport.HealthState.UNHEALTHY)); - } - - - /** - * When a container is QUASI_CLOSED and it has >50% of its replica - * in QUASI_CLOSED state with unique origin node id, - * ReplicationManager should force close the replica(s) with - * highest BCSID. 
- */ - @Test - public void testQuasiClosedToClosed() throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); - final ContainerID id = container.containerID(); - final Set replicas = getReplicas(id, State.QUASI_CLOSED, - randomDatanodeDetails(), - randomDatanodeDetails(), - randomDatanodeDetails()); - containerStateManager.addContainer(container.getProtobuf()); - for (ContainerReplica replica : replicas) { - containerStateManager.updateContainerReplica(id, replica); + @Test + public void testContainerWithMissingReplicas() + throws IOException, TimeoutException { + createContainer(LifeCycleState.CLOSED); + assertReplicaScheduled(0); + assertUnderReplicatedCount(1); + assertMissingCount(1); } - final int currentCloseCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.closeContainerCommand); - - replicationManager.processAll(); - eventQueue.processAll(1000); + /** + * 3 healthy closed replicas. + * Expectation: No action. + * + * ReplicationManager should not take any action if the container is + * CLOSED and healthy. + */ + @Test + public void testHealthyClosedContainer() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.CLOSED); + final ContainerID id = container.containerID(); + final Set replicas = getReplicas(id, State.CLOSED, + randomDatanodeDetails(), + randomDatanodeDetails(), + randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + for (ContainerReplica replica : replicas) { + containerStateManager.updateContainerReplica(id, replica); + } - // All the replicas have same BCSID, so all of them will be closed. 
- Assertions.assertEquals(currentCloseCommandCount + 3, datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.closeContainerCommand)); + replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(0, datanodeCommandHandler.getInvocation()); - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); - Assertions.assertEquals(0, report.getStat( - ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.CLOSED)); + for (ReplicationManagerReport.HealthState s : + ReplicationManagerReport.HealthState.values()) { + Assertions.assertEquals(0, report.getStat(s)); + } + } } - /** - * ReplicationManager should not take any action if the container is - * CLOSED and healthy. + * Tests replication manager with unhealthy and quasi-closed container + * replicas. */ - @Test - public void testHealthyClosedContainer() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.CLOSED); - final ContainerID id = container.containerID(); - final Set replicas = getReplicas(id, State.CLOSED, - randomDatanodeDetails(), - randomDatanodeDetails(), - randomDatanodeDetails()); + @Nested + class UnstableReplicas { + /** + * 2 open replicas + * 1 quasi-closed replica + * Expectation: close command is sent to the open replicas. 
+ */ + @Test + public void testQuasiClosedContainerWithTwoOpenReplica() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); + final ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica replicaOne = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaTwo = getReplicas( + id, State.OPEN, 1000L, originNodeId, randomDatanodeDetails()); + final DatanodeDetails datanodeDetails = randomDatanodeDetails(); + final ContainerReplica replicaThree = getReplicas( + id, State.OPEN, 1000L, datanodeDetails.getUuid(), datanodeDetails); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica( + id, replicaThree); - containerStateManager.addContainer(container.getProtobuf()); - for (ContainerReplica replica : replicas) { - containerStateManager.updateContainerReplica(id, replica); + // First iteration + + final int currentCloseCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.closeContainerCommand); + // Two of the replicas are in OPEN state + replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(currentCloseCommandCount + 2, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.closeContainerCommand)); + Assertions.assertTrue(datanodeCommandHandler.received( + SCMCommandProto.Type.closeContainerCommand, + replicaTwo.getDatanodeDetails())); + Assertions.assertTrue(datanodeCommandHandler.received( + SCMCommandProto.Type.closeContainerCommand, + replicaThree.getDatanodeDetails())); + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); } - 
replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(0, datanodeCommandHandler.getInvocation()); + /** + * 3 quasi closed replicas with the same origin node ID. + * Expectation: No action taken. + * + * When the container is in QUASI_CLOSED state and all the replicas are + * also in QUASI_CLOSED state and doesn't have a quorum to force close + * the container, ReplicationManager will not do anything. + */ + @Test + public void testHealthyQuasiClosedContainer() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); + final ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica replicaOne = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaTwo = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaThree = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica( + id, replicaThree); - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.CLOSED)); - for (ReplicationManagerReport.HealthState s : - ReplicationManagerReport.HealthState.values()) { - Assertions.assertEquals(0, report.getStat(s)); + // All the QUASI_CLOSED replicas have same originNodeId, so the + // container will not be closed. ReplicationManager should take no action. 
+ replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(0, datanodeCommandHandler.getInvocation()); + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); } - } - /** - * ReplicationManager should close the unhealthy OPEN container. - */ - @Test - public void testUnhealthyOpenContainer() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.OPEN); - final ContainerID id = container.containerID(); - final Set replicas = getReplicas(id, State.OPEN, - randomDatanodeDetails(), - randomDatanodeDetails()); - replicas.addAll(getReplicas(id, State.UNHEALTHY, randomDatanodeDetails())); + /** + * 2 quasi-closed replicas. + * 1 unhealthy replica. + * All replicas have same origin node ID. + * Expectation: + * Round 1: Quasi closed replica is replicated. + * Round 2: Unhealthy replica is deleted. + * + * When a container is QUASI_CLOSED and we don't have quorum to force close + * the container, the container should have all the replicas in QUASI_CLOSED + * state, else ReplicationManager will take action. + * + * In this test case we make one of the replica unhealthy, replication + * manager will send delete container command to the datanode which has the + * unhealthy replica. 
+ */ + @Test + public void testQuasiClosedContainerWithUnhealthyReplica() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); + container.setUsedBytes(100); + final ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica replicaOne = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaTwo = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaThree = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica(id, replicaThree); + + int currentReplicateCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand); + + // All the QUASI_CLOSED replicas have same originNodeId, so the + // container will not be closed. ReplicationManager should take no action. + replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(0, datanodeCommandHandler.getInvocation()); + + // Make the first replica unhealthy + final ContainerReplica unhealthyReplica = getReplicas( + id, UNHEALTHY, 1000L, originNodeId, + replicaOne.getDatanodeDetails()); + containerStateManager.updateContainerReplica( + id, unhealthyReplica); + + long currentBytesToReplicate = replicationManager.getMetrics() + .getNumReplicationBytesTotal(); + replicationManager.processAll(); + eventQueue.processAll(1000); + // Under replication handler should first re-replicate one of the quasi + // closed containers. + // The unhealthy container should not have been deleted in the first pass. 
+ assertDeleteScheduled(0); + currentReplicateCommandCount += 1; + currentBytesToReplicate += 100L; + Assertions.assertEquals(currentReplicateCommandCount, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + Assertions.assertEquals(currentReplicateCommandCount, + replicationManager.getMetrics().getNumReplicationCmdsSent()); + Assertions.assertEquals(currentBytesToReplicate, + replicationManager.getMetrics().getNumReplicationBytesTotal()); + Assertions.assertEquals(1, getInflightCount(InflightType.REPLICATION)); + Assertions.assertEquals(1, replicationManager.getMetrics() + .getInflightReplication()); + + // The quasi closed container cannot be closed, but it should have been + // restored to full replication on the previous run. + // The unhealthy replica should remain until the next iteration. + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); + Assertions.assertEquals(0, report.getStat( + ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.UNHEALTHY)); + + // Create the replica so replication manager sees it on the next run. 
+ List replicateCommands = datanodeCommandHandler + .getReceivedCommands().stream() + .filter(c -> c.getCommand().getType() + .equals(SCMCommandProto.Type.replicateContainerCommand)) + .collect(Collectors.toList()); + for (CommandForDatanode replicateCommand: replicateCommands) { + DatanodeDetails newNode = createDatanodeDetails( + replicateCommand.getDatanodeId()); + ContainerReplica newReplica = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, newNode); + containerStateManager.updateContainerReplica(id, newReplica); + } - containerStateManager.addContainer(container.getProtobuf()); - for (ContainerReplica replica : replicas) { - containerStateManager.updateContainerReplica(id, replica); + // On the next run, the unhealthy container should be scheduled for + // deletion, since the quasi closed container is now sufficiently + // replicated. + // This method runs an iteration of replication manager. + assertDeleteScheduled(1); + assertExactDeleteTargets(unhealthyReplica.getDatanodeDetails()); + // Replication should have finished on the previous iteration, leaving + // these numbers unchanged. + Assertions.assertEquals(currentReplicateCommandCount, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + Assertions.assertEquals(currentReplicateCommandCount, + replicationManager.getMetrics().getNumReplicationCmdsSent()); + Assertions.assertEquals(currentBytesToReplicate, + replicationManager.getMetrics().getNumReplicationBytesTotal()); + Assertions.assertEquals(0, getInflightCount(InflightType.REPLICATION)); + Assertions.assertEquals(0, replicationManager.getMetrics() + .getInflightReplication()); + + // Now we will delete the unhealthy replica. + containerStateManager.removeContainerReplica(id, unhealthyReplica); + + // There should be no work left on the following runs. + replicationManager.processAll(); + eventQueue.processAll(1000); + // The two commands shown are the previous delete and replicate commands. 
+ Assertions.assertEquals(2, datanodeCommandHandler.getInvocation()); } - final CloseContainerEventHandler closeContainerHandler = - Mockito.mock(CloseContainerEventHandler.class); - eventQueue.addHandler(SCMEvents.CLOSE_CONTAINER, closeContainerHandler); - - replicationManager.processAll(); - eventQueue.processAll(1000); - Mockito.verify(closeContainerHandler, Mockito.times(1)) - .onMessage(id, eventQueue); - - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.OPEN)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.OPEN_UNHEALTHY)); - } - /** - * ReplicationManager should skip send close command to unhealthy replica. - */ - @Test - public void testCloseUnhealthyReplica() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.CLOSING); - final ContainerID id = container.containerID(); - final Set replicas = getReplicas(id, State.UNHEALTHY, - randomDatanodeDetails()); - replicas.addAll(getReplicas(id, State.OPEN, randomDatanodeDetails())); - replicas.addAll(getReplicas(id, State.OPEN, randomDatanodeDetails())); - - containerStateManager.addContainer(container.getProtobuf()); - for (ContainerReplica replica : replicas) { - containerStateManager.updateContainerReplica(id, replica); + /** + * Container is quasi closed. + * 3 quasi-closed replicas with the same origin node ID. + * 1 unhealthy replica with unique origin node ID. + * + * Expectation: + * No action taken. 3 healthy replicas are present. The unhealthy replica + * should not be deleted since it has a unique origin node ID. The + * container cannot be closed because there are not enough healthy unique + * origin node IDs. 
+ */ + @Test + public void testQuasiClosedContainerWithUniqueUnhealthyReplica() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); + container.setUsedBytes(100); + final ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica replicaOne = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaTwo = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaThree = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replica4 = getReplicas( + id, UNHEALTHY, 1000L, randomDatanodeDetails().getUuid(), + randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica(id, replicaThree); + containerStateManager.updateContainerReplica(id, replica4); + + replicationManager.processAll(); + eventQueue.processAll(1000); + + Assertions.assertEquals(0, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + + Assertions.assertEquals(0, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.deleteContainerCommand)); } - replicationManager.processAll(); - // Wait for EventQueue to call the event handler - eventQueue.processAll(1000); - Assertions.assertEquals(2, datanodeCommandHandler.getInvocation()); - } - - @Test - public void testGeneratedConfig() { - ReplicationManagerConfiguration rmc = - OzoneConfiguration.newInstanceOf(ReplicationManagerConfiguration.class); - - //default is not included in ozone-site.xml but generated from annotation - //to the ozone-site-generated.xml which should be loaded by the - // OzoneConfiguration. 
- Assertions.assertEquals(1800000, rmc.getEventTimeout()); + /** + * Container is closed. + * 2 quasi-closed replicas. + * 1 unhealthy replica. + * All replicas have unique origin node IDs. + * Quasi closed replicas BCS IDs match closed container's BCS ID. + * + * Expectation: + * Iteration 1: Quasi closed replicas are closed since their BCS IDs + * match the closed container state. + * Iteration 2: The now closed replicas are replicated. + * Iteration 3: The unhealthy replica is deleted. + */ + @Test + public void testCloseableContainerWithUniqueUnhealthyReplica() + throws Exception { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + ContainerReplica quasi1 = addReplicaToDn(container, + randomDatanodeDetails(), QUASI_CLOSED, container.getSequenceId()); + ContainerReplica quasi2 = addReplicaToDn(container, + randomDatanodeDetails(), QUASI_CLOSED, container.getSequenceId()); + ContainerReplica unhealthyReplica = addReplicaToDn(container, + randomDatanodeDetails(), + UNHEALTHY, + 900L); + + // First RM iteration. + // The quasi containers should be closed since their BCSIDs match the + // closed container's state. + assertDeleteScheduled(0); + // All the containers are unhealthy, so it will not be counted as under + // replicated. + assertUnderReplicatedCount(0); + Assertions.assertEquals(2, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.closeContainerCommand)); + + // Update RM with the results of the close commands. 
+ ContainerReplica closedRep1 = getReplicas( + container.containerID(), CLOSED, + container.getSequenceId(), quasi1.getDatanodeDetails()) + .stream().findFirst().get(); + ContainerReplica closedRep2 = getReplicas( + container.containerID(), CLOSED, + container.getSequenceId(), quasi2.getDatanodeDetails()) + .stream().findFirst().get(); + + containerStateManager.updateContainerReplica(container.containerID(), + closedRep1); + containerStateManager.updateContainerReplica(container.containerID(), + closedRep2); + + // Second RM iteration + // Now that we have healthy replicas, they should be replicated. + assertDeleteScheduled(0); + assertUnderReplicatedCount(1); + Assertions.assertEquals(1, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + + + // Process the replicate command and report the replica back to SCM. + List replicateCommands = datanodeCommandHandler + .getReceivedCommands().stream() + .filter(c -> c.getCommand().getType() + .equals(SCMCommandProto.Type.replicateContainerCommand)) + .collect(Collectors.toList()); + + // Report the new replica to SCM. + for (CommandForDatanode replicateCommand: replicateCommands) { + DatanodeDetails newNode = createDatanodeDetails( + replicateCommand.getDatanodeId()); + ContainerReplica newReplica = getReplicas( + container.containerID(), CLOSED, + container.getSequenceId(), newNode.getUuid(), newNode); + containerStateManager.updateContainerReplica(container.containerID(), + newReplica); + } - } + // Third RM iteration + // The unhealthy replica can be deleted since we have 3 healthy copies + // of a closed container. 
+ assertDeleteScheduled(1); + assertUnderReplicatedCount(0); + Assertions.assertEquals(1, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.deleteContainerCommand)); + assertExactDeleteTargets(unhealthyReplica.getDatanodeDetails()); + } - @Test - public void additionalReplicaScheduledWhenMisReplicated() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.CLOSED); - container.setUsedBytes(100); - final ContainerID id = container.containerID(); - final UUID originNodeId = UUID.randomUUID(); - final ContainerReplica replicaOne = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaTwo = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaThree = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - containerStateManager.addContainer(container.getProtobuf()); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); - containerStateManager.updateContainerReplica( - id, replicaThree); - - // Ensure a mis-replicated status is returned for any containers in this - // test where there are 3 replicas. When there are 2 or 4 replicas - // the status returned will be healthy. - Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement( - Mockito.argThat(list -> list.size() == 3), - Mockito.anyInt() - )).thenAnswer(invocation -> { - return new ContainerPlacementStatusDefault(1, 2, 3); - }); + /** + * $numReplicas unhealthy replicas. + * Expectation: The remaining replicas are scheduled. 
+ */ + @ParameterizedTest + @ValueSource(ints = {1, 2}) + public void testUnderReplicatedWithOnlyUnhealthyReplicas(int numReplicas) + throws Exception { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + for (int i = 0; i < numReplicas; i++) { + addReplica(container, NodeStatus.inServiceHealthy(), UNHEALTHY); + } + int numReplicasNeeded = HddsProtos.ReplicationFactor.THREE_VALUE - + numReplicas; + assertReplicaScheduled(numReplicasNeeded); + assertUnderReplicatedCount(1); + } - int currentReplicateCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand); - final long currentBytesToReplicate = replicationManager.getMetrics() - .getNumReplicationBytesTotal(); + /** + * 1 unhealthy replica. + * 4 closed replicas. + * Expectation: + * Iteration 1: The unhealthy replica should be deleted. + * Iteration 2: One of the closed replicas should be deleted. + */ + @Test + public void testOverReplicatedClosedAndUnhealthy() throws Exception { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + ContainerReplica unhealthy = addReplica(container, + NodeStatus.inServiceHealthy(), UNHEALTHY); + addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); + addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); + addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); + addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); + + // This method does one run of replication manager. 
+ assertReplicaScheduled(0); + assertUnderReplicatedCount(0); + boolean unhealthyDeleted = false; + boolean closedDeleted = false; + UUID closedDeletedUUID = null; + + for (CommandForDatanode command : + datanodeCommandHandler.getReceivedCommands()) { + if (command.getCommand().getType() == + SCMCommandProto.Type.deleteContainerCommand) { + if (command.getDatanodeId() == + unhealthy.getDatanodeDetails().getUuid()) { + unhealthyDeleted = true; + } else { + closedDeleted = true; + closedDeletedUUID = command.getDatanodeId(); + } + } + } - replicationManager.processAll(); - eventQueue.processAll(1000); - // At this stage, due to the mocked calls to validateContainerPlacement - // the policy will not be satisfied, and replication will be triggered. + Assertions.assertFalse(unhealthyDeleted); + Assertions.assertTrue(closedDeleted); + + // Do a second run. + assertReplicaScheduled(0); + assertUnderReplicatedCount(0); + unhealthyDeleted = false; + closedDeleted = false; + for (CommandForDatanode command : + datanodeCommandHandler.getReceivedCommands()) { + if (command.getCommand().getType() == + SCMCommandProto.Type.deleteContainerCommand) { + if (command.getDatanodeId() == + unhealthy.getDatanodeDetails().getUuid()) { + unhealthyDeleted = true; + } else { + closedDeleted = true; + // The delete command should have been left over from the last run. 
+ Assertions.assertEquals(closedDeletedUUID, command.getDatanodeId()); + } + } + } - Assertions.assertEquals(currentReplicateCommandCount + 1, - datanodeCommandHandler.getInvocationCount( - SCMCommandProto.Type.replicateContainerCommand)); - Assertions.assertEquals(currentReplicateCommandCount + 1, - replicationManager.getMetrics().getNumReplicationCmdsSent()); - Assertions.assertEquals(currentBytesToReplicate + 100, - replicationManager.getMetrics().getNumReplicationBytesTotal()); - Assertions.assertEquals(1, getInflightCount(InflightType.REPLICATION)); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getInflightReplication()); + Assertions.assertTrue(unhealthyDeleted); + Assertions.assertTrue(closedDeleted); + } - ReplicationManagerReport report = replicationManager.getContainerReport(); - Assertions.assertEquals(1, report.getStat(LifeCycleState.CLOSED)); - Assertions.assertEquals(1, report.getStat( - ReplicationManagerReport.HealthState.MIS_REPLICATED)); + /** + * 4 unhealthy replicas. + * Expectation: One unhealthy replica should be deleted. + */ + @Test + public void testOverReplicatedUnhealthy() throws Exception { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + Set unhealthyContainerDNIDs = new HashSet<>(); + + final int numReplicas = 4; + for (int i = 0; i < numReplicas; i++) { + ContainerReplica replica = addReplica(container, + NodeStatus.inServiceHealthy(), UNHEALTHY); + unhealthyContainerDNIDs.add(replica.getDatanodeDetails().getUuid()); + } - // Now make it so that all containers seem mis-replicated no matter how - // many replicas. This will test replicas are not scheduled if the new - // replica does not fix the mis-replication. - Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement( - Mockito.anyList(), - Mockito.anyInt() - )).thenAnswer(invocation -> { - return new ContainerPlacementStatusDefault(1, 2, 3); - }); + // No replications should be scheduled. 
+ replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(0, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + assertUnderReplicatedCount(0); + + // One replica should be deleted. + Assertions.assertEquals(1, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.deleteContainerCommand)); + Assertions.assertTrue( + datanodeCommandHandler.getReceivedCommands().stream() + .anyMatch(c -> c.getCommand().getType() == + SCMCommandProto.Type.deleteContainerCommand && + unhealthyContainerDNIDs.contains(c.getDatanodeId()))); + } - currentReplicateCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand); + /** + * 4 quasi-closed replicas. + * All have same origin node ID. + * Expectation: One of the replicas is deleted. + */ + @Test + public void testOverReplicatedQuasiClosedContainer() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); + container.setUsedBytes(101); + final ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica replicaOne = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaTwo = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaThree = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaFour = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica( + id, replicaThree); + containerStateManager.updateContainerReplica(id, replicaFour); + + final int 
currentDeleteCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); + + replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(currentDeleteCommandCount + 1, + datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand)); + Assertions.assertEquals(currentDeleteCommandCount + 1, + replicationManager.getMetrics().getNumDeletionCmdsSent()); + Assertions.assertEquals(1, getInflightCount(InflightType.DELETION)); + Assertions.assertEquals(1, replicationManager.getMetrics() + .getInflightDeletion()); + + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.OVER_REPLICATED)); + + // Now we remove the replica according to inflight + DatanodeDetails targetDn = + replicationManager.getLegacyReplicationManager() + .getFirstDatanode(InflightType.DELETION, id); + if (targetDn.equals(replicaOne.getDatanodeDetails())) { + containerStateManager.removeContainerReplica( + id, replicaOne); + } else if (targetDn.equals(replicaTwo.getDatanodeDetails())) { + containerStateManager.removeContainerReplica( + id, replicaTwo); + } else if (targetDn.equals(replicaThree.getDatanodeDetails())) { + containerStateManager.removeContainerReplica( + id, replicaThree); + } else if (targetDn.equals(replicaFour.getDatanodeDetails())) { + containerStateManager.removeContainerReplica( + id, replicaFour); + } - replicationManager.processAll(); - eventQueue.processAll(1000); - // At this stage, due to the mocked calls to validateContainerPlacement - // the mis-replicated racks will not have improved, so expect to see nothing - // scheduled. 
- Assertions.assertEquals(currentReplicateCommandCount, datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand)); - Assertions.assertEquals(currentReplicateCommandCount, - replicationManager.getMetrics().getNumReplicationCmdsSent()); - Assertions.assertEquals(1, getInflightCount(InflightType.REPLICATION)); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getInflightReplication()); - } + final long currentDeleteCommandCompleted = replicationManager.getMetrics() + .getNumDeletionCmdsCompleted(); + final long deleteBytesCompleted = + replicationManager.getMetrics().getNumDeletionBytesCompleted(); + + replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(0, getInflightCount(InflightType.DELETION)); + Assertions.assertEquals(0, replicationManager.getMetrics() + .getInflightDeletion()); + Assertions.assertEquals(currentDeleteCommandCompleted + 1, + replicationManager.getMetrics().getNumDeletionCmdsCompleted()); + Assertions.assertEquals(deleteBytesCompleted + 101, + replicationManager.getMetrics().getNumDeletionBytesCompleted()); + + report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); + Assertions.assertEquals(0, report.getStat( + ReplicationManagerReport.HealthState.OVER_REPLICATED)); + } - @Test - public void overReplicatedButRemovingMakesMisReplicated() - throws IOException, TimeoutException { - // In this test, the excess replica should not be removed. 
- final ContainerInfo container = getContainer(LifeCycleState.CLOSED); - final ContainerID id = container.containerID(); - final UUID originNodeId = UUID.randomUUID(); - final ContainerReplica replicaOne = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaTwo = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaThree = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaFour = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaFive = getReplicas( - id, State.UNHEALTHY, 1000L, originNodeId, randomDatanodeDetails()); + /** + * 2 open replicas. + * 1 unhealthy replica. + * Expectation: Container is closed. + * + * ReplicationManager should close the unhealthy OPEN container. + */ + @Test + public void testUnhealthyOpenContainer() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.OPEN); + final ContainerID id = container.containerID(); + final Set replicas = getReplicas(id, State.OPEN, + randomDatanodeDetails(), + randomDatanodeDetails()); + replicas.addAll(getReplicas(id, UNHEALTHY, randomDatanodeDetails())); + + containerStateManager.addContainer(container.getProtobuf()); + for (ContainerReplica replica : replicas) { + containerStateManager.updateContainerReplica(id, replica); + } - containerStateManager.addContainer(container.getProtobuf()); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); - containerStateManager.updateContainerReplica( - id, replicaThree); - containerStateManager.updateContainerReplica(id, replicaFour); - containerStateManager.updateContainerReplica(id, replicaFive); - - // Ensure a mis-replicated status is returned for any containers in this - // test where there are exactly 3 
replicas checked. - Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement( - Mockito.argThat(list -> list.size() == 3), - Mockito.anyInt() - )).thenAnswer( - invocation -> new ContainerPlacementStatusDefault(1, 2, 3)); + final CloseContainerEventHandler closeContainerHandler = + Mockito.mock(CloseContainerEventHandler.class); + eventQueue.addHandler(SCMEvents.CLOSE_CONTAINER, closeContainerHandler); - int currentDeleteCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); + replicationManager.processAll(); + eventQueue.processAll(1000); + Mockito.verify(closeContainerHandler, Mockito.times(1)) + .onMessage(id, eventQueue); - replicationManager.processAll(); - eventQueue.processAll(1000); - // The unhealthy replica should be removed, but not the other replica - // as each time we test with 3 replicas, Mockito ensures it returns - // mis-replicated - Assertions.assertEquals(currentDeleteCommandCount + 1, - datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand)); - Assertions.assertEquals(currentDeleteCommandCount + 1, - replicationManager.getMetrics().getNumDeletionCmdsSent()); + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.OPEN)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.OPEN_UNHEALTHY)); + } - Assertions.assertTrue(datanodeCommandHandler.received( - SCMCommandProto.Type.deleteContainerCommand, - replicaFive.getDatanodeDetails())); - Assertions.assertEquals(1, getInflightCount(InflightType.DELETION)); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getInflightDeletion()); - assertOverReplicatedCount(1); - } + /** + * 1 unhealthy replica. + * 2 open replicas. + * Expectation: Close command should be sent to open replicas only. + * + * ReplicationManager should skip send close command to unhealthy replica. 
+ */ + @Test + public void testCloseUnhealthyReplica() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.CLOSING); + final ContainerID id = container.containerID(); + final Set replicas = getReplicas(id, UNHEALTHY, + randomDatanodeDetails()); + replicas.addAll(getReplicas(id, State.OPEN, randomDatanodeDetails())); + replicas.addAll(getReplicas(id, State.OPEN, randomDatanodeDetails())); + + containerStateManager.addContainer(container.getProtobuf()); + for (ContainerReplica replica : replicas) { + containerStateManager.updateContainerReplica(id, replica); + } - @Test - public void testOverReplicatedAndPolicySatisfied() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.CLOSED); - final ContainerID id = container.containerID(); - final UUID originNodeId = UUID.randomUUID(); - final ContainerReplica replicaOne = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaTwo = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaThree = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaFour = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + replicationManager.processAll(); + // Wait for EventQueue to call the event handler + eventQueue.processAll(1000); + Assertions.assertEquals(2, + datanodeCommandHandler.getInvocation()); + } - containerStateManager.addContainer(container.getProtobuf()); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); - containerStateManager.updateContainerReplica( - id, replicaThree); - containerStateManager.updateContainerReplica(id, replicaFour); + /** + * 1 unhealthy replica. + * 3 quasi closed replicas. + * All have same origin node ID. 
+ * Expectation: Unhealthy replica is deleted. + * + * When a QUASI_CLOSED container is over replicated, ReplicationManager + * deletes the excess replicas. While choosing the replica for deletion + * ReplicationManager should prioritize deleting the unhealthy replica over + * QUASI_CLOSED replica if the unhealthy replica does not have a unique + * origin node ID. + */ + @Test + public void testQuasiClosedContainerWithExtraUnhealthyReplica() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); + final ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica unhealthyReplica = getReplicas( + id, UNHEALTHY, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaTwo = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaThree = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaFour = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, unhealthyReplica); + containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica(id, replicaThree); + containerStateManager.updateContainerReplica(id, replicaFour); + + assertDeleteScheduled(1); + Assertions.assertTrue( + datanodeCommandHandler.getReceivedCommands().stream() + .anyMatch(c -> c.getCommand().getType() == + SCMCommandProto.Type.deleteContainerCommand && + c.getDatanodeId().equals( + unhealthyReplica.getDatanodeDetails().getUuid()))); + + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); + Assertions.assertEquals(1, report.getStat( + 
ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); + // Container should have been considered over replicated including the + // unhealthy replica. + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.OVER_REPLICATED)); + + final long currentDeleteCommandCompleted = replicationManager.getMetrics() + .getNumDeletionCmdsCompleted(); + // Now we remove the replica to simulate deletion complete + containerStateManager.removeContainerReplica(id, unhealthyReplica); + + // On the next run, the over replicated status should be reconciled and + // the delete completed. + replicationManager.processAll(); + eventQueue.processAll(1000); + + Assertions.assertEquals(currentDeleteCommandCompleted + 1, + replicationManager.getMetrics().getNumDeletionCmdsCompleted()); + Assertions.assertEquals(0, getInflightCount(InflightType.DELETION)); + Assertions.assertEquals(0, replicationManager.getMetrics() + .getInflightDeletion()); + + report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); + Assertions.assertEquals(0, report.getStat( + ReplicationManagerReport.HealthState.OVER_REPLICATED)); + } - Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement( - Mockito.argThat(list -> list.size() == 3), - Mockito.anyInt() - )).thenAnswer( - invocation -> new ContainerPlacementStatusDefault(2, 2, 3)); + /** + * 2 quasi-closed replicas. + * Expectation: Replicate one of the replicas. 
+ */ + @Test + public void testUnderReplicatedQuasiClosedContainer() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); + container.setUsedBytes(100); + final ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica replicaOne = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaTwo = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(id, replicaTwo); + + final int currentReplicateCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand); + final long currentBytesToReplicate = replicationManager.getMetrics() + .getNumReplicationBytesTotal(); + + // On the first iteration, one of the quasi closed replicas should be + // replicated. 
+ replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(currentReplicateCommandCount + 1, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + Assertions.assertEquals(currentReplicateCommandCount + 1, + replicationManager.getMetrics().getNumReplicationCmdsSent()); + Assertions.assertEquals(currentBytesToReplicate + 100, + replicationManager.getMetrics().getNumReplicationBytesTotal()); + Assertions.assertEquals(1, getInflightCount(InflightType.REPLICATION)); + Assertions.assertEquals(1, replicationManager.getMetrics() + .getInflightReplication()); + + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + + final long currentReplicateCommandCompleted = replicationManager + .getMetrics().getNumReplicationCmdsCompleted(); + final long currentReplicateBytesCompleted = replicationManager + .getMetrics().getNumReplicationBytesCompleted(); + + // Now we add the replicated new replica + DatanodeDetails targetDn = + replicationManager.getLegacyReplicationManager() + .getFirstDatanode(InflightType.REPLICATION, id); + final ContainerReplica replicatedReplicaThree = getReplicas( + id, State.QUASI_CLOSED, 1000L, originNodeId, targetDn); + containerStateManager.updateContainerReplica( + id, replicatedReplicaThree); + + // On the next run, no additional replications should be scheduled. 
+ replicationManager.processAll(); + eventQueue.processAll(1000); + + Assertions.assertEquals(currentReplicateCommandCompleted + 1, + replicationManager.getMetrics().getNumReplicationCmdsCompleted()); + Assertions.assertEquals(currentReplicateBytesCompleted + 100, + replicationManager.getMetrics().getNumReplicationBytesCompleted()); + Assertions.assertEquals(0, getInflightCount(InflightType.REPLICATION)); + Assertions.assertEquals(0, replicationManager.getMetrics() + .getInflightReplication()); + + report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.QUASI_CLOSED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); + Assertions.assertEquals(0, report.getStat( + ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + } - final int currentDeleteCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); + /** + * 1 quasi-closed replica. + * 1 unhealthy replica. + * All have same origin node ID. + * Expectation: + * + * In the first iteration of ReplicationManager, it should re-replicate + * the quasi closed replicas so that there are 3 of them. + * + * In the second iteration, ReplicationManager should delete the unhealthy + * replica since its origin node ID is not unique. 
+ */ + @Test + public void testUnderReplicatedQuasiClosedContainerWithUnhealthyReplica() + throws IOException, InterruptedException, + TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); + container.setUsedBytes(99); + final ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica replicaOne = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, + randomDatanodeDetails()); + final ContainerReplica unhealthyReplica = getReplicas( + id, UNHEALTHY, 1000L, originNodeId, + randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(id, unhealthyReplica); + + final int currentReplicateCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand); + final int currentDeleteCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); + final long currentBytesToDelete = replicationManager.getMetrics() + .getNumDeletionBytesTotal(); + + // Run first iteraiton + + replicationManager.processAll(); + GenericTestUtils.waitFor( + () -> (currentReplicateCommandCount + 2) == datanodeCommandHandler + .getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand), + 50, 5000); + + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, + report.getStat(LifeCycleState.QUASI_CLOSED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.UNHEALTHY)); + + List replicateCommands = datanodeCommandHandler + .getReceivedCommands().stream() + .filter(c -> 
c.getCommand().getType() + .equals(SCMCommandProto.Type.replicateContainerCommand)) + .collect(Collectors.toList()); + + Assertions.assertEquals(2, replicateCommands.size()); + + // Report the two new replicas to SCM. + for (CommandForDatanode replicateCommand: replicateCommands) { + DatanodeDetails newNode = createDatanodeDetails( + replicateCommand.getDatanodeId()); + ContainerReplica newReplica = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, newNode); + containerStateManager.updateContainerReplica(id, newReplica); + } - replicationManager.processAll(); - eventQueue.processAll(1000); - Assertions.assertEquals(currentDeleteCommandCount + 1, - datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand)); - Assertions.assertEquals(currentDeleteCommandCount + 1, - replicationManager.getMetrics().getNumDeletionCmdsSent()); - Assertions.assertEquals(1, getInflightCount(InflightType.DELETION)); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getInflightDeletion()); + // Run second iteration. + // Now that the quasi closed replica is sufficiently replicated, SCM + // should delete the unhealthy replica on the next iteration. 
- assertOverReplicatedCount(1); - } + replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(currentDeleteCommandCount + 1, + datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand)); + Assertions.assertTrue(datanodeCommandHandler.received( + SCMCommandProto.Type.deleteContainerCommand, + unhealthyReplica.getDatanodeDetails())); + Assertions.assertEquals(currentDeleteCommandCount + 1, + replicationManager.getMetrics().getNumDeletionCmdsSent()); + Assertions.assertEquals(currentBytesToDelete + 99, + replicationManager.getMetrics().getNumDeletionBytesTotal()); + Assertions.assertEquals(1, + getInflightCount(InflightType.DELETION)); + Assertions.assertEquals(1, replicationManager.getMetrics() + .getInflightDeletion()); + + containerStateManager.removeContainerReplica(id, unhealthyReplica); + + report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, + report.getStat(LifeCycleState.QUASI_CLOSED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK)); + Assertions.assertEquals(0, report.getStat( + ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.UNHEALTHY)); + } - @Test - public void testOverReplicatedAndPolicyUnSatisfiedAndDeleted() - throws IOException, TimeoutException { - final ContainerInfo container = getContainer(LifeCycleState.CLOSED); - final ContainerID id = container.containerID(); - final UUID originNodeId = UUID.randomUUID(); - final ContainerReplica replicaOne = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaTwo = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaThree = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaFour = getReplicas( 
- id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - final ContainerReplica replicaFive = getReplicas( - id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); - containerStateManager.addContainer(container.getProtobuf()); - containerStateManager.updateContainerReplica(id, replicaOne); - containerStateManager.updateContainerReplica(id, replicaTwo); - containerStateManager.updateContainerReplica( - id, replicaThree); - containerStateManager.updateContainerReplica(id, replicaFour); - containerStateManager.updateContainerReplica(id, replicaFive); + /** + * 3 quasi-closed replicas. + * All unique origin IDs. + * Expectation: Container is closed. + * + * When a container is QUASI_CLOSED and it has >50% of its replica + * in QUASI_CLOSED state with unique origin node id, + * ReplicationManager should force close the replica(s) with + * highest BCSID. + */ + @Test + public void testQuasiClosedToClosed() throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.QUASI_CLOSED); + final ContainerID id = container.containerID(); + final Set replicas = getReplicas(id, QUASI_CLOSED, + randomDatanodeDetails(), + randomDatanodeDetails(), + randomDatanodeDetails()); + containerStateManager.addContainer(container.getProtobuf()); + for (ContainerReplica replica : replicas) { + containerStateManager.updateContainerReplica(id, replica); + } - Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement( - Mockito.argThat(list -> list != null && list.size() <= 4), - Mockito.anyInt() - )).thenAnswer( - invocation -> new ContainerPlacementStatusDefault(1, 2, 3)); + final int currentCloseCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.closeContainerCommand); - final int currentDeleteCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); + replicationManager.processAll(); + eventQueue.processAll(1000); - 
replicationManager.processAll();
- eventQueue.processAll(1000);
- Assertions.assertEquals(currentDeleteCommandCount + 2,
- datanodeCommandHandler
- .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand));
- Assertions.assertEquals(currentDeleteCommandCount + 2,
- replicationManager.getMetrics().getNumDeletionCmdsSent());
- Assertions.assertEquals(1, getInflightCount(InflightType.DELETION));
- Assertions.assertEquals(1, replicationManager.getMetrics()
- .getInflightDeletion());
- }
+ // All the replicas have same BCSID, so all of them will be closed.
+ Assertions.assertEquals(currentCloseCommandCount + 3,
+ datanodeCommandHandler.getInvocationCount(
+ SCMCommandProto.Type.closeContainerCommand));
- /**
- * ReplicationManager should replicate an additional replica if there are
- * decommissioned replicas.
- */
- @Test
- public void testUnderReplicatedDueToDecommission()
- throws IOException, TimeoutException {
- final ContainerInfo container = createContainer(LifeCycleState.CLOSED);
- addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
- addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED);
- addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED);
- assertReplicaScheduled(2);
- assertUnderReplicatedCount(1);
+ ReplicationManagerReport report = replicationManager.getContainerReport();
+ Assertions.assertEquals(1,
+ report.getStat(LifeCycleState.QUASI_CLOSED));
+ Assertions.assertEquals(0, report.getStat(
+ ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK));
+ }
}

/**
- * ReplicationManager should replicate an additional replica when all copies
- * are decommissioning.
+ * Tests replication manager handling of decommissioning and maintenance
+ * mode datanodes.
*/ - @Test - public void testUnderReplicatedDueToAllDecommission() - throws IOException, TimeoutException { - runTestUnderReplicatedDueToAllDecommission(3); - } - - Void runTestUnderReplicatedDueToAllDecommission(int expectedReplication) - throws IOException, TimeoutException { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED); - assertReplicaScheduled(expectedReplication); - assertUnderReplicatedCount(1); - return null; - } + @Nested + class DecomAndMaintenance { + /** + * ReplicationManager should replicate an additional replica if there are + * decommissioned replicas. + */ + @Test + public void testUnderReplicatedDueToDecommission() + throws IOException, TimeoutException { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED); + assertReplicaScheduled(2); + assertUnderReplicatedCount(1); + } - @Test - public void testReplicationLimit() throws Exception { - runTestLimit(1, 0, 2, 0, - () -> runTestUnderReplicatedDueToAllDecommission(1)); - } + /** + * ReplicationManager should replicate an additional replica when all copies + * are decommissioning. 
+ */ + @Test + public void testUnderReplicatedDueToAllDecommission() + throws IOException, TimeoutException { + runTestUnderReplicatedDueToAllDecommission(3); + } - void runTestLimit(int replicationLimit, int deletionLimit, - int expectedReplicationSkipped, int expectedDeletionSkipped, - Callable testcase) throws Exception { - createReplicationManager(replicationLimit, deletionLimit); + Void runTestUnderReplicatedDueToAllDecommission(int expectedReplication) + throws IOException, TimeoutException { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED); + assertReplicaScheduled(expectedReplication); + assertUnderReplicatedCount(1); + return null; + } - final ReplicationManagerMetrics metrics = replicationManager.getMetrics(); - final long replicationSkipped = metrics.getInflightReplicationSkipped(); - final long deletionSkipped = metrics.getInflightDeletionSkipped(); + @Test + public void testReplicationLimit() throws Exception { + runTestLimit(1, 0, 2, 0, + () -> runTestUnderReplicatedDueToAllDecommission(1)); + } - testcase.call(); + /** + * ReplicationManager should not take any action when the container is + * correctly replicated with decommissioned replicas still present. 
+ */
+ @Test
+ public void testCorrectlyReplicatedWithDecommission()
+ throws IOException, TimeoutException {
+ final ContainerInfo container = createContainer(LifeCycleState.CLOSED);
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+ addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED);
+ assertReplicaScheduled(0);
+ assertUnderReplicatedCount(0);
+ }
- Assertions.assertEquals(replicationSkipped + expectedReplicationSkipped,
- metrics.getInflightReplicationSkipped());
- Assertions.assertEquals(deletionSkipped + expectedDeletionSkipped,
- metrics.getInflightDeletionSkipped());
+ /**
+ * ReplicationManager should replicate an additional replica when min rep
+ * is not met for maintenance.
+ */
+ @Test
+ public void testUnderReplicatedDueToMaintenance()
+ throws IOException, TimeoutException {
+ final ContainerInfo container = createContainer(LifeCycleState.CLOSED);
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+ addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED);
+ addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED);
+ assertReplicaScheduled(1);
+ assertUnderReplicatedCount(1);
+ }
- //reset limits for other tests.
- createReplicationManager(0, 0);
- }
+ /**
+ * ReplicationManager should not replicate an additional replica if
+ * min replica for maintenance is 1 and another replica is available.
+ */ + @Test + public void testNotUnderReplicatedDueToMaintenanceMinRepOne() + throws Exception { + replicationManager.stop(); + ReplicationManagerConfiguration newConf = + new ReplicationManagerConfiguration(); + newConf.setMaintenanceReplicaMinimum(1); + dbStore.close(); + createReplicationManager(newConf); + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); + assertReplicaScheduled(0); + assertUnderReplicatedCount(0); + } - /** - * ReplicationManager should not take any action when the container is - * correctly replicated with decommissioned replicas still present. - */ - @Test - public void testCorrectlyReplicatedWithDecommission() - throws IOException, TimeoutException { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(DECOMMISSIONING, HEALTHY), CLOSED); - assertReplicaScheduled(0); - assertUnderReplicatedCount(0); - } + /** + * ReplicationManager should replicate an additional replica when all copies + * are going off line and min rep is 1. 
+ */ + @Test + public void testUnderReplicatedDueToMaintenanceMinRepOne() + throws Exception { + replicationManager.stop(); + ReplicationManagerConfiguration newConf = + new ReplicationManagerConfiguration(); + newConf.setMaintenanceReplicaMinimum(1); + dbStore.close(); + createReplicationManager(newConf); + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); + assertReplicaScheduled(1); + assertUnderReplicatedCount(1); + } - /** - * ReplicationManager should replicate an additional replica when min rep - * is not met for maintenance. - */ - @Test - public void testUnderReplicatedDueToMaintenance() - throws IOException, TimeoutException { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); - assertReplicaScheduled(1); - assertUnderReplicatedCount(1); - } + /** + * ReplicationManager should replicate additional replica when all copies + * are going into maintenance. + */ + @Test + public void testUnderReplicatedDueToAllMaintenance() + throws IOException, TimeoutException { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); + assertReplicaScheduled(2); + assertUnderReplicatedCount(1); + } - /** - * ReplicationManager should not replicate an additional replica when if - * min replica for maintenance is 1 and another replica is available. 
- */
- @Test
- public void testNotUnderReplicatedDueToMaintenanceMinRepOne()
- throws Exception {
- replicationManager.stop();
- ReplicationManagerConfiguration newConf =
- new ReplicationManagerConfiguration();
- newConf.setMaintenanceReplicaMinimum(1);
- dbStore.close();
- createReplicationManager(newConf);
- final ContainerInfo container = createContainer(LifeCycleState.CLOSED);
- addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
- addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED);
- addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED);
- assertReplicaScheduled(0);
- assertUnderReplicatedCount(0);
- }
+ /**
+ * ReplicationManager should not replicate an additional replica when
+ * sufficient replicas are available.
+ */
+ @Test
+ public void testCorrectlyReplicatedWithMaintenance()
+ throws IOException, TimeoutException {
+ final ContainerInfo container = createContainer(LifeCycleState.CLOSED);
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+ addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
+ addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED);
+ addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED);
+ assertReplicaScheduled(0);
+ assertUnderReplicatedCount(0);
+ }
- /**
- * ReplicationManager should replicate an additional replica when all copies
- * are going off line and min rep is 1.
- */ - @Test - public void testUnderReplicatedDueToMaintenanceMinRepOne() - throws Exception { - replicationManager.stop(); - ReplicationManagerConfiguration newConf = - new ReplicationManagerConfiguration(); - newConf.setMaintenanceReplicaMinimum(1); - dbStore.close(); - createReplicationManager(newConf); - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); - assertReplicaScheduled(1); - assertUnderReplicatedCount(1); - } + /** + * ReplicationManager should replicate additional replica when all copies + * are decommissioning or maintenance. + */ + @Test + public void testUnderReplicatedWithDecommissionAndMaintenance() + throws IOException, TimeoutException { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + addReplica(container, new NodeStatus(DECOMMISSIONED, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(DECOMMISSIONED, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); + assertReplicaScheduled(2); + assertUnderReplicatedCount(1); + } - /** - * ReplicationManager should replicate additional replica when all copies - * are going into maintenance. 
- */ - @Test - public void testUnderReplicatedDueToAllMaintenance() - throws IOException, TimeoutException { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); - assertReplicaScheduled(2); - assertUnderReplicatedCount(1); - } + /** + * When a CLOSED container is over replicated, ReplicationManager + * deletes the excess replicas. While choosing the replica for deletion + * ReplicationManager should not attempt to remove a DECOMMISSION or + * MAINTENANCE replica. + */ + @Test + public void testOverReplicatedClosedContainerWithDecomAndMaint() + throws IOException, TimeoutException { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); + addReplica(container, new NodeStatus(DECOMMISSIONED, HEALTHY), CLOSED); + addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); + addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); + addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); + addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); + addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); + + final int currentDeleteCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); + + replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(currentDeleteCommandCount + 2, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.deleteContainerCommand)); + Assertions.assertEquals(currentDeleteCommandCount + 2, + replicationManager.getMetrics().getNumDeletionCmdsSent()); + Assertions.assertEquals(1, getInflightCount(InflightType.DELETION)); + Assertions.assertEquals(1, replicationManager.getMetrics() + 
.getInflightDeletion());
+ // Get the DECOM and Maint replica and ensure none of them are scheduled
+ // for removal
+ Set decom =
+ containerStateManager.getContainerReplicas(
+ container.containerID())
+ .stream()
+ .filter(r ->
+ r.getDatanodeDetails().getPersistedOpState() != IN_SERVICE)
+ .collect(Collectors.toSet());
+ for (ContainerReplica r : decom) {
+ Assertions.assertFalse(datanodeCommandHandler.received(
+ SCMCommandProto.Type.deleteContainerCommand,
+ r.getDatanodeDetails()));
+ }
+ assertOverReplicatedCount(1);
+ }
- /**
- * ReplicationManager should not replicate additional replica sufficient
- * replica are available.
- */
- @Test
- public void testCorrectlyReplicatedWithMaintenance()
- throws IOException, TimeoutException {
- final ContainerInfo container = createContainer(LifeCycleState.CLOSED);
- addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
- addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED);
- addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED);
- addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED);
- assertReplicaScheduled(0);
- assertUnderReplicatedCount(0);
+ /**
+ * Replication Manager should not attempt to replicate from an unhealthy
+ * (stale or dead) node. To test this, set up a scenario where a replica needs
+ * to be created, but mark all nodes stale. That way, no new replica will be
+ * scheduled.
+ assertReplicaScheduled(0); + assertUnderReplicatedCount(1); + } } /** - * ReplicationManager should replicate additional replica when all copies - * are decommissioning or maintenance. + * Tests replication manager move command. */ - @Test - public void testUnderReplicatedWithDecommissionAndMaintenance() - throws IOException, TimeoutException { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - addReplica(container, new NodeStatus(DECOMMISSIONED, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(DECOMMISSIONED, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); - assertReplicaScheduled(2); - assertUnderReplicatedCount(1); - } + @Nested + class Move { + /** + * if all the prerequisites are satisfied, move should work as expected. + */ + @Test + public void testMove() throws IOException, NodeNotFoundException, + InterruptedException, ExecutionException, TimeoutException { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + ContainerID id = container.containerID(); + ContainerReplica dn1 = addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + DatanodeDetails dn3 = addNode(new NodeStatus(IN_SERVICE, HEALTHY)); + CompletableFuture cf = + replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + Assertions.assertTrue(scmLogs.getOutput().contains( + "receive a move request about container")); + Thread.sleep(100L); + Assertions.assertTrue(datanodeCommandHandler.received( + SCMCommandProto.Type.replicateContainerCommand, dn3)); + Assertions.assertEquals(1, datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + + //replicate container to dn3 + addReplicaToDn(container, dn3, CLOSED); + 
replicationManager.processAll(); + eventQueue.processAll(1000); + + Assertions.assertTrue(datanodeCommandHandler.received( + SCMCommandProto.Type.deleteContainerCommand, + dn1.getDatanodeDetails())); + Assertions.assertEquals(1, datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.deleteContainerCommand)); + containerStateManager.removeContainerReplica(id, dn1); - /** - * ReplicationManager should replicate zero replica when all copies - * are missing. - */ - @Test - public void testContainerWithMissingReplicas() - throws IOException, TimeoutException { - createContainer(LifeCycleState.CLOSED); - assertReplicaScheduled(0); - assertUnderReplicatedCount(1); - assertMissingCount(1); - } - /** - * When a CLOSED container is over replicated, ReplicationManager - * deletes the excess replicas. While choosing the replica for deletion - * ReplicationManager should not attempt to remove a DECOMMISSION or - * MAINTENANCE replica. - */ - @Test - public void testOverReplicatedClosedContainerWithDecomAndMaint() - throws IOException, TimeoutException { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); - addReplica(container, new NodeStatus(DECOMMISSIONED, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_MAINTENANCE, HEALTHY), CLOSED); - addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); - addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); - addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); - addReplica(container, NodeStatus.inServiceHealthy(), CLOSED); + replicationManager.processAll(); + eventQueue.processAll(1000); - final int currentDeleteCommandCount = datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); + Assertions.assertTrue(cf.isDone() && cf.get() == MoveResult.COMPLETED); + } - replicationManager.processAll(); - eventQueue.processAll(1000); - 
Assertions.assertEquals(currentDeleteCommandCount + 2, - datanodeCommandHandler - .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand)); - Assertions.assertEquals(currentDeleteCommandCount + 2, - replicationManager.getMetrics().getNumDeletionCmdsSent()); - Assertions.assertEquals(1, getInflightCount(InflightType.DELETION)); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getInflightDeletion()); - // Get the DECOM and Maint replica and ensure none of them are scheduled - // for removal - Set decom = - containerStateManager.getContainerReplicas( - container.containerID()) - .stream() - .filter(r -> r.getDatanodeDetails().getPersistedOpState() != IN_SERVICE) - .collect(Collectors.toSet()); - for (ContainerReplica r : decom) { + /** + * if crash happened and restarted, move option should work as expected. + */ + @Test + public void testMoveCrashAndRestart() throws IOException, + NodeNotFoundException, InterruptedException, TimeoutException { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + ContainerID id = container.containerID(); + ContainerReplica dn1 = addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + DatanodeDetails dn3 = addNode(new NodeStatus(IN_SERVICE, HEALTHY)); + replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + Assertions.assertTrue(scmLogs.getOutput().contains( + "receive a move request about container")); + Thread.sleep(100L); + Assertions.assertTrue(datanodeCommandHandler.received( + SCMCommandProto.Type.replicateContainerCommand, dn3)); + Assertions.assertEquals(1, datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + + //crash happens, restart scm. + //clear current inflight actions and reload inflightMove from DBStore. 
+ resetReplicationManager(); + replicationManager.getMoveScheduler() + .reinitialize(SCMDBDefinition.MOVE.getTable(dbStore)); + Assertions.assertTrue(replicationManager.getMoveScheduler() + .getInflightMove().containsKey(id)); + MoveDataNodePair kv = replicationManager.getMoveScheduler() + .getInflightMove().get(id); + Assertions.assertEquals(kv.getSrc(), dn1.getDatanodeDetails()); + Assertions.assertEquals(kv.getTgt(), dn3); + serviceManager.notifyStatusChanged(); + + Thread.sleep(100L); + // now, the container is not over-replicated, + // so no deleteContainerCommand will be sent Assertions.assertFalse(datanodeCommandHandler.received( SCMCommandProto.Type.deleteContainerCommand, - r.getDatanodeDetails())); + dn1.getDatanodeDetails())); + //replica does not exist in target datanode, so a + // replicateContainerCommand will be sent again at + // notifyStatusChanged#onLeaderReadyAndOutOfSafeMode + Assertions.assertEquals(2, datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + + + //replicate container to dn3, now, over-replicated + addReplicaToDn(container, dn3, CLOSED); + replicationManager.processAll(); + eventQueue.processAll(1000); + + //deleteContainerCommand is sent, but the src replica is not deleted now + Assertions.assertEquals(1, datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.deleteContainerCommand)); + + //crash happens, restart scm. + //clear current inflight actions and reload inflightMove from DBStore. 
+ resetReplicationManager();
+ replicationManager.getMoveScheduler()
+ .reinitialize(SCMDBDefinition.MOVE.getTable(dbStore));
+ Assertions.assertTrue(replicationManager.getMoveScheduler()
+ .getInflightMove().containsKey(id));
+ kv = replicationManager.getMoveScheduler()
+ .getInflightMove().get(id);
+ Assertions.assertEquals(kv.getSrc(), dn1.getDatanodeDetails());
+ Assertions.assertEquals(kv.getTgt(), dn3);
+ serviceManager.notifyStatusChanged();
+
+ //after restart and the container is over-replicated now,
+ //deleteContainerCommand will be sent again
+ Assertions.assertEquals(2, datanodeCommandHandler.getInvocationCount(
+ SCMCommandProto.Type.deleteContainerCommand));
+ containerStateManager.removeContainerReplica(id, dn1);
+
+ //replica in src datanode is deleted now
+ containerStateManager.removeContainerReplica(id, dn1);
+ replicationManager.processAll();
+ eventQueue.processAll(1000);
+
+ //since the move is complete, after scm crash and restart
+ //inflightMove should not contain the container again
+ resetReplicationManager();
+ replicationManager.getMoveScheduler()
+ .reinitialize(SCMDBDefinition.MOVE.getTable(dbStore));
+ Assertions.assertFalse(replicationManager.getMoveScheduler()
+ .getInflightMove().containsKey(id));
+
+ //CompletableFuture is not stored in DB, so after scm crash and
+ //restart, the CompletableFuture is missing
}
- assertOverReplicatedCount(1);
- }
-
- /**
- * Replication Manager should not attempt to replicate from an unhealthy
- * (stale or dead) node. To test this, setup a scenario where a replia needs
- * to be created, but mark all nodes stale. That way, no new replica will be
- */ - @Test - public void testUnderReplicatedNotHealthySource() - throws IOException, TimeoutException { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - addReplica(container, NodeStatus.inServiceStale(), CLOSED); - addReplica(container, new NodeStatus(DECOMMISSIONED, STALE), CLOSED); - addReplica(container, new NodeStatus(DECOMMISSIONED, STALE), CLOSED); - // There should be replica scheduled, but as all nodes are stale, nothing - // gets scheduled. - assertReplicaScheduled(0); - assertUnderReplicatedCount(1); - } - - /** - * if all the prerequisites are satisfied, move should work as expected. - */ - @Test - public void testMove() throws IOException, NodeNotFoundException, - InterruptedException, ExecutionException, TimeoutException { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - ContainerID id = container.containerID(); - ContainerReplica dn1 = addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - DatanodeDetails dn3 = addNode(new NodeStatus(IN_SERVICE, HEALTHY)); - CompletableFuture cf = - replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - Assertions.assertTrue(scmLogs.getOutput().contains( - "receive a move request about container")); - Thread.sleep(100L); - Assertions.assertTrue(datanodeCommandHandler.received( - SCMCommandProto.Type.replicateContainerCommand, dn3)); - Assertions.assertEquals(1, datanodeCommandHandler.getInvocationCount( - SCMCommandProto.Type.replicateContainerCommand)); - //replicate container to dn3 - addReplicaToDn(container, dn3, CLOSED); - replicationManager.processAll(); - eventQueue.processAll(1000); + /** + * make sure RM does not delete replica if placement policy is not + * satisfied. 
+ */ + @Test + public void testMoveNotDeleteSrcIfPolicyNotSatisfied() + throws IOException, NodeNotFoundException, + InterruptedException, ExecutionException, TimeoutException { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + ContainerID id = container.containerID(); + ContainerReplica dn1 = addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + ContainerReplica dn2 = addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + DatanodeDetails dn4 = addNode(new NodeStatus(IN_SERVICE, HEALTHY)); + CompletableFuture cf = + replicationManager.move(id, dn1.getDatanodeDetails(), dn4); + Assertions.assertTrue(scmLogs.getOutput().contains( + "receive a move request about container")); + Thread.sleep(100L); + Assertions.assertTrue(datanodeCommandHandler.received( + SCMCommandProto.Type.replicateContainerCommand, dn4)); + Assertions.assertEquals(1, datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + + //replicate container to dn4 + addReplicaToDn(container, dn4, CLOSED); + //now, replication succeeds, but replica in dn2 lost, + //and there are only tree replicas totally, so rm should + //not delete the replica on dn1 + containerStateManager.removeContainerReplica(id, dn2); + replicationManager.processAll(); + eventQueue.processAll(1000); + + Assertions.assertFalse( + datanodeCommandHandler.received( + SCMCommandProto.Type.deleteContainerCommand, + dn1.getDatanodeDetails())); + + Assertions.assertTrue(cf.isDone() && + cf.get() == MoveResult.DELETE_FAIL_POLICY); + } - Assertions.assertTrue(datanodeCommandHandler.received( - SCMCommandProto.Type.deleteContainerCommand, dn1.getDatanodeDetails())); - Assertions.assertEquals(1, datanodeCommandHandler.getInvocationCount( - SCMCommandProto.Type.deleteContainerCommand)); - containerStateManager.removeContainerReplica(id, dn1); - replicationManager.processAll(); - 
eventQueue.processAll(1000); + /** + * test src and target datanode become unhealthy when moving. + */ + @Test + public void testDnBecameUnhealthyWhenMoving() throws IOException, + NodeNotFoundException, InterruptedException, ExecutionException, + TimeoutException { + final ContainerInfo container = createContainer(LifeCycleState.CLOSED); + ContainerID id = container.containerID(); + ContainerReplica dn1 = addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + DatanodeDetails dn3 = addNode(new NodeStatus(IN_SERVICE, HEALTHY)); + CompletableFuture cf = + replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + Assertions.assertTrue(scmLogs.getOutput().contains( + "receive a move request about container")); + + nodeManager.setNodeStatus(dn3, new NodeStatus(IN_SERVICE, STALE)); + replicationManager.processAll(); + eventQueue.processAll(1000); + + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY); + + nodeManager.setNodeStatus(dn3, new NodeStatus(IN_SERVICE, HEALTHY)); + cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + addReplicaToDn(container, dn3, CLOSED); + replicationManager.processAll(); + eventQueue.processAll(1000); + nodeManager.setNodeStatus(dn1.getDatanodeDetails(), + new NodeStatus(IN_SERVICE, STALE)); + replicationManager.processAll(); + eventQueue.processAll(1000); + + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.DELETION_FAIL_NODE_UNHEALTHY); + } - Assertions.assertTrue(cf.isDone() && cf.get() == MoveResult.COMPLETED); + /** + * before Replication Manager generates a completablefuture for a move + * option, some Prerequisites should be satisfied. 
+ */ + @Test + public void testMovePrerequisites() throws IOException, + NodeNotFoundException, InterruptedException, ExecutionException, + InvalidStateTransitionException, TimeoutException { + //all conditions is met + final ContainerInfo container = createContainer(LifeCycleState.OPEN); + ContainerID id = container.containerID(); + ContainerReplica dn1 = addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + ContainerReplica dn2 = addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + DatanodeDetails dn3 = addNode(new NodeStatus(IN_SERVICE, HEALTHY)); + ContainerReplica dn4 = addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); + + CompletableFuture cf; + //the above move is executed successfully, so there may be some item in + //inflightReplication or inflightDeletion. here we stop replication + // manager to clear these states, which may impact the tests below. + //we don't need a running replicationManamger now + replicationManager.stop(); + Thread.sleep(100L); + cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.FAIL_NOT_RUNNING); + replicationManager.start(); + Thread.sleep(100L); + + //container in not in OPEN state + cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED); + //open -> closing + containerStateManager.updateContainerState(id.getProtobuf(), + LifeCycleEvent.FINALIZE); + cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED); + //closing -> quasi_closed + containerStateManager.updateContainerState(id.getProtobuf(), + LifeCycleEvent.QUASI_CLOSE); + cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + Assertions.assertTrue(cf.isDone() && cf.get() == + 
MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED); + + //quasi_closed -> closed + containerStateManager.updateContainerState(id.getProtobuf(), + LifeCycleEvent.FORCE_CLOSE); + Assertions.assertSame(LifeCycleState.CLOSED, + containerStateManager.getContainer(id).getState()); + + //Node is not in healthy state + for (HddsProtos.NodeState state : HddsProtos.NodeState.values()) { + if (state != HEALTHY) { + nodeManager.setNodeStatus(dn3, + new NodeStatus(IN_SERVICE, state)); + cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY); + cf = replicationManager.move(id, dn3, dn1.getDatanodeDetails()); + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY); + } + } + nodeManager.setNodeStatus(dn3, new NodeStatus(IN_SERVICE, HEALTHY)); + + //Node is not in IN_SERVICE state + for (HddsProtos.NodeOperationalState state : + HddsProtos.NodeOperationalState.values()) { + if (state != IN_SERVICE) { + nodeManager.setNodeStatus(dn3, + new NodeStatus(state, HEALTHY)); + cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE); + cf = replicationManager.move(id, dn3, dn1.getDatanodeDetails()); + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE); + } + } + nodeManager.setNodeStatus(dn3, new NodeStatus(IN_SERVICE, HEALTHY)); + + //container exists in target datanode + cf = replicationManager.move(id, dn1.getDatanodeDetails(), + dn2.getDatanodeDetails()); + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.REPLICATION_FAIL_EXIST_IN_TARGET); + + //container does not exist in source datanode + cf = replicationManager.move(id, dn3, dn3); + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.REPLICATION_FAIL_NOT_EXIST_IN_SOURCE); + + //make container over relplicated 
to test the + // case that container is in inflightDeletion + ContainerReplica dn5 = addReplica(container, + new NodeStatus(IN_SERVICE, HEALTHY), State.CLOSED); + replicationManager.processAll(); + //waiting for inflightDeletion generation + eventQueue.processAll(1000); + cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.REPLICATION_FAIL_INFLIGHT_DELETION); + resetReplicationManager(); + + //make the replica num be 2 to test the case + //that container is in inflightReplication + containerStateManager.removeContainerReplica(id, dn5); + containerStateManager.removeContainerReplica(id, dn4); + //replication manager should generate inflightReplication + replicationManager.processAll(); + //waiting for inflightReplication generation + eventQueue.processAll(1000); + cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); + Assertions.assertTrue(cf.isDone() && cf.get() == + MoveResult.REPLICATION_FAIL_INFLIGHT_REPLICATION); + } } /** - * if crash happened and restarted, move option should work as expected. + * Tests mis-replicated containers with rack topology information. 
*/ - @Test - public void testMoveCrashAndRestart() throws IOException, - NodeNotFoundException, InterruptedException, TimeoutException { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - ContainerID id = container.containerID(); - ContainerReplica dn1 = addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - DatanodeDetails dn3 = addNode(new NodeStatus(IN_SERVICE, HEALTHY)); - replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - Assertions.assertTrue(scmLogs.getOutput().contains( - "receive a move request about container")); - Thread.sleep(100L); - Assertions.assertTrue(datanodeCommandHandler.received( - SCMCommandProto.Type.replicateContainerCommand, dn3)); - Assertions.assertEquals(1, datanodeCommandHandler.getInvocationCount( - SCMCommandProto.Type.replicateContainerCommand)); - - //crash happens, restart scm. - //clear current inflight actions and reload inflightMove from DBStore. 
- resetReplicationManager(); - replicationManager.getMoveScheduler() - .reinitialize(SCMDBDefinition.MOVE.getTable(dbStore)); - Assertions.assertTrue(replicationManager.getMoveScheduler() - .getInflightMove().containsKey(id)); - MoveDataNodePair kv = replicationManager.getMoveScheduler() - .getInflightMove().get(id); - Assertions.assertEquals(kv.getSrc(), dn1.getDatanodeDetails()); - Assertions.assertEquals(kv.getTgt(), dn3); - serviceManager.notifyStatusChanged(); + @Nested + class MisReplicated { + + @Test + public void additionalReplicaScheduledWhenMisReplicated() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.CLOSED); + container.setUsedBytes(100); + final ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica replicaOne = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaTwo = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaThree = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica( + id, replicaThree); + + // Ensure a mis-replicated status is returned for any containers in this + // test where there are 3 replicas. When there are 2 or 4 replicas + // the status returned will be healthy. 
+ Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement( + Mockito.argThat(list -> list.size() == 3), + Mockito.anyInt() + )).thenAnswer(invocation -> { + return new ContainerPlacementStatusDefault(1, 2, 3); + }); + + int currentReplicateCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.replicateContainerCommand); + final long currentBytesToReplicate = replicationManager.getMetrics() + .getNumReplicationBytesTotal(); + + replicationManager.processAll(); + eventQueue.processAll(1000); + // At this stage, due to the mocked calls to validateContainerPlacement + // the policy will not be satisfied, and replication will be triggered. + + Assertions.assertEquals(currentReplicateCommandCount + 1, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + Assertions.assertEquals(currentReplicateCommandCount + 1, + replicationManager.getMetrics().getNumReplicationCmdsSent()); + Assertions.assertEquals(currentBytesToReplicate + 100, + replicationManager.getMetrics().getNumReplicationBytesTotal()); + Assertions.assertEquals(1, getInflightCount(InflightType.REPLICATION)); + Assertions.assertEquals(1, replicationManager.getMetrics() + .getInflightReplication()); + + ReplicationManagerReport report = replicationManager.getContainerReport(); + Assertions.assertEquals(1, report.getStat(LifeCycleState.CLOSED)); + Assertions.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.MIS_REPLICATED)); + + // Now make it so that all containers seem mis-replicated no matter how + // many replicas. This will test replicas are not scheduled if the new + // replica does not fix the mis-replication. 
+ Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement( + Mockito.anyList(), + Mockito.anyInt() + )).thenAnswer(invocation -> { + return new ContainerPlacementStatusDefault(1, 2, 3); + }); + + currentReplicateCommandCount = datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand); + + replicationManager.processAll(); + eventQueue.processAll(1000); + // At this stage, due to the mocked calls to validateContainerPlacement + // the mis-replicated racks will not have improved, so expect to see + // nothing scheduled. + Assertions.assertEquals(currentReplicateCommandCount, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.replicateContainerCommand)); + Assertions.assertEquals(currentReplicateCommandCount, + replicationManager.getMetrics().getNumReplicationCmdsSent()); + Assertions.assertEquals(1, getInflightCount(InflightType.REPLICATION)); + Assertions.assertEquals(1, replicationManager.getMetrics() + .getInflightReplication()); + } - Thread.sleep(100L); - // now, the container is not over-replicated, - // so no deleteContainerCommand will be sent - Assertions.assertFalse(datanodeCommandHandler.received( - SCMCommandProto.Type.deleteContainerCommand, dn1.getDatanodeDetails())); - //replica does not exist in target datanode, so a replicateContainerCommand - //will be sent again at notifyStatusChanged#onLeaderReadyAndOutOfSafeMode - Assertions.assertEquals(2, datanodeCommandHandler.getInvocationCount( - SCMCommandProto.Type.replicateContainerCommand)); - - - //replicate container to dn3, now, over-replicated - addReplicaToDn(container, dn3, CLOSED); - replicationManager.processAll(); - eventQueue.processAll(1000); + @Test + public void overReplicatedButRemovingMakesMisReplicated() + throws IOException, TimeoutException { + // In this test, the excess replica should not be removed. 
+ final ContainerInfo container = getContainer(LifeCycleState.CLOSED); + final ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica replicaOne = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaTwo = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaThree = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaFour = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaFive = getReplicas( + id, UNHEALTHY, 1000L, originNodeId, randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica( + id, replicaThree); + containerStateManager.updateContainerReplica(id, replicaFour); + containerStateManager.updateContainerReplica(id, replicaFive); + + // Ensure a mis-replicated status is returned for any containers in this + // test where there are exactly 3 replicas checked. + Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement( + Mockito.argThat(list -> list.size() == 3), + Mockito.anyInt() + )).thenAnswer( + invocation -> new ContainerPlacementStatusDefault(1, 2, 3)); + + int currentDeleteCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); + + replicationManager.processAll(); + eventQueue.processAll(1000); + // TODO the new (non-legacy) RM needs a separate handler for + // topology status to make progress in this case by: + // 1. Deleting the closed replica to restore proper replica count. + // 2. Deleting the unhealthy replica since there are adequate healthy + // replicas. + // 3. 
Fixing topology issues left by the previous cleanup tasks. + // Current legacy RM implementation will take no action in this case + // because deletion would compromise topology. + Assertions.assertEquals(currentDeleteCommandCount, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.deleteContainerCommand)); + Assertions.assertEquals(currentDeleteCommandCount, + replicationManager.getMetrics().getNumDeletionCmdsSent()); - //deleteContainerCommand is sent, but the src replica is not deleted now - Assertions.assertEquals(1, datanodeCommandHandler.getInvocationCount( - SCMCommandProto.Type.deleteContainerCommand)); - - //crash happens, restart scm. - //clear current inflight actions and reload inflightMove from DBStore. - resetReplicationManager(); - replicationManager.getMoveScheduler() - .reinitialize(SCMDBDefinition.MOVE.getTable(dbStore)); - Assertions.assertTrue(replicationManager.getMoveScheduler() - .getInflightMove().containsKey(id)); - kv = replicationManager.getMoveScheduler() - .getInflightMove().get(id); - Assertions.assertEquals(kv.getSrc(), dn1.getDatanodeDetails()); - Assertions.assertEquals(kv.getTgt(), dn3); - serviceManager.notifyStatusChanged(); + Assertions.assertFalse(datanodeCommandHandler.received( + SCMCommandProto.Type.deleteContainerCommand, + replicaFive.getDatanodeDetails())); + Assertions.assertEquals(0, getInflightCount(InflightType.DELETION)); + Assertions.assertEquals(0, replicationManager.getMetrics() + .getInflightDeletion()); + assertOverReplicatedCount(1); + } - //after restart and the container is over-replicated now, - //deleteContainerCommand will be sent again - Assertions.assertEquals(2, datanodeCommandHandler.getInvocationCount( - SCMCommandProto.Type.deleteContainerCommand)); - containerStateManager.removeContainerReplica(id, dn1); + @Test + public void testOverReplicatedAndPolicySatisfied() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.CLOSED); + final 
ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica replicaOne = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaTwo = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaThree = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaFour = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica( + id, replicaThree); + containerStateManager.updateContainerReplica(id, replicaFour); + + Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement( + Mockito.argThat(list -> list.size() == 3), + Mockito.anyInt() + )).thenAnswer( + invocation -> new ContainerPlacementStatusDefault(2, 2, 3)); + + final int currentDeleteCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); + + replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(currentDeleteCommandCount + 1, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.deleteContainerCommand)); + Assertions.assertEquals(currentDeleteCommandCount + 1, + replicationManager.getMetrics().getNumDeletionCmdsSent()); + Assertions.assertEquals(1, getInflightCount(InflightType.DELETION)); + Assertions.assertEquals(1, replicationManager.getMetrics() + .getInflightDeletion()); + + assertOverReplicatedCount(1); + } - //replica in src datanode is deleted now - containerStateManager.removeContainerReplica(id, dn1); - replicationManager.processAll(); - eventQueue.processAll(1000); + @Test + public void 
testOverReplicatedAndPolicyUnSatisfiedAndDeleted() + throws IOException, TimeoutException { + final ContainerInfo container = getContainer(LifeCycleState.CLOSED); + final ContainerID id = container.containerID(); + final UUID originNodeId = UUID.randomUUID(); + final ContainerReplica replicaOne = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaTwo = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaThree = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaFour = getReplicas( + id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + final ContainerReplica replicaFive = getReplicas( + id, QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + + containerStateManager.addContainer(container.getProtobuf()); + containerStateManager.updateContainerReplica(id, replicaOne); + containerStateManager.updateContainerReplica(id, replicaTwo); + containerStateManager.updateContainerReplica( + id, replicaThree); + containerStateManager.updateContainerReplica(id, replicaFour); + containerStateManager.updateContainerReplica(id, replicaFive); + + Mockito.when(ratisContainerPlacementPolicy.validateContainerPlacement( + Mockito.argThat(list -> list != null && list.size() <= 4), + Mockito.anyInt() + )).thenAnswer( + invocation -> new ContainerPlacementStatusDefault(1, 2, 3)); + + int currentDeleteCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); + + // On the first run, RM will delete one of the extra closed replicas. 
+ replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(currentDeleteCommandCount + 1, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.deleteContainerCommand)); + Assertions.assertEquals(currentDeleteCommandCount + 1, + replicationManager.getMetrics().getNumDeletionCmdsSent()); + Assertions.assertEquals(1, getInflightCount(InflightType.DELETION)); + Assertions.assertEquals(1, replicationManager.getMetrics() + .getInflightDeletion()); + + assertAnyDeleteTargets( + replicaOne.getDatanodeDetails(), + replicaTwo.getDatanodeDetails(), + replicaThree.getDatanodeDetails(), + replicaFour.getDatanodeDetails() + ); + + currentDeleteCommandCount = datanodeCommandHandler + .getInvocationCount(SCMCommandProto.Type.deleteContainerCommand); + + // One the second run, the container is now properly replicated when + // counting in flight deletes. This allows the quasi closed container to + // be deleted by the unhealthy container handler. + replicationManager.processAll(); + eventQueue.processAll(1000); + Assertions.assertEquals(currentDeleteCommandCount + 1, + datanodeCommandHandler.getInvocationCount( + SCMCommandProto.Type.deleteContainerCommand)); + Assertions.assertEquals(currentDeleteCommandCount + 1, + replicationManager.getMetrics().getNumDeletionCmdsSent()); + Assertions.assertEquals(1, getInflightCount(InflightType.DELETION)); + Assertions.assertEquals(1, replicationManager.getMetrics() + .getInflightDeletion()); + + assertDeleteTargetsContain(replicaFive.getDatanodeDetails()); + } + } - //since the move is complete,so after scm crash and restart - //inflightMove should not contain the container again - resetReplicationManager(); - replicationManager.getMoveScheduler() - .reinitialize(SCMDBDefinition.MOVE.getTable(dbStore)); - Assertions.assertFalse(replicationManager.getMoveScheduler() - .getInflightMove().containsKey(id)); + void runTestLimit(int replicationLimit, int deletionLimit, + int 
expectedReplicationSkipped, int expectedDeletionSkipped, + Callable testcase) throws Exception { + createReplicationManager(replicationLimit, deletionLimit); - //completeableFuture is not stored in DB, so after scm crash and - //restart ,completeableFuture is missing - } + final ReplicationManagerMetrics metrics = replicationManager.getMetrics(); + final long replicationSkipped = metrics.getInflightReplicationSkipped(); + final long deletionSkipped = metrics.getInflightDeletionSkipped(); - /** - * make sure RM does not delete replica if placement policy is not satisfied. - */ - @Test - public void testMoveNotDeleteSrcIfPolicyNotSatisfied() - throws IOException, NodeNotFoundException, - InterruptedException, ExecutionException, TimeoutException { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - ContainerID id = container.containerID(); - ContainerReplica dn1 = addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - ContainerReplica dn2 = addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - DatanodeDetails dn4 = addNode(new NodeStatus(IN_SERVICE, HEALTHY)); - CompletableFuture cf = - replicationManager.move(id, dn1.getDatanodeDetails(), dn4); - Assertions.assertTrue(scmLogs.getOutput().contains( - "receive a move request about container")); - Thread.sleep(100L); - Assertions.assertTrue(datanodeCommandHandler.received( - SCMCommandProto.Type.replicateContainerCommand, dn4)); - Assertions.assertEquals(1, datanodeCommandHandler.getInvocationCount( - SCMCommandProto.Type.replicateContainerCommand)); - - //replicate container to dn4 - addReplicaToDn(container, dn4, CLOSED); - //now, replication succeeds, but replica in dn2 lost, - //and there are only tree replicas totally, so rm should - //not delete the replica on dn1 - containerStateManager.removeContainerReplica(id, dn2); - replicationManager.processAll(); - eventQueue.processAll(1000); 
+ testcase.call(); - Assertions.assertFalse(datanodeCommandHandler.received( - SCMCommandProto.Type.deleteContainerCommand, dn1.getDatanodeDetails())); + Assertions.assertEquals(replicationSkipped + expectedReplicationSkipped, + metrics.getInflightReplicationSkipped()); + Assertions.assertEquals(deletionSkipped + expectedDeletionSkipped, + metrics.getInflightDeletionSkipped()); - Assertions.assertTrue(cf.isDone() && - cf.get() == MoveResult.DELETE_FAIL_POLICY); + //reset limits for other tests. + createReplicationManager(0, 0); } - /** - * test src and target datanode become unhealthy when moving. + * Checks that the set of datanodes given delete commands exactly matches + * targetDNs. */ - @Test - public void testDnBecameUnhealthyWhenMoving() throws IOException, - NodeNotFoundException, InterruptedException, ExecutionException, - TimeoutException { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - ContainerID id = container.containerID(); - ContainerReplica dn1 = addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - DatanodeDetails dn3 = addNode(new NodeStatus(IN_SERVICE, HEALTHY)); - CompletableFuture cf = - replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - Assertions.assertTrue(scmLogs.getOutput().contains( - "receive a move request about container")); - - nodeManager.setNodeStatus(dn3, new NodeStatus(IN_SERVICE, STALE)); - replicationManager.processAll(); - eventQueue.processAll(1000); + private void assertExactDeleteTargets(DatanodeDetails... 
targetDNs) { + List deleteCommands = datanodeCommandHandler + .getReceivedCommands().stream() + .filter(c -> c.getCommand().getType() == + SCMCommandProto.Type.deleteContainerCommand) + .collect(Collectors.toList()); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY); + Assertions.assertEquals(targetDNs.length, deleteCommands.size()); - nodeManager.setNodeStatus(dn3, new NodeStatus(IN_SERVICE, HEALTHY)); - cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - addReplicaToDn(container, dn3, CLOSED); - replicationManager.processAll(); - eventQueue.processAll(1000); - nodeManager.setNodeStatus(dn1.getDatanodeDetails(), - new NodeStatus(IN_SERVICE, STALE)); - replicationManager.processAll(); - eventQueue.processAll(1000); + Set targetDNIDs = Arrays.stream(targetDNs) + .map(DatanodeDetails::getUuid) + .collect(Collectors.toSet()); + Set chosenDNIDs = deleteCommands.stream() + .map(CommandForDatanode::getDatanodeId) + .collect(Collectors.toSet()); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.DELETION_FAIL_NODE_UNHEALTHY); + Assertions.assertEquals(targetDNIDs, chosenDNIDs); } /** - * before Replication Manager generates a completablefuture for a move option, - * some Prerequisites should be satisfied. + * Checks if the set of nodes with deletions scheduled were taken from the + * provided set of DNs. 
*/ - @Test - public void testMovePrerequisites() throws IOException, NodeNotFoundException, - InterruptedException, ExecutionException, - InvalidStateTransitionException, TimeoutException { - //all conditions is met - final ContainerInfo container = createContainer(LifeCycleState.OPEN); - ContainerID id = container.containerID(); - ContainerReplica dn1 = addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - ContainerReplica dn2 = addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - DatanodeDetails dn3 = addNode(new NodeStatus(IN_SERVICE, HEALTHY)); - ContainerReplica dn4 = addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - - CompletableFuture cf; - //the above move is executed successfully, so there may be some item in - //inflightReplication or inflightDeletion. here we stop replication manager - //to clear these states, which may impact the tests below. - //we don't need a running replicationManamger now - replicationManager.stop(); - Thread.sleep(100L); - cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.FAIL_NOT_RUNNING); - replicationManager.start(); - Thread.sleep(100L); - - //container in not in OPEN state - cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED); - //open -> closing - containerStateManager.updateContainerState(id.getProtobuf(), - LifeCycleEvent.FINALIZE); - cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED); - //closing -> quasi_closed - containerStateManager.updateContainerState(id.getProtobuf(), - LifeCycleEvent.QUASI_CLOSE); - cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - Assertions.assertTrue(cf.isDone() && cf.get() == - 
MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED); - - //quasi_closed -> closed - containerStateManager.updateContainerState(id.getProtobuf(), - LifeCycleEvent.FORCE_CLOSE); - Assertions.assertSame(LifeCycleState.CLOSED, - containerStateManager.getContainer(id).getState()); - - //Node is not in healthy state - for (HddsProtos.NodeState state : HddsProtos.NodeState.values()) { - if (state != HEALTHY) { - nodeManager.setNodeStatus(dn3, - new NodeStatus(IN_SERVICE, state)); - cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY); - cf = replicationManager.move(id, dn3, dn1.getDatanodeDetails()); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY); - } - } - nodeManager.setNodeStatus(dn3, new NodeStatus(IN_SERVICE, HEALTHY)); - - //Node is not in IN_SERVICE state - for (HddsProtos.NodeOperationalState state : - HddsProtos.NodeOperationalState.values()) { - if (state != IN_SERVICE) { - nodeManager.setNodeStatus(dn3, - new NodeStatus(state, HEALTHY)); - cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE); - cf = replicationManager.move(id, dn3, dn1.getDatanodeDetails()); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE); - } - } - nodeManager.setNodeStatus(dn3, new NodeStatus(IN_SERVICE, HEALTHY)); - - //container exists in target datanode - cf = replicationManager.move(id, dn1.getDatanodeDetails(), - dn2.getDatanodeDetails()); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.REPLICATION_FAIL_EXIST_IN_TARGET); - - //container does not exist in source datanode - cf = replicationManager.move(id, dn3, dn3); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.REPLICATION_FAIL_NOT_EXIST_IN_SOURCE); - - //make container over relplicated 
to test the - // case that container is in inflightDeletion - ContainerReplica dn5 = addReplica(container, - new NodeStatus(IN_SERVICE, HEALTHY), State.CLOSED); - replicationManager.processAll(); - //waiting for inflightDeletion generation - eventQueue.processAll(1000); - cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.REPLICATION_FAIL_INFLIGHT_DELETION); - resetReplicationManager(); - - //make the replica num be 2 to test the case - //that container is in inflightReplication - containerStateManager.removeContainerReplica(id, dn5); - containerStateManager.removeContainerReplica(id, dn4); - //replication manager should generate inflightReplication - replicationManager.processAll(); - //waiting for inflightReplication generation - eventQueue.processAll(1000); - cf = replicationManager.move(id, dn1.getDatanodeDetails(), dn3); - Assertions.assertTrue(cf.isDone() && cf.get() == - MoveResult.REPLICATION_FAIL_INFLIGHT_REPLICATION); - } + private void assertAnyDeleteTargets(DatanodeDetails... 
validDeleteDNs) { + List deleteCommands = datanodeCommandHandler + .getReceivedCommands().stream() + .filter(c -> c.getCommand().getType() == + SCMCommandProto.Type.deleteContainerCommand) + .collect(Collectors.toList()); - @Test - public void testReplicateCommandTimeout() - throws IOException, TimeoutException { - long timeout = new ReplicationManagerConfiguration().getEventTimeout(); - - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - assertReplicaScheduled(1); - - // Already a pending replica, so nothing scheduled - assertReplicaScheduled(0); - - // Advance the clock past the timeout, and there should be a replica - // scheduled - clock.fastForward(timeout + 1000); - assertReplicaScheduled(1); - Assertions.assertEquals(1, replicationManager.getMetrics() - .getNumReplicationCmdsTimeout()); - } + Set deleteCandidateIDs = Arrays.stream(validDeleteDNs) + .map(DatanodeDetails::getUuid) + .collect(Collectors.toSet()); + Set chosenDNIDs = deleteCommands.stream() + .map(CommandForDatanode::getDatanodeId) + .collect(Collectors.toSet()); - @Test - public void testDeleteCommandTimeout() - throws IOException, TimeoutException { - long timeout = new ReplicationManagerConfiguration().getEventTimeout(); - - final ContainerInfo container = createContainer(LifeCycleState.CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - assertDeleteScheduled(1); - - // Already a pending replica, so nothing scheduled - assertReplicaScheduled(0); - - // Advance the clock past the timeout, and there should be a replica - // scheduled - clock.fastForward(timeout + 1000); - assertDeleteScheduled(1); - 
Assertions.assertEquals(1, replicationManager.getMetrics() - .getNumDeletionCmdsTimeout()); + Assertions.assertTrue(deleteCandidateIDs.containsAll(chosenDNIDs)); } /** - * A closed empty container with all the replicas also closed and empty - * should be deleted. - * A container/ replica should be deemed empty when it has 0 keyCount even - * if the usedBytes is not 0 (usedBytes should not be used to determine if - * the container or replica is empty). + * Checks if the set of nodes with deletions scheduled contains all of the + * provided DNs. */ - @Test - public void testDeleteEmptyContainer() throws Exception { - runTestDeleteEmptyContainer(3); - } - - Void runTestDeleteEmptyContainer(int expectedDelete) throws Exception { - // Create container with usedBytes = 1000 and keyCount = 0 - final ContainerInfo container = createContainer(LifeCycleState.CLOSED, 1000, - 0); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - // Create a replica with usedBytes != 0 and keyCount = 0 - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED, 100, 0); - - assertDeleteScheduled(expectedDelete); - return null; - } + private void assertDeleteTargetsContain(DatanodeDetails... deleteDN) { + List deleteCommands = datanodeCommandHandler + .getReceivedCommands().stream() + .filter(c -> c.getCommand().getType() == + SCMCommandProto.Type.deleteContainerCommand) + .collect(Collectors.toList()); - @Test - public void testDeletionLimit() throws Exception { - runTestLimit(0, 2, 0, 1, - () -> runTestDeleteEmptyContainer(2)); - } + Set deleteDNIDs = Arrays.stream(deleteDN) + .map(DatanodeDetails::getUuid) + .collect(Collectors.toSet()); + Set chosenDNIDs = deleteCommands.stream() + .map(CommandForDatanode::getDatanodeId) + .collect(Collectors.toSet()); - /** - * A closed empty container with a non-empty replica should not be deleted. 
- */ - @Test - public void testDeleteEmptyContainerNonEmptyReplica() throws Exception { - final ContainerInfo container = createContainer(LifeCycleState.CLOSED, 0, - 0); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED); - // Create the 3rd replica with non-zero key count and used bytes - addReplica(container, new NodeStatus(IN_SERVICE, HEALTHY), CLOSED, 100, 1); - assertDeleteScheduled(0); + Assertions.assertTrue(chosenDNIDs.containsAll(deleteDNIDs)); } private ContainerInfo createContainer(LifeCycleState containerState) @@ -2052,6 +2464,12 @@ private ContainerReplica addReplica(ContainerInfo container, private ContainerReplica addReplicaToDn(ContainerInfo container, DatanodeDetails dn, State replicaState) throws ContainerNotFoundException { + return addReplicaToDn(container, dn, replicaState, 1000L); + } + + private ContainerReplica addReplicaToDn(ContainerInfo container, + DatanodeDetails dn, State replicaState, long bcsId) + throws ContainerNotFoundException { // Using the same originID for all replica in the container set. If each // replica has a unique originID, it causes problems in ReplicationManager // when processing over-replicated containers. 
@@ -2059,7 +2477,7 @@ private ContainerReplica addReplicaToDn(ContainerInfo container, UUID.nameUUIDFromBytes(Longs.toByteArray(container.getContainerID())); final ContainerReplica replica = getReplicas(container.containerID(), replicaState, container.getUsedBytes(), container.getNumberOfKeys(), - 1000L, originNodeId, dn); + bcsId, originNodeId, dn); containerStateManager .updateContainerReplica(container.containerID(), replica); return replica; @@ -2124,16 +2542,6 @@ private void assertOverReplicatedCount(int count) { ReplicationManagerReport.HealthState.OVER_REPLICATED)); } - @AfterEach - public void teardown() throws Exception { - containerStateManager.close(); - replicationManager.stop(); - if (dbStore != null) { - dbStore.close(); - } - FileUtils.deleteDirectory(testDir); - } - private static class DatanodeCommandHandler implements EventHandler { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestMisReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestMisReplicationHandler.java new file mode 100644 index 000000000000..205967041746 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestMisReplicationHandler.java @@ -0,0 +1,182 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ContainerPlacementStatus; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.container.MockNodeManager; +import org.apache.hadoop.hdds.scm.net.NodeSchema; +import org.apache.hadoop.hdds.scm.net.NodeSchemaManager; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.node.NodeStatus; +import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; +import org.apache.hadoop.ozone.container.common.SCMTestUtils; +import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; +import org.apache.hadoop.ozone.protocol.commands.SCMCommand; +import org.junit.jupiter.api.Assertions; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type.replicateContainerCommand; +import 
static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA; +import static org.apache.hadoop.hdds.scm.net.NetConstants.RACK_SCHEMA; +import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT_SCHEMA; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.eq; + +/** + * Tests the MisReplicationHandling functionalities to test implementations. + */ +public abstract class TestMisReplicationHandler { + + private ContainerInfo container; + private NodeManager nodeManager; + private OzoneConfiguration conf; + + protected void setup(ReplicationConfig repConfig) { + nodeManager = new MockNodeManager(true, 10) { + @Override + public NodeStatus getNodeStatus(DatanodeDetails dd) { + return new NodeStatus( + dd.getPersistedOpState(), HddsProtos.NodeState.HEALTHY, 0); + } + + }; + conf = SCMTestUtils.getConf(); + container = ReplicationTestUtil + .createContainer(HddsProtos.LifeCycleState.CLOSED, repConfig); + NodeSchema[] schemas = + new NodeSchema[] {ROOT_SCHEMA, RACK_SCHEMA, LEAF_SCHEMA}; + NodeSchemaManager.getInstance().init(schemas, true); + } + + protected abstract MisReplicationHandler getMisreplicationHandler( + PlacementPolicy placementPolicy, OzoneConfiguration configuration, + NodeManager nm); + protected void testMisReplication(Set availableReplicas, + List pendingOp, + int maintenanceCnt, int misreplicationCount, + int expectedNumberOfNodes) + throws IOException { + PlacementPolicy placementPolicy = Mockito.mock(PlacementPolicy.class); + ContainerPlacementStatus mockedContainerPlacementStatus = + Mockito.mock(ContainerPlacementStatus.class); + Mockito.when(mockedContainerPlacementStatus.isPolicySatisfied()) + .thenReturn(false); + Mockito.when(placementPolicy.validateContainerPlacement(anyList(), + anyInt())).thenReturn(mockedContainerPlacementStatus); + testMisReplication(availableReplicas, placementPolicy, 
pendingOp, + maintenanceCnt, misreplicationCount, expectedNumberOfNodes); + } + + protected void testMisReplication(Set availableReplicas, + PlacementPolicy mockedPlacementPolicy, + List pendingOp, + int maintenanceCnt, int misreplicationCount, + int expectedNumberOfNodes) + throws IOException { + MisReplicationHandler misReplicationHandler = + getMisreplicationHandler(mockedPlacementPolicy, conf, nodeManager); + + ContainerHealthResult.MisReplicatedHealthResult result = + Mockito.mock(ContainerHealthResult.MisReplicatedHealthResult.class); + Mockito.when(result.isReplicatedOkAfterPending()).thenReturn(false); + Mockito.when(result.getContainerInfo()).thenReturn(container); + Map sources = availableReplicas.stream() + .collect(Collectors.toMap(Function.identity(), + r -> { + if (r.getDatanodeDetails().getPersistedOpState() + == IN_SERVICE) { + try { + return nodeManager.getNodeStatus( + r.getDatanodeDetails()).isHealthy(); + } catch (NodeNotFoundException e) { + throw new RuntimeException(e); + } + } + return false; + })); + + Set copy = sources.entrySet().stream() + .filter(Map.Entry::getValue).limit(misreplicationCount) + .map(Map.Entry::getKey).collect(Collectors.toSet()); + Mockito.when(mockedPlacementPolicy.replicasToCopyToFixMisreplication( + anyMap())).thenAnswer(invocation -> copy); + Set remainingReplicasAfterCopy = + availableReplicas.stream().filter(r -> !copy.contains(r)) + .map(ContainerReplica::getDatanodeDetails) + .collect(Collectors.toSet()); + List targetNodes = + IntStream.range(0, expectedNumberOfNodes) + .mapToObj(i -> MockDatanodeDetails.randomDatanodeDetails()) + .collect(Collectors.toList()); + if (expectedNumberOfNodes > 0) { + Mockito.when(mockedPlacementPolicy.chooseDatanodes( + Mockito.any(), Mockito.any(), Mockito.any(), + eq(copy.size()), Mockito.anyLong(), Mockito.anyLong())) + .thenAnswer(invocation -> { + List datanodeDetails = + invocation.getArgument(0); + Assertions.assertTrue(remainingReplicasAfterCopy + 
.containsAll(datanodeDetails)); + return targetNodes; + }); + } + Map copyReplicaIdxMap = copy.stream() + .collect(Collectors.toMap(ContainerReplica::getDatanodeDetails, + ContainerReplica::getReplicaIndex)); + Map> datanodeDetailsSCMCommandMap = + misReplicationHandler.processAndCreateCommands(availableReplicas, + pendingOp, result, maintenanceCnt); + Assertions.assertEquals(expectedNumberOfNodes, + datanodeDetailsSCMCommandMap.size()); + Assertions.assertTrue(datanodeDetailsSCMCommandMap.keySet() + .containsAll(targetNodes)); + for (SCMCommand command : datanodeDetailsSCMCommandMap.values()) { + Assertions.assertTrue(command.getType() == replicateContainerCommand); + ReplicateContainerCommand replicateContainerCommand = + (ReplicateContainerCommand) command; + Assertions.assertEquals(replicateContainerCommand.getContainerID(), + container.getContainerID()); + DatanodeDetails replicateSrcDn = + replicateContainerCommand.getSourceDatanodes().stream() + .findFirst().get(); + Assertions.assertTrue(copyReplicaIdxMap.containsKey(replicateSrcDn)); + Assertions.assertEquals(copyReplicaIdxMap.get(replicateSrcDn), + replicateContainerCommand.getReplicaIndex()); + } + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisMisReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisMisReplicationHandler.java new file mode 100644 index 000000000000..f960974256bd --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisMisReplicationHandler.java @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.container.replication; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; +import org.apache.hadoop.hdds.scm.ContainerPlacementStatus; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyList; + +/** + * Tests the RatisReplicationHandling functionality. 
+ */ +public class TestRatisMisReplicationHandler extends TestMisReplicationHandler { + + @BeforeEach + public void setup() { + RatisReplicationConfig repConfig = RatisReplicationConfig + .getInstance(ReplicationFactor.THREE); + setup(repConfig); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3, 4, 5, 6, 7}) + public void testMisReplicationWithAllNodesAvailable(int misreplicationCount) + throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 0), Pair.of(IN_SERVICE, 0), + Pair.of(IN_SERVICE, 0)); + testMisReplication(availableReplicas, Collections.emptyList(), + 0, misreplicationCount, Math.min(misreplicationCount, 3)); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3, 4, 5, 6, 7}) + public void testMisReplicationWithAllNodesAvailableQuasiClosed( + int misreplicationCount) throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(State.QUASI_CLOSED, Pair.of(IN_SERVICE, 0), + Pair.of(IN_SERVICE, 0), Pair.of(IN_SERVICE, 0)); + testMisReplication(availableReplicas, Collections.emptyList(), + 0, misreplicationCount, Math.min(misreplicationCount, 3)); + } + + @Test + public void testMisReplicationWithNoNodesReturned() throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 0), Pair.of(IN_SERVICE, 0), + Pair.of(IN_SERVICE, 0)); + PlacementPolicy placementPolicy = Mockito.mock(PlacementPolicy.class); + ContainerPlacementStatus mockedContainerPlacementStatus = + Mockito.mock(ContainerPlacementStatus.class); + Mockito.when(mockedContainerPlacementStatus.isPolicySatisfied()) + .thenReturn(false); + Mockito.when(placementPolicy.validateContainerPlacement(anyList(), + anyInt())).thenReturn(mockedContainerPlacementStatus); + Mockito.when(placementPolicy.chooseDatanodes( + Mockito.any(), Mockito.any(), Mockito.any(), + Mockito.anyInt(), Mockito.anyLong(), Mockito.anyLong())) + .thenThrow(new IOException("No nodes found")); + 
Assertions.assertThrows(SCMException.class, () -> testMisReplication( + availableReplicas, placementPolicy, Collections.emptyList(), + 0, 2, 0)); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3, 4, 5, 6, 7}) + public void testMisReplicationWithSomeNodesNotInService( + int misreplicationCount) throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 0), Pair.of(IN_SERVICE, 0), + Pair.of(IN_MAINTENANCE, 0)); + testMisReplication(availableReplicas, Collections.emptyList(), + 0, misreplicationCount, Math.min(misreplicationCount, 2)); + } + + @Test + public void testMisReplicationWithUndereplication() throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 0), + Pair.of(IN_SERVICE, 0)); + testMisReplication(availableReplicas, Collections.emptyList(), 0, 1, 0); + } + + @Test + public void testMisReplicationWithOvereplication() throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 0), Pair.of(IN_SERVICE, 0), + Pair.of(IN_SERVICE, 0), Pair.of(IN_SERVICE, 0)); + testMisReplication(availableReplicas, Collections.emptyList(), 0, 1, 0); + } + + @Test + public void testMisReplicationWithSatisfiedPlacementPolicy() + throws IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 0), Pair.of(IN_SERVICE, 0), + Pair.of(IN_SERVICE, 0)); + PlacementPolicy placementPolicy = Mockito.mock(PlacementPolicy.class); + ContainerPlacementStatus mockedContainerPlacementStatus = + Mockito.mock(ContainerPlacementStatus.class); + Mockito.when(mockedContainerPlacementStatus.isPolicySatisfied()) + .thenReturn(true); + Mockito.when(placementPolicy.validateContainerPlacement(anyList(), + anyInt())).thenReturn(mockedContainerPlacementStatus); + testMisReplication(availableReplicas, placementPolicy, + Collections.emptyList(), 0, 1, 0); + } + + @Test + public void testMisReplicationWithPendingOps() + throws 
IOException { + Set availableReplicas = ReplicationTestUtil + .createReplicas(Pair.of(IN_SERVICE, 0), Pair.of(IN_SERVICE, 0), + Pair.of(IN_SERVICE, 0)); + PlacementPolicy placementPolicy = Mockito.mock(PlacementPolicy.class); + ContainerPlacementStatus mockedContainerPlacementStatus = + Mockito.mock(ContainerPlacementStatus.class); + Mockito.when(mockedContainerPlacementStatus.isPolicySatisfied()) + .thenReturn(true); + Mockito.when(placementPolicy.validateContainerPlacement(anyList(), + anyInt())).thenReturn(mockedContainerPlacementStatus); + List pendingOp = Collections.singletonList( + ContainerReplicaOp.create(ContainerReplicaOp.PendingOpType.ADD, + MockDatanodeDetails.randomDatanodeDetails(), 0)); + testMisReplication(availableReplicas, placementPolicy, + pendingOp, 0, 1, 0); + pendingOp = Collections.singletonList(ContainerReplicaOp + .create(ContainerReplicaOp.PendingOpType.DELETE, availableReplicas + .stream().findAny().get().getDatanodeDetails(), 0)); + testMisReplication(availableReplicas, placementPolicy, + pendingOp, 0, 1, 0); + } + + @Override + protected MisReplicationHandler getMisreplicationHandler( + PlacementPolicy placementPolicy, OzoneConfiguration conf, + NodeManager nodeManager) { + return new RatisMisReplicationHandler(placementPolicy, conf, nodeManager); + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java new file mode 100644 index 000000000000..46e844d9a70e --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java @@ -0,0 +1,281 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import com.google.common.collect.ImmutableList; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault; +import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; +import org.apache.hadoop.ozone.protocol.commands.SCMCommand; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; +import org.slf4j.event.Level; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainer; +import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica; +import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicas; +import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicasWithSameOrigin; + +/** + * Tests for {@link RatisOverReplicationHandler}. 
+ */ +public class TestRatisOverReplicationHandler { + private ContainerInfo container; + private static final RatisReplicationConfig RATIS_REPLICATION_CONFIG = + RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE); + private PlacementPolicy policy; + + @Before + public void setup() throws NodeNotFoundException { + container = createContainer(HddsProtos.LifeCycleState.CLOSED, + RATIS_REPLICATION_CONFIG); + + policy = Mockito.mock(PlacementPolicy.class); + Mockito.when(policy.validateContainerPlacement( + Mockito.anyList(), Mockito.anyInt())) + .thenReturn(new ContainerPlacementStatusDefault(2, 2, 3)); + + GenericTestUtils.setLogLevel(RatisOverReplicationHandler.LOG, Level.DEBUG); + } + + /** + * Handler should create one delete command when a closed ratis container + * has 5 replicas and 1 pending delete. + */ + @Test + public void testOverReplicatedClosedContainer() throws IOException { + Set replicas = createReplicas(container.containerID(), + ContainerReplicaProto.State.CLOSED, 0, 0, 0, 0, 0); + List pendingOps = ImmutableList.of( + ContainerReplicaOp.create(ContainerReplicaOp.PendingOpType.DELETE, + MockDatanodeDetails.randomDatanodeDetails(), 0)); + + // 1 replica is already pending delete, so only 1 new command should be + // created + testProcessing(replicas, pendingOps, getOverReplicatedHealthResult(), + 1); + } + + /** + * The container is quasi closed. All 4 replicas are quasi closed and + * originate from the same datanode. This container is over replicated. + * Handler should preserve 1 replica and any 1 of the other 3 replicas can + * be deleted. 
+ */ + @Test + public void testOverReplicatedQuasiClosedContainerWithSameOrigin() + throws IOException { + container = createContainer(HddsProtos.LifeCycleState.QUASI_CLOSED, + RATIS_REPLICATION_CONFIG); + Set replicas = + createReplicasWithSameOrigin(container.containerID(), + ContainerReplicaProto.State.QUASI_CLOSED, 0, 0, 0, 0); + + testProcessing(replicas, Collections.emptyList(), + getOverReplicatedHealthResult(), 1); + } + + /** + * The container is quasi closed. All replicas are quasi closed but + * originate from different datanodes. While this container is over + * replicated, handler should not create a delete command for any replica. It + * tries to preserve one replica per unique origin datanode. + */ + @Test + public void testOverReplicatedQuasiClosedContainerWithDifferentOrigins() + throws IOException { + container = createContainer(HddsProtos.LifeCycleState.QUASI_CLOSED, + RATIS_REPLICATION_CONFIG); + Set replicas = createReplicas(container.containerID(), + ContainerReplicaProto.State.QUASI_CLOSED, 0, 0, 0, 0, 0); + + testProcessing(replicas, Collections.emptyList(), + getOverReplicatedHealthResult(), 0); + } + + /** + * When a quasi closed container is over replicated, the handler should + * prioritize creating delete commands for unhealthy replicas over quasi + * closed replicas. 
+ */ + @Test + public void testOverReplicatedQuasiClosedContainerWithUnhealthyReplica() + throws IOException { + container = createContainer(HddsProtos.LifeCycleState.QUASI_CLOSED, + RATIS_REPLICATION_CONFIG); + Set replicas = + createReplicasWithSameOrigin(container.containerID(), + ContainerReplicaProto.State.QUASI_CLOSED, 0, 0, 0); + ContainerReplica unhealthyReplica = + createContainerReplica(container.containerID(), 0, + HddsProtos.NodeOperationalState.IN_SERVICE, + ContainerReplicaProto.State.UNHEALTHY); + replicas.add(unhealthyReplica); + + Map> commands = testProcessing(replicas, + Collections.emptyList(), getOverReplicatedHealthResult(), 1); + Assert.assertTrue( + commands.containsKey(unhealthyReplica.getDatanodeDetails())); + } + + /** + * Handler should not create any delete commands if removing a replica + * makes the container mis replicated. + */ + @Test + public void testOverReplicatedContainerBecomesMisReplicatedOnRemoving() + throws IOException { + Set replicas = createReplicas(container.containerID(), + ContainerReplicaProto.State.CLOSED, 0, 0, 0, 0, 0); + + // Ensure a mis-replicated status is returned when 4 or fewer replicas are + // checked. + Mockito.when(policy.validateContainerPlacement( + Mockito.argThat(list -> list.size() <= 4), Mockito.anyInt())) + .thenReturn(new ContainerPlacementStatusDefault(1, 2, 3)); + + testProcessing(replicas, Collections.emptyList(), + getOverReplicatedHealthResult(), 0); + } + + /** + * Closed container with 4 closed replicas and 1 quasi closed replica. This + * container is over replicated and the handler should create a delete + * command for the quasi closed replica even if it violates the placement + * policy. Once the quasi closed container is removed and we have 4 + * replicas, then the mocked placement policy considers the container mis + * replicated. As long as the rack count does not change, another replica + * can be removed. 
+ */ + @Test + public void testOverReplicatedClosedContainerWithQuasiClosedReplica() + throws IOException { + Set replicas = createReplicas(container.containerID(), + ContainerReplicaProto.State.CLOSED, 0, 0, 0, 0); + ContainerReplica quasiClosedReplica = + createContainerReplica(container.containerID(), 0, + HddsProtos.NodeOperationalState.IN_SERVICE, + ContainerReplicaProto.State.QUASI_CLOSED); + replicas.add(quasiClosedReplica); + + // Ensure a mis-replicated status is returned when 4 or fewer replicas are + // checked. + Mockito.when(policy.validateContainerPlacement( + Mockito.argThat(list -> list.size() <= 4), Mockito.anyInt())) + .thenReturn(new ContainerPlacementStatusDefault(1, 2, 3)); + + Map> commands = testProcessing(replicas, + Collections.emptyList(), getOverReplicatedHealthResult(), 2); + Assert.assertTrue( + commands.containsKey(quasiClosedReplica.getDatanodeDetails())); + } + + @Test + public void testOverReplicatedWithDecomAndMaintenanceReplicas() + throws IOException { + Set replicas = createReplicas(container.containerID(), + ContainerReplicaProto.State.CLOSED, 0, 0, 0, 0); + ContainerReplica decommissioningReplica = + createContainerReplica(container.containerID(), 0, + HddsProtos.NodeOperationalState.DECOMMISSIONING, + ContainerReplicaProto.State.CLOSED); + ContainerReplica maintenanceReplica = + createContainerReplica(container.containerID(), 0, + HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE, + ContainerReplicaProto.State.CLOSED); + replicas.add(decommissioningReplica); + replicas.add(maintenanceReplica); + + Map> commands = testProcessing(replicas, + Collections.emptyList(), getOverReplicatedHealthResult(), 1); + Assert.assertFalse( + commands.containsKey(decommissioningReplica.getDatanodeDetails())); + Assert.assertFalse( + commands.containsKey(maintenanceReplica.getDatanodeDetails())); + } + + @Test + public void testPerfectlyReplicatedContainer() throws IOException { + Set replicas = createReplicas(container.containerID(), + 
ContainerReplicaProto.State.CLOSED, 0, 0, 0); + + testProcessing(replicas, Collections.emptyList(), + getOverReplicatedHealthResult(), 0); + + // now test 4 replicas and 1 pending delete + replicas = createReplicas(container.containerID(), + ContainerReplicaProto.State.CLOSED, 0, 0, 0, 0); + List pendingOps = ImmutableList.of( + ContainerReplicaOp.create(ContainerReplicaOp.PendingOpType.DELETE, + MockDatanodeDetails.randomDatanodeDetails(), 0)); + + testProcessing(replicas, pendingOps, getOverReplicatedHealthResult(), 0); + } + + /** + * Tests whether the specified expectNumCommands number of commands are + * created by the handler. + * + * @param replicas All replicas of the container + * @param pendingOps Collection of pending ops + * @param healthResult ContainerHealthResult that should be passed to the + * handler + * @param expectNumCommands number of commands expected to be created by + * the handler + * @return map of commands + */ + private Map> testProcessing( + Set replicas, List pendingOps, + ContainerHealthResult healthResult, + int expectNumCommands) throws IOException { + RatisOverReplicationHandler handler = + new RatisOverReplicationHandler(policy); + + Map> commands = + handler.processAndCreateCommands(replicas, pendingOps, + healthResult, 2); + Assert.assertEquals(expectNumCommands, commands.size()); + + return commands; + } + + private ContainerHealthResult.OverReplicatedHealthResult + getOverReplicatedHealthResult() { + ContainerHealthResult.OverReplicatedHealthResult healthResult = + Mockito.mock(ContainerHealthResult.OverReplicatedHealthResult.class); + Mockito.when(healthResult.getContainerInfo()).thenReturn(container); + return healthResult; + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java new file mode 100644 index 
000000000000..b2ccba14554b --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java @@ -0,0 +1,228 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import com.google.common.collect.ImmutableList; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.UnderReplicatedHealthResult; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.node.NodeStatus; +import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; +import org.apache.hadoop.ozone.container.common.SCMTestUtils; +import org.apache.hadoop.ozone.protocol.commands.SCMCommand; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE; +import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; +import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicas; + +/** + * Tests for {@link RatisUnderReplicationHandler}. + */ +public class TestRatisUnderReplicationHandler { + private ContainerInfo container; + private NodeManager nodeManager; + private OzoneConfiguration conf; + private static final RatisReplicationConfig RATIS_REPLICATION_CONFIG = + RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE); + private PlacementPolicy policy; + + @Before + public void setup() throws NodeNotFoundException { + container = ReplicationTestUtil.createContainer( + HddsProtos.LifeCycleState.CLOSED, RATIS_REPLICATION_CONFIG); + + nodeManager = Mockito.mock(NodeManager.class); + conf = SCMTestUtils.getConf(); + policy = ReplicationTestUtil + .getSimpleTestPlacementPolicy(nodeManager, conf); + + /* + Return NodeStatus with NodeOperationalState as specified in + DatanodeDetails, and NodeState as HEALTHY. + */ + Mockito.when(nodeManager.getNodeStatus(Mockito.any(DatanodeDetails.class))) + .thenAnswer(invocationOnMock -> { + DatanodeDetails dn = invocationOnMock.getArgument(0); + return new NodeStatus(dn.getPersistedOpState(), + HddsProtos.NodeState.HEALTHY); + }); + } + + /** + * When the container is under replicated even though there's a pending + * add, the handler should create replication commands. 
+ */ + @Test + public void testUnderReplicatedWithMissingReplicasAndPendingAdd() + throws IOException { + Set replicas + = createReplicas(container.containerID(), State.CLOSED, 0); + List pendingOps = ImmutableList.of( + ContainerReplicaOp.create(ContainerReplicaOp.PendingOpType.ADD, + MockDatanodeDetails.randomDatanodeDetails(), 0)); + + testProcessing(replicas, pendingOps, getUnderReplicatedHealthResult(), 2, + 1); + } + + /** + * When the container is under replicated and unrecoverable (no replicas + * exist), the handler will not create any commands. + */ + @Test + public void testUnderReplicatedAndUnrecoverable() throws IOException { + testProcessing(Collections.emptySet(), Collections.emptyList(), + getUnderReplicatedHealthResult(), 2, 0); + } + + /** + * The container is currently under replicated, but there's a pending add + * that will make it sufficiently replicated. The handler should not create + * any commands. + */ + @Test + public void testUnderReplicatedFixedByPendingAdd() throws IOException { + Set replicas + = createReplicas(container.containerID(), State.CLOSED, 0, 0); + List pendingOps = ImmutableList.of( + ContainerReplicaOp.create(ContainerReplicaOp.PendingOpType.ADD, + MockDatanodeDetails.randomDatanodeDetails(), 0)); + + testProcessing(replicas, pendingOps, getUnderReplicatedHealthResult(), 2, + 0); + } + + /** + * The container is under-replicated because a DN is decommissioning. The + * handler should create replication command. 
+ */ + @Test + public void testUnderReplicatedBecauseOfDecommissioningReplica() + throws IOException { + Set replicas = ReplicationTestUtil + .createReplicas(Pair.of(DECOMMISSIONING, 0), Pair.of(IN_SERVICE, 0), + Pair.of(IN_SERVICE, 0)); + + testProcessing(replicas, Collections.emptyList(), + getUnderReplicatedHealthResult(), 2, 1); + } + + /** + * The container is under-replicated because a DN is entering maintenance + * and the remaining number of replicas (CLOSED or QUASI_CLOSED replicas on + * HEALTHY datanodes) are less than the minimum healthy required. + */ + @Test + public void testUnderReplicatedBecauseOfMaintenanceReplica() + throws IOException { + Set replicas = ReplicationTestUtil + .createReplicas(Pair.of(ENTERING_MAINTENANCE, 0), + Pair.of(IN_SERVICE, 0), Pair.of(IN_SERVICE, 0)); + + testProcessing(replicas, Collections.emptyList(), + getUnderReplicatedHealthResult(), 3, 1); + } + + /** + * The container is sufficiently replicated because we have the minimum + * healthy replicas required for a DN to enter maintenance. + */ + @Test + public void testSufficientlyReplicatedDespiteMaintenanceReplica() + throws IOException { + Set replicas = ReplicationTestUtil + .createReplicas(Pair.of(ENTERING_MAINTENANCE, 0), + Pair.of(IN_SERVICE, 0), Pair.of(IN_SERVICE, 0)); + + testProcessing(replicas, Collections.emptyList(), + getUnderReplicatedHealthResult(), 2, 0); + } + + /** + * The handler should throw an exception when the placement policy is unable + * to choose new targets for replication. 
+ */ + @Test + public void testNoTargetsFoundBecauseOfPlacementPolicy() { + policy = ReplicationTestUtil.getNoNodesTestPlacementPolicy(nodeManager, + conf); + RatisUnderReplicationHandler handler = + new RatisUnderReplicationHandler(policy, conf, nodeManager); + + Set replicas + = createReplicas(container.containerID(), State.CLOSED, 0, 0); + + Assert.assertThrows(IOException.class, + () -> handler.processAndCreateCommands(replicas, + Collections.emptyList(), getUnderReplicatedHealthResult(), 2)); + } + + /** + * Tests whether the specified expectNumCommands number of commands are + * created by the handler. + * @param replicas All replicas of the container + * @param pendingOps Collection of pending ops + * @param healthResult ContainerHealthResult that should be passed to the + * handler + * @param minHealthyForMaintenance the minimum number of healthy replicas + * required for a datanode to enter + * maintenance + * @param expectNumCommands number of commands expected to be created by + * the handler + */ + private void testProcessing( + Set replicas, List pendingOps, + ContainerHealthResult healthResult, + int minHealthyForMaintenance, int expectNumCommands) throws IOException { + RatisUnderReplicationHandler handler = + new RatisUnderReplicationHandler(policy, conf, nodeManager); + + Map> commands = + handler.processAndCreateCommands(replicas, pendingOps, + healthResult, minHealthyForMaintenance); + Assert.assertEquals(expectNumCommands, commands.size()); + } + + private UnderReplicatedHealthResult getUnderReplicatedHealthResult() { + UnderReplicatedHealthResult healthResult = + Mockito.mock(UnderReplicatedHealthResult.class); + Mockito.when(healthResult.getContainerInfo()).thenReturn(container); + return healthResult; + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java index 82f971be799e..dd4a9e385ccb 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java @@ -62,9 +62,11 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; import static org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp.PendingOpType.ADD; import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerInfo; +import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica; import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicas; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; @@ -245,6 +247,94 @@ public void testUnderReplicatedAndUnrecoverable() ReplicationManagerReport.HealthState.MISSING)); } + /** + * A closed EC container with 3 closed and 2 unhealthy replicas is under + * replicated. RM should add it to under replicated queue. 
+ */ + @Test + public void testUnderReplicatedClosedContainerWithUnhealthyReplicas() + throws ContainerNotFoundException { + ContainerInfo container = createContainerInfo(repConfig, 1, + HddsProtos.LifeCycleState.CLOSED); + Set replicas = addReplicas(container, + ContainerReplicaProto.State.CLOSED, 1, 2, 3); + ContainerReplica unhealthyReplica1 = + createContainerReplica(container.containerID(), 4, + IN_SERVICE, ContainerReplicaProto.State.UNHEALTHY); + ContainerReplica unhealthyReplica2 = + createContainerReplica(container.containerID(), 5, + IN_SERVICE, ContainerReplicaProto.State.UNHEALTHY); + replicas.add(unhealthyReplica1); + replicas.add(unhealthyReplica2); + + replicationManager.processContainer( + container, repQueue, repReport); + + Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); + Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); + Assert.assertEquals(1, repReport.getStat( + ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + } + + /** + * A closed EC container with 2 closed and 3 unhealthy replicas is + * unrecoverable. It should not be queued to under replicated queue but + * should be recorded as missing (currently, we're calling an unrecoverable + * EC container missing). 
+ */ + @Test + public void testUnrecoverableClosedContainerWithUnhealthyReplicas() + throws ContainerNotFoundException { + ContainerInfo container = createContainerInfo(repConfig, 1, + HddsProtos.LifeCycleState.CLOSED); + Set replicas = addReplicas(container, + ContainerReplicaProto.State.UNHEALTHY, 3, 4, 5); + ContainerReplica closedReplica1 = + createContainerReplica(container.containerID(), 1, + IN_SERVICE, ContainerReplicaProto.State.CLOSED); + ContainerReplica closedReplica2 = + createContainerReplica(container.containerID(), 2, + IN_SERVICE, ContainerReplicaProto.State.CLOSED); + replicas.add(closedReplica1); + replicas.add(closedReplica2); + + replicationManager.processContainer( + container, repQueue, repReport); + + Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); + Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); + Assert.assertEquals(1, repReport.getStat( + ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + Assert.assertEquals(1, repReport.getStat( + ReplicationManagerReport.HealthState.MISSING)); + } + + @Test + public void + testUnderReplicatedClosedContainerWithUnHealthyAndClosingReplicas() + throws ContainerNotFoundException { + ContainerInfo container = createContainerInfo(repConfig, 1, + HddsProtos.LifeCycleState.CLOSED); + Set replicas = addReplicas(container, + ContainerReplicaProto.State.CLOSED, 1, 2, 3); + ContainerReplica unhealthyReplica1 = + createContainerReplica(container.containerID(), 4, + IN_SERVICE, ContainerReplicaProto.State.UNHEALTHY); + ContainerReplica unhealthyReplica2 = + createContainerReplica(container.containerID(), 5, + IN_SERVICE, ContainerReplicaProto.State.CLOSING); + replicas.add(unhealthyReplica1); + replicas.add(unhealthyReplica2); + + replicationManager.processContainer( + container, repQueue, repReport); + + Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); + Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); + Assert.assertEquals(1, repReport.getStat( + 
ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + } + @Test public void testUnderAndOverReplicated() throws ContainerNotFoundException { @@ -300,6 +390,13 @@ public void testOverReplicatedFixByPending() @Test public void testUnderReplicationQueuePopulated() { + // Make it always return mis-replicated. Only a perfectly replicated + // container should make it the mis-replicated state as under / over + // replicated take precedence. + Mockito.when(ecPlacementPolicy.validateContainerPlacement( + anyList(), anyInt())) + .thenReturn(new ContainerPlacementStatusDefault(1, 2, 3)); + ContainerInfo decomContainer = createContainerInfo(repConfig, 1, HddsProtos.LifeCycleState.CLOSED); addReplicas(decomContainer, ContainerReplicaProto.State.CLOSED, @@ -314,6 +411,10 @@ public void testUnderReplicationQueuePopulated() { HddsProtos.LifeCycleState.CLOSED); addReplicas(underRep0, ContainerReplicaProto.State.CLOSED, 1, 2, 3); + ContainerInfo misRep = createContainerInfo(repConfig, 4, + HddsProtos.LifeCycleState.CLOSED); + addReplicas(misRep, ContainerReplicaProto.State.CLOSED, 1, 2, 3, 4, 5); + enableProcessAll(); replicationManager.processAll(); @@ -348,6 +449,10 @@ public void testUnderReplicationQueuePopulated() { res = replicationManager.dequeueUnderReplicatedContainer(); Assert.assertEquals(underRep0, res.getContainerInfo()); + // Next is the mis-rep container, which has a remaining redundancy of 6. 
+ res = replicationManager.dequeueUnderReplicatedContainer(); + Assert.assertEquals(misRep, res.getContainerInfo()); + res = replicationManager.dequeueUnderReplicatedContainer(); Assert.assertNull(res); } @@ -477,6 +582,15 @@ public void testSendDatanodeReplicateCommand() throws NotLeaderException { replicationManager.sendDatanodeCommand(command, containerInfo, target); + // Ensure that the command deadline is set to current time + // + evenTime * factor + ReplicationManager.ReplicationManagerConfiguration rmConf = configuration + .getObject(ReplicationManager.ReplicationManagerConfiguration.class); + long expectedDeadline = clock.millis() + + Math.round(rmConf.getEventTimeout() * + rmConf.getCommandDeadlineFactor()); + Assert.assertEquals(expectedDeadline, command.getDeadline()); + List ops = containerReplicaPendingOps.getPendingOps( containerInfo.containerID()); Mockito.verify(eventPublisher).fireEvent(any(), any()); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithMismatchedReplicasHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithMismatchedReplicasHandler.java index ff2faa0ebc1f..f2b3a72885df 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithMismatchedReplicasHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithMismatchedReplicasHandler.java @@ -99,7 +99,7 @@ public void testClosedHealthyContainerReturnsFalse() { } @Test - public void testClosedMissMatchContainerReturnsTrue() { + public void testCloseCommandSentForMismatchedReplicas() { ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( ecReplicationConfig, 1, CLOSED); Set containerReplicas = ReplicationTestUtil @@ -126,7 +126,10 @@ public void testClosedMissMatchContainerReturnsTrue() { 
.setContainerInfo(containerInfo) .setContainerReplicas(containerReplicas) .build(); - Assertions.assertTrue(handler.handle(request)); + + // this handler always returns false so other handlers can fix issues + // such as under replication + Assertions.assertFalse(handler.handle(request)); Mockito.verify(replicationManager, times(1)) .sendCloseContainerReplicaCommand( @@ -134,6 +137,7 @@ public void testClosedMissMatchContainerReturnsTrue() { Mockito.verify(replicationManager, times(1)) .sendCloseContainerReplicaCommand( containerInfo, mismatch2.getDatanodeDetails(), true); + // close command should not be sent for unhealthy replica Mockito.verify(replicationManager, times(0)) .sendCloseContainerReplicaCommand( containerInfo, mismatch3.getDatanodeDetails(), true); @@ -177,7 +181,7 @@ public void testClosedHealthyRatisContainerReturnsFalse() { } @Test - public void testClosedMissMatchRatisContainerReturnsTrue() { + public void testCloseCommandSentForMismatchedRatisReplicas() { ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( ratisReplicationConfig, 1, CLOSED); ContainerReplica mismatch1 = ReplicationTestUtil.createContainerReplica( @@ -202,7 +206,10 @@ public void testClosedMissMatchRatisContainerReturnsTrue() { .setContainerInfo(containerInfo) .setContainerReplicas(containerReplicas) .build(); - Assertions.assertTrue(handler.handle(request)); + + // this handler always returns false so other handlers can fix issues + // such as under replication + Assertions.assertFalse(handler.handle(request)); Mockito.verify(replicationManager, times(1)) .sendCloseContainerReplicaCommand( @@ -210,6 +217,7 @@ public void testClosedMissMatchRatisContainerReturnsTrue() { Mockito.verify(replicationManager, times(1)) .sendCloseContainerReplicaCommand( containerInfo, mismatch2.getDatanodeDetails(), true); + // close command should not be sent for unhealthy replica Mockito.verify(replicationManager, times(0)) .sendCloseContainerReplicaCommand( containerInfo, 
mismatch3.getDatanodeDetails(), true); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithUnhealthyReplicasHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithUnhealthyReplicasHandler.java index 1b638b6db963..bf6232329fd7 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithUnhealthyReplicasHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithUnhealthyReplicasHandler.java @@ -143,7 +143,7 @@ public void testRatisContainerReturnsFalse() { ArgumentCaptor.forClass(Integer.class); Mockito.verify(replicationManager, Mockito.times(2)) .sendDeleteCommand(Mockito.eq(container), Mockito.anyInt(), Mockito.any( - DatanodeDetails.class)); + DatanodeDetails.class), Mockito.eq(true)); // replica index that delete was sent for should either be 2 or 5 replicaIndexCaptor.getAllValues() .forEach(index -> Assert.assertTrue(index == 2 || index == 5)); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestDeletingContainerHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestDeletingContainerHandler.java index e2a147ffd2f6..eeaf290c7b72 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestDeletingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestDeletingContainerHandler.java @@ -233,6 +233,6 @@ private void verifyDeleteCommandCount(ContainerInfo containerInfo, Mockito.verify(replicationManager, Mockito.times(times)) .sendDeleteCommand(Mockito.any(ContainerInfo.class), Mockito.anyInt(), - Mockito.any(DatanodeDetails.class)); + Mockito.any(DatanodeDetails.class), 
Mockito.eq(false)); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java index 06f68ee91cee..c3fe3664ed3e 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.ContainerInfo; @@ -48,8 +49,10 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSED; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp.PendingOpType.ADD; import static 
org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp.PendingOpType.DELETE; import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerInfo; @@ -274,6 +277,88 @@ public void testUnderReplicatedAndUnrecoverable() { ReplicationManagerReport.HealthState.MISSING)); } + @Test + public void testUnderReplicatedAndUnrecoverableWithDecommission() { + testUnderReplicatedAndUnrecoverableWithOffline(DECOMMISSIONING); + } + + @Test + public void testUnderReplicatedAndUnrecoverableWithMaintenance() { + testUnderReplicatedAndUnrecoverableWithOffline(ENTERING_MAINTENANCE); + } + + private void testUnderReplicatedAndUnrecoverableWithOffline( + HddsProtos.NodeOperationalState offlineState) { + ContainerInfo container = createContainerInfo(repConfig); + Set replicas = createReplicas(container.containerID(), + Pair.of(IN_SERVICE, 1), Pair.of(offlineState, 2)); + ContainerCheckRequest request = requestBuilder + .setContainerReplicas(replicas) + .setContainerInfo(container) + .build(); + + UnderReplicatedHealthResult result = (UnderReplicatedHealthResult) + healthCheck.checkHealth(request); + Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState()); + Assert.assertEquals(-1, result.getRemainingRedundancy()); + Assert.assertFalse(result.isReplicatedOkAfterPending()); + Assert.assertFalse(result.underReplicatedDueToDecommission()); + Assert.assertTrue(result.isUnrecoverable()); + Assert.assertTrue(result.hasUnreplicatedOfflineIndexes()); + + Assert.assertTrue(healthCheck.handle(request)); + // Unrecoverable so not added to the queue + Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); + Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); + Assert.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + Assert.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.MISSING)); + } + + @Test + public void 
testUnderReplicatedAndUnrecoverableWithDecommissionPending() { + testUnderReplicatedAndUnrecoverableWithOffline(DECOMMISSIONING); + } + + @Test + public void testUnderReplicatedAndUnrecoverableWithMaintenancePending() { + testUnderReplicatedAndUnrecoverableWithOffline(ENTERING_MAINTENANCE); + } + + private void testUnderReplicatedAndUnrecoverableWithOfflinePending( + HddsProtos.NodeOperationalState offlineState) { + ContainerInfo container = createContainerInfo(repConfig); + Set replicas = createReplicas(container.containerID(), + Pair.of(IN_SERVICE, 1), Pair.of(offlineState, 2)); + List pending = new ArrayList<>(); + pending.add(ContainerReplicaOp.create( + ADD, MockDatanodeDetails.randomDatanodeDetails(), 2)); + ContainerCheckRequest request = requestBuilder + .setContainerReplicas(replicas) + .setContainerInfo(container) + .setPendingOps(pending) + .build(); + + UnderReplicatedHealthResult result = (UnderReplicatedHealthResult) + healthCheck.checkHealth(request); + Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState()); + Assert.assertEquals(-1, result.getRemainingRedundancy()); + Assert.assertFalse(result.isReplicatedOkAfterPending()); + Assert.assertFalse(result.underReplicatedDueToDecommission()); + Assert.assertTrue(result.isUnrecoverable()); + Assert.assertFalse(result.hasUnreplicatedOfflineIndexes()); + + Assert.assertTrue(healthCheck.handle(request)); + // Unrecoverable so not added to the queue + Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); + Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); + Assert.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + Assert.assertEquals(1, report.getStat( + ReplicationManagerReport.HealthState.MISSING)); + } + /** * Tests that a closed EC 3-2 container with 3 closed and 2 unhealthy * replicas is under replicated. 
@@ -481,9 +566,8 @@ public void testMisReplicatedContainer() { Assert.assertEquals(HealthState.MIS_REPLICATED, result.getHealthState()); Assert.assertTrue(healthCheck.handle(request)); - Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); + Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(1, repQueue.misReplicatedQueueSize()); Assert.assertEquals(0, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(0, report.getStat( @@ -531,7 +615,6 @@ public void testMisReplicatedContainerFixedByPending() { Assert.assertTrue(healthCheck.handle(request)); Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(0, repQueue.misReplicatedQueueSize()); Assert.assertEquals(0, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(0, report.getStat( @@ -567,7 +650,6 @@ public void testUnderAndMisReplicatedContainer() { Assert.assertTrue(healthCheck.handle(request)); Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(0, repQueue.misReplicatedQueueSize()); Assert.assertEquals(1, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(0, report.getStat( @@ -604,7 +686,6 @@ public void testOverAndMisReplicatedContainer() { Assert.assertTrue(healthCheck.handle(request)); Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); Assert.assertEquals(1, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(0, repQueue.misReplicatedQueueSize()); Assert.assertEquals(0, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(1, report.getStat( @@ -613,4 +694,39 @@ public void testOverAndMisReplicatedContainer() { 
ReplicationManagerReport.HealthState.MIS_REPLICATED)); } + @Test + public void testUnhealthyReplicaWithOtherCopyAndPendingDelete() { + ContainerInfo container = createContainerInfo(repConfig); + Set replicas = createReplicas(container.containerID(), + Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2), + Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4), + Pair.of(IN_SERVICE, 5)); + + ContainerReplica unhealthyReplica = ReplicationTestUtil + .createContainerReplica(container.containerID(), 1, IN_SERVICE, + UNHEALTHY); + replicas.add(unhealthyReplica); + + List pendingOps = new ArrayList<>(); + pendingOps.add(ContainerReplicaOp.create(DELETE, + unhealthyReplica.getDatanodeDetails(), + unhealthyReplica.getReplicaIndex())); + + ContainerCheckRequest request = requestBuilder + .setContainerReplicas(replicas) + .setContainerInfo(container) + .setPendingOps(pendingOps) + .build(); + ContainerHealthResult result = healthCheck.checkHealth(request); + Assert.assertEquals(HealthState.HEALTHY, result.getHealthState()); + + Assert.assertFalse(healthCheck.handle(request)); + Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); + Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); + Assert.assertEquals(0, report.getStat( + ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + Assert.assertEquals(0, report.getStat( + ReplicationManagerReport.HealthState.OVER_REPLICATED)); + } + } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestEmptyContainerHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestEmptyContainerHandler.java index bac90b4c390e..9585a1a23015 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestEmptyContainerHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestEmptyContainerHandler.java @@ -215,7 +215,7 @@ private void 
assertAndVerify(ContainerCheckRequest request, Assertions.assertEquals(assertion, emptyContainerHandler.handle(request)); Mockito.verify(replicationManager, Mockito.times(times)) .sendDeleteCommand(Mockito.any(ContainerInfo.class), Mockito.anyInt(), - Mockito.any(DatanodeDetails.class)); + Mockito.any(DatanodeDetails.class), Mockito.eq(false)); Assertions.assertEquals(numEmptyExpected, request.getReport().getStat( ReplicationManagerReport.HealthState.EMPTY)); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java index f6493b7817c4..9cc2c6263d44 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java @@ -423,7 +423,6 @@ public void testUnderReplicatedWithMisReplication() { Assert.assertTrue(healthCheck.handle(requestBuilder.build())); Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(0, repQueue.misReplicatedQueueSize()); Assert.assertEquals(1, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(0, report.getStat( @@ -468,7 +467,6 @@ public void testUnderReplicatedWithMisReplicationFixedByPending() { Assert.assertTrue(healthCheck.handle(requestBuilder.build())); Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(0, repQueue.misReplicatedQueueSize()); Assert.assertEquals(1, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(0, report.getStat( @@ -494,9 +492,8 @@ 
public void testMisReplicated() { Assert.assertFalse(result.isReplicatedOkAfterPending()); Assert.assertTrue(healthCheck.handle(requestBuilder.build())); - Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); + Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(1, repQueue.misReplicatedQueueSize()); Assert.assertEquals(0, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(1, report.getStat( diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorTestUtil.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorTestUtil.java index b5d4d1158d04..9b5c34859543 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorTestUtil.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorTestUtil.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdds.scm.node; +import org.apache.commons.lang3.tuple.Triple; +import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; @@ -26,12 +28,14 @@ import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaCount; +import org.apache.hadoop.hdds.scm.container.replication.ECContainerReplicaCount; import org.apache.hadoop.hdds.scm.container.replication.RatisContainerReplicaCount; import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager; import org.apache.hadoop.hdds.server.events.EventHandler; import org.apache.hadoop.hdds.server.events.EventPublisher; import org.mockito.Mockito; +import 
java.util.Collections; import java.util.HashSet; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -53,20 +57,24 @@ private DatanodeAdminMonitorTestUtil() { * @param containerID The ID the replica is associated with * @param nodeState The persistedOpState stored in datanodeDetails. * @param replicaState The state of the generated replica. + * @param replicaIndex The replica Index for the replica. + * @param datanodeDetails The datanode the replica is hosted on. * @return A containerReplica with the given ID and state */ public static ContainerReplica generateReplica( ContainerID containerID, HddsProtos.NodeOperationalState nodeState, StorageContainerDatanodeProtocolProtos.ContainerReplicaProto - .State replicaState) { - DatanodeDetails dn = MockDatanodeDetails.randomDatanodeDetails(); - dn.setPersistedOpState(nodeState); + .State replicaState, + int replicaIndex, + DatanodeDetails datanodeDetails) { + datanodeDetails.setPersistedOpState(nodeState); return ContainerReplica.newBuilder() .setContainerState(replicaState) .setContainerID(containerID) .setSequenceId(1) - .setDatanodeDetails(dn) + .setDatanodeDetails(datanodeDetails) + .setReplicaIndex(replicaIndex) .build(); } @@ -86,7 +94,8 @@ public static ContainerReplicaCount generateReplicaCount( HddsProtos.NodeOperationalState...states) { Set replicas = new HashSet<>(); for (HddsProtos.NodeOperationalState s : states) { - replicas.add(generateReplica(containerID, s, CLOSED)); + replicas.add(generateReplica(containerID, s, CLOSED, 0, + MockDatanodeDetails.randomDatanodeDetails())); } ContainerInfo container = new ContainerInfo.Builder() .setContainerID(containerID.getId()) @@ -96,6 +105,39 @@ public static ContainerReplicaCount generateReplicaCount( return new RatisContainerReplicaCount(container, replicas, 0, 0, 3, 2); } + /** + * Create a ContainerReplicaCount object for an EC container, including a + * container with the requested ContainerID and state, along with a set of + * replicas of 
the given states. + * @param containerID The ID of the container to create and include + * @param repConfig The Replication Config for the container + * @param containerState The state of the container + * @param states Create a replica for each of the given states. + * @return A ContainerReplicaCount containing the generated container and + * replica set + */ + public static ContainerReplicaCount generateECReplicaCount( + ContainerID containerID, ECReplicationConfig repConfig, + HddsProtos.LifeCycleState containerState, + Triple...states) { + + Set replicas = new HashSet<>(); + for (Triple t + : states) { + replicas.add(generateReplica(containerID, t.getLeft(), CLOSED, + t.getRight(), t.getMiddle())); + } + ContainerInfo container = new ContainerInfo.Builder() + .setContainerID(containerID.getId()) + .setState(containerState) + .setReplicationConfig(repConfig) + .build(); + + return new ECContainerReplicaCount(container, replicas, + Collections.emptyList(), 1); + } + /** * The only interaction the DatanodeAdminMonitor has with the * ReplicationManager, is to request a ContainerReplicaCount object for each @@ -120,6 +162,32 @@ public static void mockGetContainerReplicaCount( containerState, replicaStates)); } + /** + * The only interaction the DatanodeAdminMonitor has with the + * ReplicationManager, is to request a ContainerReplicaCount object for each + * container on nodes being decommissioned or moved to maintenance. This + * method mocks that interface to return a ContainerReplicaCount with a + * container in the given containerState and a set of replicas in the given + * replicaStates. 
+ * @param containerState + * @param replicaStates + * @throws ContainerNotFoundException + */ + public static void mockGetContainerReplicaCountForEC( + ReplicationManager repManager, + HddsProtos.LifeCycleState containerState, + ECReplicationConfig repConfig, + Triple...replicaStates) + throws ContainerNotFoundException { + reset(repManager); + Mockito.when(repManager.getContainerReplicaCount( + Mockito.any(ContainerID.class))) + .thenAnswer(invocation -> + generateECReplicaCount((ContainerID)invocation.getArguments()[0], + repConfig, containerState, replicaStates)); + } + /** * This simple internal class is used to track and handle any DatanodeAdmin * events fired by the DatanodeAdminMonitor during tests. diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java index 97575c55829f..6ea0851b2b07 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.IOException; +import java.time.Clock; import java.time.ZoneId; import java.util.List; import java.util.UUID; @@ -59,7 +60,6 @@ import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.DBStoreBuilder; import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.common.MonotonicClock; import org.apache.hadoop.ozone.container.common.SCMTestUtils; import org.apache.hadoop.ozone.upgrade.LayoutVersionManager; import org.apache.hadoop.test.PathUtils; @@ -163,7 +163,7 @@ ContainerManager createContainerManager() scmhaManager, sequenceIdGen, pipelineManager, SCMDBDefinition.CONTAINERS.getTable(dbStore), new ContainerReplicaPendingOps( - conf, new MonotonicClock(ZoneId.systemDefault()))); + conf, 
Clock.system(ZoneId.systemDefault()))); } /** diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java index e2b4e13037c9..327ae26de24a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdds.scm.node; +import org.apache.commons.lang3.tuple.Triple; +import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; @@ -36,6 +38,7 @@ import java.util.HashSet; import java.util.Set; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; @@ -206,6 +209,63 @@ public void testDecommissionNodeWaitsForContainersToReplicate() nodeManager.getNodeStatus(dn1).getOperationalState()); } + @Test + public void testDecommissionNodeWithUnrecoverableECContainer() + throws NodeNotFoundException, ContainerNotFoundException { + DatanodeDetails dn1 = MockDatanodeDetails.randomDatanodeDetails(); + nodeManager.register(dn1, + new NodeStatus(HddsProtos.NodeOperationalState.DECOMMISSIONING, + HddsProtos.NodeState.HEALTHY)); + + nodeManager.setContainers(dn1, generateContainers(1)); + // Mock Replication Manager to return ContainerReplicaCount's which + // always have a DECOMMISSIONED replica. 
+ DatanodeAdminMonitorTestUtil + .mockGetContainerReplicaCountForEC( + repManager, + HddsProtos.LifeCycleState.CLOSED, + new ECReplicationConfig(3, 2), + Triple.of(DECOMMISSIONING, dn1, 1), + Triple.of(IN_SERVICE, + MockDatanodeDetails.randomDatanodeDetails(), 2)); + + // Run the monitor for the first time and the node will transition to + // REPLICATE_CONTAINERS as there are no pipelines to close. + monitor.startMonitoring(dn1); + monitor.run(); + DatanodeDetails node = getFirstTrackedNode(); + assertEquals(1, monitor.getTrackedNodeCount()); + assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING, + nodeManager.getNodeStatus(dn1).getOperationalState()); + + // Running the monitor again causes it to remain DECOMMISSIONING + // as nothing has changed. + monitor.run(); + assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING, + nodeManager.getNodeStatus(dn1).getOperationalState()); + + // Now change the replicationManager mock another copy of the + // decommissioning replica on an IN_SERVICE node and the node should + // complete the REPLICATE_CONTAINERS step, moving to complete which will end + // the decommission workflow + DatanodeAdminMonitorTestUtil + .mockGetContainerReplicaCountForEC( + repManager, + HddsProtos.LifeCycleState.CLOSED, + new ECReplicationConfig(3, 2), + Triple.of(DECOMMISSIONING, dn1, 1), + Triple.of(IN_SERVICE, + MockDatanodeDetails.randomDatanodeDetails(), 2), + Triple.of(IN_SERVICE, + MockDatanodeDetails.randomDatanodeDetails(), 1)); + + monitor.run(); + + assertEquals(0, monitor.getTrackedNodeCount()); + assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED, + nodeManager.getNodeStatus(dn1).getOperationalState()); + } + @Test public void testDecommissionAbortedWhenNodeInUnexpectedState() throws NodeNotFoundException, ContainerNotFoundException { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java index 1ed9b845ac25..c75cb937e5b6 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java @@ -762,7 +762,7 @@ public void testSafeModeUpdatedOnSafemodeExit() throws Exception { } @Test - public void testAddContainerWithClosedPipeline() throws Exception { + public void testAddContainerWithClosedPipelineScmStart() throws Exception { GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer. captureLogs(LoggerFactory.getLogger(PipelineStateMap.class)); SCMHADBTransactionBuffer buffer = new SCMHADBTransactionBufferStub(dbStore); @@ -786,6 +786,30 @@ public void testAddContainerWithClosedPipeline() throws Exception { pipelineID + " in closed state")); } + @Test + public void testAddContainerWithClosedPipeline() throws Exception { + GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer. 
+ captureLogs(LoggerFactory.getLogger(PipelineStateMap.class)); + SCMHADBTransactionBuffer buffer = new SCMHADBTransactionBufferStub(dbStore); + PipelineManagerImpl pipelineManager = + createPipelineManager(true, buffer); + Table pipelineStore = + SCMDBDefinition.PIPELINES.getTable(dbStore); + Pipeline pipeline = pipelineManager.createPipeline( + RatisReplicationConfig + .getInstance(HddsProtos.ReplicationFactor.THREE)); + PipelineID pipelineID = pipeline.getId(); + pipelineManager.addContainerToPipeline(pipelineID, ContainerID.valueOf(1)); + pipelineManager.getStateManager().updatePipelineState( + pipelineID.getProtobuf(), HddsProtos.PipelineState.PIPELINE_CLOSED); + buffer.flush(); + Assertions.assertTrue(pipelineStore.get(pipelineID).isClosed()); + pipelineManager.addContainerToPipeline(pipelineID, + ContainerID.valueOf(2)); + assertTrue(logCapturer.getOutput().contains( + "Adding container #2 to pipeline=" + pipelineID + " in CLOSED state.")); + } + @Test public void testPipelineCloseFlow() throws IOException, TimeoutException { GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementFactory.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementFactory.java new file mode 100644 index 000000000000..9d7bae48103c --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementFactory.java @@ -0,0 +1,215 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.pipeline; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.io.File; +import java.util.List; +import java.util.UUID; + +import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; + +import org.apache.hadoop.hdds.conf.StorageUnit; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.scm.HddsTestUtils; +import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.container.MockNodeManager; +import org.apache.hadoop.hdds.scm.container.TestContainerManagerImpl; +import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementRackScatter; +import org.apache.hadoop.hdds.scm.ha.SCMHAManager; +import org.apache.hadoop.hdds.scm.ha.SCMHAManagerStub; +import org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition; +import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl; +import org.apache.hadoop.hdds.scm.net.NodeSchema; +import org.apache.hadoop.hdds.scm.net.NodeSchemaManager; +import org.apache.hadoop.hdds.scm.node.DatanodeInfo; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.node.NodeStatus; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.DBStoreBuilder; +import org.apache.hadoop.ozone.container.upgrade.UpgradeUtils; +import 
org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY; +import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA; +import static org.apache.hadoop.hdds.scm.net.NetConstants.RACK_SCHEMA; +import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT_SCHEMA; +import static org.mockito.Mockito.when; + +/** + * Test for PipelinePlacementFactory. + */ +public class TestPipelinePlacementFactory { + private OzoneConfiguration conf; + private NodeManager nodeManager; + private NodeManager nodeManagerBase; + private PipelineStateManager stateManager; + private NetworkTopologyImpl cluster; + private final List datanodes = new ArrayList<>(); + private final List dnInfos = new ArrayList<>(); + private File testDir; + private DBStore dbStore; + private SCMHAManager scmhaManager; + + private static final long STORAGE_CAPACITY = 100L; + + @BeforeEach + public void setup() { + //initialize ozone config for tests + conf = new OzoneConfiguration(); + } + + private void setupRacks(int datanodeCount, int nodesPerRack) + throws Exception { + conf.setStorageSize(OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN, + 1, StorageUnit.BYTES); + NodeSchema[] schemas = new NodeSchema[] + {ROOT_SCHEMA, RACK_SCHEMA, LEAF_SCHEMA}; + NodeSchemaManager.getInstance().init(schemas, true); + cluster = new NetworkTopologyImpl(NodeSchemaManager.getInstance()); + + // build datanodes, and network topology + String rack = "/rack"; + String hostname = "node"; + for (int i = 0; i < datanodeCount; i++) { + DatanodeDetails datanodeDetails = + MockDatanodeDetails.createDatanodeDetails( + hostname + i, rack + (i / nodesPerRack)); + + datanodes.add(datanodeDetails); + 
cluster.add(datanodeDetails); + DatanodeInfo datanodeInfo = new DatanodeInfo( + datanodeDetails, NodeStatus.inServiceHealthy(), + UpgradeUtils.defaultLayoutVersionProto()); + + StorageContainerDatanodeProtocolProtos.StorageReportProto storage1 = + HddsTestUtils.createStorageReport( + datanodeInfo.getUuid(), "/data1-" + datanodeInfo.getUuidString(), + STORAGE_CAPACITY, 0, 100L, null); + StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto + metaStorage1 = + HddsTestUtils.createMetadataStorageReport( + "/metadata1-" + datanodeInfo.getUuidString(), + STORAGE_CAPACITY, 0, 100L, null); + datanodeInfo.updateStorageReports( + new ArrayList<>(Arrays.asList(storage1))); + datanodeInfo.updateMetaDataStorageReports( + new ArrayList<>(Arrays.asList(metaStorage1))); + dnInfos.add(datanodeInfo); + } + nodeManagerBase = new MockNodeManager(cluster, datanodes, + false, 10); + nodeManager = Mockito.spy(nodeManagerBase); + for (DatanodeInfo dn: dnInfos) { + when(nodeManager.getNodeByUuid(dn.getUuidString())) + .thenReturn(dn); + } + + testDir = GenericTestUtils.getTestDir( + TestContainerManagerImpl.class.getSimpleName() + UUID.randomUUID()); + conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); + dbStore = DBStoreBuilder.createDBStore( + conf, new SCMDBDefinition()); + scmhaManager = SCMHAManagerStub.getInstance(true); + + stateManager = PipelineStateManagerImpl.newBuilder() + .setPipelineStore(SCMDBDefinition.PIPELINES.getTable(dbStore)) + .setRatisServer(scmhaManager.getRatisServer()) + .setNodeManager(nodeManager) + .setSCMDBTransactionBuffer(scmhaManager.getDBTransactionBuffer()) + .build(); + } + + @Test + public void testDefaultPolicy() throws IOException { + PlacementPolicy policy = PipelinePlacementPolicyFactory + .getPolicy(null, null, conf); + Assertions.assertSame(PipelinePlacementPolicy.class, policy.getClass()); + } + + @Test + public void testRackScatterPolicy() throws Exception { + conf.set(OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY, + 
SCMContainerPlacementRackScatter.class.getCanonicalName()); + // for this test, rack setup does not matter, just + // need a non-null NetworkTopologyMap within the nodeManager + setupRacks(6, 3); + PlacementPolicy policy = PipelinePlacementPolicyFactory + .getPolicy(nodeManager, stateManager, conf); + Assertions.assertSame(SCMContainerPlacementRackScatter.class, + policy.getClass()); + } + + // test default rack aware pipeline provider placement - 3 racks + // pipeline created with 1 node on one rack and other 2 nodes + // on separate rack + @Test + public void testDefaultPipelineProviderRackPlacement() throws Exception { + setupRacks(6, 2); + PlacementPolicy policy = PipelinePlacementPolicyFactory + .getPolicy(nodeManager, stateManager, conf); + + int nodeNum = 3; + List datanodeDetails = + policy.chooseDatanodes(null, null, nodeNum, 15, 15); + Assertions.assertEquals(nodeNum, datanodeDetails.size()); + Assertions.assertTrue(cluster.isSameParent(datanodeDetails.get(0), + datanodeDetails.get(2))); + Assertions.assertFalse(cluster.isSameParent(datanodeDetails.get(0), + datanodeDetails.get(1))); + Assertions.assertFalse(cluster.isSameParent(datanodeDetails.get(1), + datanodeDetails.get(2))); + } + + // test rack scatter pipeline provider placement - 3 racks + // pipeline created with node on each rack + @Test + public void testRackScatterPipelineProviderRackPlacement() throws Exception { + conf.set(OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY, + SCMContainerPlacementRackScatter.class.getCanonicalName()); + + setupRacks(6, 2); + PlacementPolicy policy = PipelinePlacementPolicyFactory + .getPolicy(nodeManager, stateManager, conf); + + int nodeNum = 3; + List excludedNodes = new ArrayList<>(); + List favoredNodes = new ArrayList<>(); + List datanodeDetails = + policy.chooseDatanodes(excludedNodes, excludedNodes, favoredNodes, + nodeNum, 15, 15); + Assertions.assertEquals(nodeNum, datanodeDetails.size()); + Assertions.assertFalse(cluster.isSameParent(datanodeDetails.get(0), + 
datanodeDetails.get(2))); + Assertions.assertFalse(cluster.isSameParent(datanodeDetails.get(0), + datanodeDetails.get(1))); + Assertions.assertFalse(cluster.isSameParent(datanodeDetails.get(1), + datanodeDetails.get(2))); + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java index 9d5cadeb2d38..82a2ab32465e 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelinePlacementPolicy.java @@ -692,7 +692,8 @@ public void testCurrentRatisThreePipelineCount() createPipelineWithReplicationConfig(standaloneOneDn, STAND_ALONE, ONE); pipelineCount - = placementPolicy.currentRatisThreePipelineCount(healthyNodes.get(0)); + = placementPolicy.currentRatisThreePipelineCount(nodeManager, + stateManager, healthyNodes.get(0)); Assertions.assertEquals(pipelineCount, 0); // Check datanode with one RATIS/ONE pipeline @@ -701,7 +702,8 @@ public void testCurrentRatisThreePipelineCount() createPipelineWithReplicationConfig(ratisOneDn, RATIS, ONE); pipelineCount - = placementPolicy.currentRatisThreePipelineCount(healthyNodes.get(1)); + = placementPolicy.currentRatisThreePipelineCount(nodeManager, + stateManager, healthyNodes.get(1)); Assertions.assertEquals(pipelineCount, 0); // Check datanode with one RATIS/THREE pipeline @@ -712,7 +714,8 @@ public void testCurrentRatisThreePipelineCount() createPipelineWithReplicationConfig(ratisThreeDn, RATIS, THREE); pipelineCount - = placementPolicy.currentRatisThreePipelineCount(healthyNodes.get(2)); + = placementPolicy.currentRatisThreePipelineCount(nodeManager, + stateManager, healthyNodes.get(2)); Assertions.assertEquals(pipelineCount, 1); // Check datanode with one RATIS/ONE and one STANDALONE/ONE pipeline @@ -721,7 
+724,8 @@ public void testCurrentRatisThreePipelineCount() createPipelineWithReplicationConfig(standaloneOneDn, STAND_ALONE, ONE); pipelineCount - = placementPolicy.currentRatisThreePipelineCount(healthyNodes.get(1)); + = placementPolicy.currentRatisThreePipelineCount(nodeManager, + stateManager, healthyNodes.get(1)); Assertions.assertEquals(pipelineCount, 0); // Check datanode with one RATIS/ONE and one STANDALONE/ONE pipeline and @@ -734,7 +738,8 @@ public void testCurrentRatisThreePipelineCount() createPipelineWithReplicationConfig(ratisThreeDn, RATIS, THREE); pipelineCount - = placementPolicy.currentRatisThreePipelineCount(healthyNodes.get(1)); + = placementPolicy.currentRatisThreePipelineCount(nodeManager, + stateManager, healthyNodes.get(1)); Assertions.assertEquals(pipelineCount, 2); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java index b788fd713eb4..1d3d1ae4737a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestRatisPipelineProvider.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.MockNodeManager; import org.apache.hadoop.hdds.scm.container.TestContainerManagerImpl; +import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementRackScatter; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.ha.SCMHAManagerStub; import org.apache.hadoop.hdds.scm.ha.SCMHAManager; @@ -58,8 +59,10 @@ import java.util.stream.Collectors; import static org.apache.commons.collections.CollectionUtils.intersection; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN; import 
static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -316,6 +319,23 @@ public void testCreateFactorTHREEPipelineWithExcludedDatanodes() } } + @Test + // Test pipeline provider with RackScatter policy cannot create + // pipeline due to nodes with full pipeline engagement. + public void testFactorTHREEPipelineRackScatterEngagement() + throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.set(OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY, + SCMContainerPlacementRackScatter.class.getCanonicalName()); + conf.set(OZONE_DATANODE_PIPELINE_LIMIT, "0"); + init(0, conf); + List excludedNodes = new ArrayList<>(); + + Assertions.assertThrows(SCMException.class, () -> + provider.create(RatisReplicationConfig + .getInstance(ReplicationFactor.THREE), + excludedNodes, Collections.EMPTY_LIST)); + } @Test public void testCreatePipelinesWhenNotEnoughSpace() throws Exception { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java index 08b0122ee4ac..0b9dbdfbe7a7 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdds.scm.safemode; import java.io.File; +import java.time.Clock; 
import java.time.ZoneOffset; import java.util.ArrayList; import java.util.List; @@ -44,7 +45,6 @@ import org.apache.hadoop.hdds.scm.pipeline.PipelineProvider; import org.apache.hadoop.hdds.scm.pipeline.PipelineManagerImpl; import org.apache.hadoop.hdds.server.events.EventQueue; -import org.apache.hadoop.ozone.common.MonotonicClock; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.Assertions; @@ -88,7 +88,7 @@ public void testHealthyPipelineSafeModeRuleWithNoPipelines() eventQueue, scmContext, serviceManager, - new MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); PipelineProvider mockRatisProvider = new MockRatisPipelineProvider(nodeManager, pipelineManager.getStateManager(), config); @@ -143,7 +143,7 @@ public void testHealthyPipelineSafeModeRuleWithPipelines() throws Exception { eventQueue, scmContext, serviceManager, - new MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); PipelineProvider mockRatisProvider = new MockRatisPipelineProvider(nodeManager, @@ -242,7 +242,7 @@ public void testHealthyPipelineSafeModeRuleWithMixedPipelines() eventQueue, scmContext, serviceManager, - new MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); PipelineProvider mockRatisProvider = new MockRatisPipelineProvider(nodeManager, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java index 77bd18faa5a9..20c1a788f898 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.IOException; import java.nio.file.Path; +import java.time.Clock; import java.time.ZoneOffset; import java.util.ArrayList; import java.util.Collections; @@ -55,7 +56,6 @@ 
import org.apache.hadoop.hdds.server.events.EventHandler; import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.hdds.server.events.EventQueue; -import org.apache.hadoop.ozone.common.MonotonicClock; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterEach; @@ -254,7 +254,7 @@ public void testFailWithIncorrectValueForHealthyPipelinePercent() queue, scmContext, serviceManager, - new MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); scmSafeModeManager = new SCMSafeModeManager( conf, containers, null, pipelineManager, queue, serviceManager, scmContext); @@ -281,7 +281,7 @@ public void testFailWithIncorrectValueForOneReplicaPipelinePercent() queue, scmContext, serviceManager, - new MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); scmSafeModeManager = new SCMSafeModeManager( conf, containers, null, pipelineManager, queue, serviceManager, scmContext); @@ -307,7 +307,7 @@ public void testFailWithIncorrectValueForSafeModePercent() throws Exception { queue, scmContext, serviceManager, - new MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); scmSafeModeManager = new SCMSafeModeManager( conf, containers, null, pipelineManager, queue, serviceManager, scmContext); @@ -340,7 +340,7 @@ public void testSafeModeExitRuleWithPipelineAvailabilityCheck( queue, scmContext, serviceManager, - new MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); PipelineProvider mockRatisProvider = new MockRatisPipelineProvider(mockNodeManager, pipelineManager.getStateManager(), config); @@ -592,7 +592,7 @@ public void testSafeModePipelineExitRule() throws Exception { queue, scmContext, serviceManager, - new MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); PipelineProvider mockRatisProvider = new MockRatisPipelineProvider(nodeManager, @@ -657,7 +657,7 @@ public void testPipelinesNotCreatedUntilPreCheckPasses() queue, scmContext, serviceManager, - new 
MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); PipelineProvider mockRatisProvider = new MockRatisPipelineProvider(nodeManager, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java index 0230109fa47f..b620ace2a994 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java @@ -144,6 +144,10 @@ public void testGetVersionTask() throws Exception { try (EndpointStateMachine rpcEndPoint = createEndpoint(conf, serverAddress, 1000)) { DatanodeDetails datanodeDetails = randomDatanodeDetails(); + conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_ENABLED, + true); + conf.setBoolean( + OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT, true); OzoneContainer ozoneContainer = new OzoneContainer( datanodeDetails, conf, getContext(datanodeDetails), null); rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION); @@ -168,6 +172,10 @@ public void testCheckVersionResponse() throws Exception { true); conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_IPC_RANDOM_PORT, true); + conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_ENABLED, + true); + conf.setBoolean( + OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT, true); conf.setFromObject(new ReplicationConfig().setPort(0)); try (EndpointStateMachine rpcEndPoint = createEndpoint(conf, serverAddress, 1000)) { diff --git a/hadoop-hdds/test-utils/pom.xml b/hadoop-hdds/test-utils/pom.xml index d4d60605a07f..5c5fc0bdbd94 100644 --- a/hadoop-hdds/test-utils/pom.xml +++ b/hadoop-hdds/test-utils/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-test-utils - 1.3.0-SNAPSHOT + 
1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Test Utils Apache Ozone HDDS Test Utils jar @@ -74,7 +74,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.jacoco org.jacoco.core - 0.8.5 provided diff --git a/hadoop-hdds/test-utils/src/main/java/org/apache/ozone/test/GenericTestUtils.java b/hadoop-hdds/test-utils/src/main/java/org/apache/ozone/test/GenericTestUtils.java index e03f0a7ffe2e..771f5137a4ff 100644 --- a/hadoop-hdds/test-utils/src/main/java/org/apache/ozone/test/GenericTestUtils.java +++ b/hadoop-hdds/test-utils/src/main/java/org/apache/ozone/test/GenericTestUtils.java @@ -25,6 +25,8 @@ import java.io.PrintWriter; import java.io.StringWriter; import java.io.UnsupportedEncodingException; +import java.util.List; +import java.util.Map; import java.util.concurrent.TimeoutException; import com.google.common.base.Preconditions; @@ -32,6 +34,7 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.log4j.Appender; import org.apache.log4j.Layout; import org.apache.log4j.Level; @@ -43,6 +46,7 @@ import org.mockito.Mockito; import java.lang.reflect.Field; import java.lang.reflect.Modifier; +import java.util.stream.Collectors; import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertTrue; @@ -283,6 +287,12 @@ public static T getFieldReflection(Object object, String fieldName) return value; } + public static Map getReverseMap(Map> map) { + return map.entrySet().stream().flatMap(entry -> entry.getValue().stream() + .map(v -> Pair.of(v, entry.getKey()))) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + } + /** * Class to capture logs for doing assertions. 
*/ diff --git a/hadoop-hdds/tools/pom.xml b/hadoop-hdds/tools/pom.xml index 29cbeaf0e1c9..16d086b17612 100644 --- a/hadoop-hdds/tools/pom.xml +++ b/hadoop-hdds/tools/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone hdds - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT hdds-tools - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Distributed Data Store Tools Apache Ozone HDDS Tools jar @@ -99,5 +99,9 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> slf4j-reload4j ${slf4j.version} + + org.apache.ozone + hdds-server-scm + diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/cert/CertCommands.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/cert/CertCommands.java index 21ba03599e76..6b50cb451b1a 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/cert/CertCommands.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/cert/CertCommands.java @@ -40,6 +40,7 @@ subcommands = { InfoSubcommand.class, ListSubcommand.class, + CleanExpired.class, }) @MetaInfServices(SubcommandWithParent.class) diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/cert/CleanExpired.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/cert/CleanExpired.java new file mode 100644 index 000000000000..b5a2ec523f15 --- /dev/null +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/cert/CleanExpired.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.cli.cert; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.hdds.cli.GenericParentCommand; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.cli.SubcommandWithParent; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition; +import org.apache.hadoop.hdds.utils.HAUtils; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +import java.io.File; +import java.io.IOException; +import java.math.BigInteger; +import java.security.cert.X509Certificate; +import java.time.Instant; +import java.util.concurrent.Callable; + +/** + * This is the handler to clean SCM database from expired certificates. + */ +@CommandLine.Command( + name = "clean", + description = "Clean expired certificates from the SCM metadata. 
" + + "This command is only supported when the SCM is shutdown.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class CleanExpired implements Callable, SubcommandWithParent { + + private static final Logger LOG = LoggerFactory.getLogger(CleanExpired.class); + + @CommandLine.Option(names = {"--db"}, + required = true, + description = "Database file path") + private String dbFilePath; + + @CommandLine.Spec + private CommandLine.Model.CommandSpec spec; + + @Override + public Void call() { + GenericParentCommand parent = + (GenericParentCommand) spec.root().userObject(); + + OzoneConfiguration configuration = parent.createOzoneConfiguration(); + + File db = new File(dbFilePath); + if (!db.exists()) { + LOG.error("DB path does not exist: " + dbFilePath); + return null; + } + if (!db.isDirectory()) { + LOG.error("DB path does not point to a directory: " + dbFilePath); + return null; + } + + try { + DBStore dbStore = HAUtils.loadDB( + configuration, db.getParentFile(), + db.getName(), new SCMDBDefinition()); + removeExpiredCertificates(dbStore); + } catch (Exception e) { + LOG.error("Error trying to open file: " + dbFilePath + + " failed with exception: " + e); + } + return null; + } + + @VisibleForTesting + void removeExpiredCertificates(DBStore dbStore) { + try { + Table certsTable = + SCMDBDefinition.VALID_CERTS.getTable(dbStore); + TableIterator> tableIterator = certsTable.iterator(); + while (tableIterator.hasNext()) { + Table.KeyValue certPair = tableIterator.next(); + X509Certificate certificate = (X509Certificate) certPair.getValue(); + if (Instant.now().isAfter(certificate.getNotAfter().toInstant())) { + LOG.info("Certificate with id " + certPair.getKey() + + " and value: " + certificate + "will be deleted"); + tableIterator.removeFromDB(); + } + } + } catch (IOException e) { + LOG.error("Error when trying to open " + + "certificate table from db: " + e); + } + } + + @Override + public Class getParentType() { + return 
CertCommands.class; + } +} diff --git a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/cert/TestCleanExpired.java b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/cert/TestCleanExpired.java new file mode 100644 index 000000000000..b169e6359d59 --- /dev/null +++ b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/cert/TestCleanExpired.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.cli.cert; + +import org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.math.BigInteger; +import java.nio.charset.StandardCharsets; +import java.security.cert.X509Certificate; +import java.sql.Date; +import java.time.Duration; +import java.time.Instant; + +/** + * Test the cleaning tool for expired certificates. 
+ */ +public class TestCleanExpired { + + private CleanExpired cmd; + private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); + private final ByteArrayOutputStream errContent = new ByteArrayOutputStream(); + private final PrintStream originalOut = System.out; + private final PrintStream originalErr = System.err; + private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); + + @Mock + private DBStore dbStore; + @Mock + private Table mockTable; + @Mock + private TableIterator iterator; + @Mock + private Table.KeyValue kv; + @Mock + private Table.KeyValue kv2; + @Mock + private X509Certificate nonExpiredCert; + @Mock + private X509Certificate expiredCert; + + @BeforeEach + public void setup() throws IOException { + MockitoAnnotations.initMocks(this); + cmd = new CleanExpired(); + System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); + System.setErr(new PrintStream(errContent, false, DEFAULT_ENCODING)); + } + + @AfterEach + public void tearDown() { + System.setOut(originalOut); + System.setErr(originalErr); + } + + @Test + public void testOnlyExpiredCertsRemoved() + throws Exception { + Mockito.when(SCMDBDefinition.VALID_CERTS.getTable(dbStore)) + .thenReturn(mockTable); + Mockito.when(mockTable.iterator()).thenReturn(iterator); + Mockito.when(nonExpiredCert.getNotAfter()) + .thenReturn(Date.from(Instant.now().plus(Duration.ofDays(365)))); + Mockito.when(expiredCert.getNotAfter()) + .thenReturn(Date.from(Instant.now().minus(Duration.ofDays(365)))); + Mockito.when(iterator.hasNext()).thenReturn(true, true, false); + Mockito.when(iterator.next()).thenReturn(kv, kv2); + Mockito.when(kv.getValue()).thenReturn(expiredCert); + Mockito.when(kv2.getValue()).thenReturn(nonExpiredCert); + + cmd.removeExpiredCertificates(dbStore); + Mockito.verify(iterator, Mockito.times(1)).removeFromDB(); + } +} \ No newline at end of file diff --git a/hadoop-ozone/client/pom.xml b/hadoop-ozone/client/pom.xml index 
35e13d22cfca..e3e4a0655beb 100644 --- a/hadoop-ozone/client/pom.xml +++ b/hadoop-ozone/client/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-client - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Client Apache Ozone Client jar diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java index 596afa6371b9..b0a8e965c4bb 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.scm.client.HddsClientUtils; import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; import org.apache.hadoop.ozone.client.io.OzoneInputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.client.protocol.ClientProtocol; @@ -52,6 +53,7 @@ import java.io.IOException; import java.time.Instant; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -599,6 +601,40 @@ public OzoneOutputStream createKey(String key, long size, .createKey(volumeName, name, key, size, replicationConfig, keyMetadata); } + /** + * Creates a new key in the bucket, with default replication type RATIS and + * with replication factor THREE. + * + * @param key Name of the key to be created. + * @param size Size of the data the key will point to. + * @return OzoneOutputStream to which the data has to be written. 
+ * @throws IOException + */ + public OzoneDataStreamOutput createStreamKey(String key, long size) + throws IOException { + return createStreamKey(key, size, defaultReplication, + Collections.emptyMap()); + } + + /** + * Creates a new key in the bucket. + * + * @param key Name of the key to be created. + * @param size Size of the data the key will point to. + * @param replicationConfig Replication configuration. + * @return OzoneDataStreamOutput to which the data has to be written. + * @throws IOException + */ + public OzoneDataStreamOutput createStreamKey(String key, long size, + ReplicationConfig replicationConfig, Map keyMetadata) + throws IOException { + if (replicationConfig == null) { + replicationConfig = defaultReplication; + } + return proxy.createStreamKey(volumeName, name, key, size, + replicationConfig, keyMetadata); + } + /** * Reads an existing key from the bucket. * @@ -791,6 +827,21 @@ public OzoneOutputStream createMultipartKey(String key, long size, uploadID); } + /** + * Create a part key for a multipart upload key. + * @param key + * @param size + * @param partNumber + * @param uploadID + * @return OzoneDataStreamOutput + * @throws IOException + */ + public OzoneDataStreamOutput createMultipartStreamKey(String key, + long size, int partNumber, String uploadID) throws IOException { + return proxy.createMultipartStreamKey(volumeName, name, + key, size, partNumber, uploadID); + } + /** * Complete Multipart upload. This will combine all the parts and make the * key visible in ozone. @@ -921,6 +972,13 @@ public OzoneOutputStream createFile(String keyName, long size, overWrite, recursive); } + public OzoneDataStreamOutput createStreamFile(String keyName, long size, + ReplicationConfig replicationConfig, boolean overWrite, + boolean recursive) throws IOException { + return proxy.createStreamFile(volumeName, name, keyName, size, + replicationConfig, overWrite, recursive); + } + /** * List the status for a file or a directory and its contents. 
* diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockDataStreamOutputEntry.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockDataStreamOutputEntry.java new file mode 100644 index 000000000000..4e5a35a539ce --- /dev/null +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockDataStreamOutputEntry.java @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.client.io; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.XceiverClientFactory; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.storage.BlockDataStreamOutput; +import org.apache.hadoop.hdds.scm.storage.ByteBufferStreamOutput; +import org.apache.hadoop.hdds.scm.storage.StreamBuffer; +import org.apache.hadoop.hdds.security.token.OzoneBlockTokenIdentifier; +import org.apache.hadoop.security.token.Token; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +/** + * Helper class used inside {@link BlockDataStreamOutput}. + * */ +public final class BlockDataStreamOutputEntry + implements ByteBufferStreamOutput { + + private final OzoneClientConfig config; + private ByteBufferStreamOutput byteBufferStreamOutput; + private BlockID blockID; + private final String key; + private final XceiverClientFactory xceiverClientManager; + private final Pipeline pipeline; + // total number of bytes that should be written to this stream + private final long length; + // the current position of this stream 0 <= currentPosition < length + private long currentPosition; + private final Token token; + private List bufferList; + + @SuppressWarnings({"parameternumber", "squid:S00107"}) + private BlockDataStreamOutputEntry( + BlockID blockID, String key, + XceiverClientFactory xceiverClientManager, + Pipeline pipeline, + long length, + Token token, + OzoneClientConfig config, + List bufferList + ) { + this.config = config; + this.byteBufferStreamOutput = null; + this.blockID = blockID; + this.key = key; + this.xceiverClientManager = xceiverClientManager; + this.pipeline = pipeline; + this.token = token; + this.length = 
length; + this.currentPosition = 0; + this.bufferList = bufferList; + } + + long getLength() { + return length; + } + + Token getToken() { + return token; + } + + long getRemaining() { + return length - currentPosition; + } + + /** + * BlockDataStreamOutput is initialized in this function. This makes sure that + * xceiverClient initialization is not done during preallocation and only + * done when data is written. + * @throws IOException if xceiverClient initialization fails + */ + private void checkStream() throws IOException { + if (this.byteBufferStreamOutput == null) { + this.byteBufferStreamOutput = + new BlockDataStreamOutput(blockID, xceiverClientManager, pipeline, + config, token, bufferList); + } + } + + @Override + public void write(ByteBuffer b, int off, int len) throws IOException { + checkStream(); + byteBufferStreamOutput.write(b, off, len); + this.currentPosition += len; + } + + @Override + public void flush() throws IOException { + if (this.byteBufferStreamOutput != null) { + this.byteBufferStreamOutput.flush(); + } + } + + @Override + public void close() throws IOException { + if (this.byteBufferStreamOutput != null) { + this.byteBufferStreamOutput.close(); + // after closing the chunkOutPutStream, blockId would have been + // reconstructed with updated bcsId + this.blockID = + ((BlockDataStreamOutput) byteBufferStreamOutput).getBlockID(); + } + } + + boolean isClosed() { + if (byteBufferStreamOutput != null) { + return ((BlockDataStreamOutput) byteBufferStreamOutput).isClosed(); + } + return false; + } + + Collection getFailedServers() { + if (byteBufferStreamOutput != null) { + BlockDataStreamOutput out = + (BlockDataStreamOutput) this.byteBufferStreamOutput; + return out.getFailedServers(); + } + return Collections.emptyList(); + } + + long getWrittenDataLength() { + if (byteBufferStreamOutput != null) { + BlockDataStreamOutput out = + (BlockDataStreamOutput) this.byteBufferStreamOutput; + return out.getWrittenDataLength(); + } else { + // For a 
pre allocated block for which no write has been initiated, + // the ByteBufferStreamOutput will be null here. + // In such cases, the default blockCommitSequenceId will be 0 + return 0; + } + } + + public long getTotalAckDataLength() { + if (byteBufferStreamOutput != null) { + BlockDataStreamOutput out = + (BlockDataStreamOutput) this.byteBufferStreamOutput; + blockID = out.getBlockID(); + return out.getTotalAckDataLength(); + } else { + // For a pre allocated block for which no write has been initiated, + // the OutputStream will be null here. + // In such cases, the default blockCommitSequenceId will be 0 + return 0; + } + } + + void cleanup(boolean invalidateClient) throws IOException { + checkStream(); + BlockDataStreamOutput out = + (BlockDataStreamOutput) this.byteBufferStreamOutput; + out.cleanup(invalidateClient); + + } + + void writeOnRetry(long len) throws IOException { + checkStream(); + BlockDataStreamOutput out = + (BlockDataStreamOutput) this.byteBufferStreamOutput; + out.writeOnRetry(len); + this.currentPosition += len; + + } + + /** + * Builder class for BlockDataStreamOutputEntry. 
+ * */ + public static class Builder { + + private BlockID blockID; + private String key; + private XceiverClientFactory xceiverClientManager; + private Pipeline pipeline; + private long length; + private Token token; + private OzoneClientConfig config; + private List bufferList; + + public Builder setBlockID(BlockID bID) { + this.blockID = bID; + return this; + } + + public Builder setKey(String keys) { + this.key = keys; + return this; + } + + public Builder setXceiverClientManager( + XceiverClientFactory + xClientManager) { + this.xceiverClientManager = xClientManager; + return this; + } + + public Builder setPipeline(Pipeline ppln) { + this.pipeline = ppln; + return this; + } + + + public Builder setLength(long len) { + this.length = len; + return this; + } + + public Builder setConfig(OzoneClientConfig clientConfig) { + this.config = clientConfig; + return this; + } + + public Builder setToken(Token bToken) { + this.token = bToken; + return this; + } + + public Builder setBufferList(List bList) { + this.bufferList = bList; + return this; + } + + public BlockDataStreamOutputEntry build() { + return new BlockDataStreamOutputEntry(blockID, + key, + xceiverClientManager, + pipeline, + length, + token, config, bufferList); + } + } + + @VisibleForTesting + public ByteBufferStreamOutput getByteBufStreamOutput() { + return byteBufferStreamOutput; + } + + public BlockID getBlockID() { + return blockID; + } + + public String getKey() { + return key; + } + + public XceiverClientFactory getXceiverClientManager() { + return xceiverClientManager; + } + + public Pipeline getPipeline() { + return pipeline; + } + + public long getCurrentPosition() { + return currentPosition; + } + + public void setCurrentPosition(long curPosition) { + this.currentPosition = curPosition; + } +} + + diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockDataStreamOutputEntryPool.java 
b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockDataStreamOutputEntryPool.java new file mode 100644 index 000000000000..e51242cc107b --- /dev/null +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockDataStreamOutputEntryPool.java @@ -0,0 +1,290 @@ + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.client.io; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.XceiverClientFactory; +import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; +import org.apache.hadoop.hdds.scm.pipeline.PipelineID; +import org.apache.hadoop.hdds.scm.storage.StreamBuffer; +import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; +import org.apache.hadoop.ozone.om.helpers.OmMultipartCommitUploadPartInfo; +import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.ListIterator; + +/** + * This class manages the stream entries list and handles block allocation + * from OzoneManager. 
+ */ +public class BlockDataStreamOutputEntryPool { + + public static final Logger LOG = + LoggerFactory.getLogger(BlockDataStreamOutputEntryPool.class); + + private final List streamEntries; + private final OzoneClientConfig config; + private int currentStreamIndex; + private final OzoneManagerProtocol omClient; + private final OmKeyArgs keyArgs; + private final XceiverClientFactory xceiverClientFactory; + private final String requestID; + private OmMultipartCommitUploadPartInfo commitUploadPartInfo; + private final long openID; + private final ExcludeList excludeList; + private List bufferList; + + @SuppressWarnings({"parameternumber", "squid:S00107"}) + public BlockDataStreamOutputEntryPool( + OzoneClientConfig config, + OzoneManagerProtocol omClient, + String requestId, ReplicationConfig replicationConfig, + String uploadID, int partNumber, + boolean isMultipart, OmKeyInfo info, + boolean unsafeByteBufferConversion, + XceiverClientFactory xceiverClientFactory, long openID + ) { + this.config = config; + this.xceiverClientFactory = xceiverClientFactory; + streamEntries = new ArrayList<>(); + currentStreamIndex = 0; + this.omClient = omClient; + this.keyArgs = new OmKeyArgs.Builder().setVolumeName(info.getVolumeName()) + .setBucketName(info.getBucketName()).setKeyName(info.getKeyName()) + .setReplicationConfig(replicationConfig).setDataSize(info.getDataSize()) + .setIsMultipartKey(isMultipart).setMultipartUploadID(uploadID) + .setMultipartUploadPartNumber(partNumber).build(); + this.requestID = requestId; + this.openID = openID; + this.excludeList = new ExcludeList(); + this.bufferList = new ArrayList<>(); + } + + /** + * When a key is opened, it is possible that there are some blocks already + * allocated to it for this open session. In this case, to make use of these + * blocks, we need to add these blocks to stream entries. 
But, a key's version + * also includes blocks from previous versions, we need to avoid adding these + * old blocks to stream entries, because these old blocks should not be picked + * for write. To do this, the following method checks that, only those + * blocks created in this particular open version are added to stream entries. + * + * @param version the set of blocks that are pre-allocated. + * @param openVersion the version corresponding to the pre-allocation. + * @throws IOException + */ + public void addPreallocateBlocks(OmKeyLocationInfoGroup version, + long openVersion) throws IOException { + // server may return any number of blocks, (0 to any) + // only the blocks allocated in this open session (block createVersion + // equals to open session version) + for (OmKeyLocationInfo subKeyInfo : version.getLocationList(openVersion)) { + addKeyLocationInfo(subKeyInfo); + } + } + + private void addKeyLocationInfo(OmKeyLocationInfo subKeyInfo) { + Preconditions.checkNotNull(subKeyInfo.getPipeline()); + BlockDataStreamOutputEntry.Builder builder = + new BlockDataStreamOutputEntry.Builder() + .setBlockID(subKeyInfo.getBlockID()) + .setKey(keyArgs.getKeyName()) + .setXceiverClientManager(xceiverClientFactory) + .setPipeline(subKeyInfo.getPipeline()) + .setConfig(config) + .setLength(subKeyInfo.getLength()) + .setToken(subKeyInfo.getToken()) + .setBufferList(bufferList); + streamEntries.add(builder.build()); + } + + public List getLocationInfoList() { + List locationInfoList = new ArrayList<>(); + for (BlockDataStreamOutputEntry streamEntry : streamEntries) { + long length = streamEntry.getCurrentPosition(); + + // Commit only those blocks to OzoneManager which are not empty + if (length != 0) { + OmKeyLocationInfo info = + new OmKeyLocationInfo.Builder().setBlockID(streamEntry.getBlockID()) + .setLength(streamEntry.getCurrentPosition()).setOffset(0) + .setToken(streamEntry.getToken()) + .setPipeline(streamEntry.getPipeline()).build(); + locationInfoList.add(info); + } 
+ if (LOG.isDebugEnabled()) { + LOG.debug( + "block written " + streamEntry.getBlockID() + ", length " + length + + " bcsID " + streamEntry.getBlockID() + .getBlockCommitSequenceId()); + } + } + return locationInfoList; + } + + /** + * Discards the subsequent pre allocated blocks and removes the streamEntries + * from the streamEntries list for the container which is closed. + * @param containerID id of the closed container + * @param pipelineId id of the associated pipeline + */ + void discardPreallocatedBlocks(long containerID, PipelineID pipelineId) { + // currentStreamIndex < streamEntries.size() signifies that, there are still + // pre allocated blocks available. + + // This will be called only to discard the next subsequent unused blocks + // in the streamEntryList. + if (currentStreamIndex + 1 < streamEntries.size()) { + ListIterator streamEntryIterator = + streamEntries.listIterator(currentStreamIndex + 1); + while (streamEntryIterator.hasNext()) { + BlockDataStreamOutputEntry streamEntry = streamEntryIterator.next(); + Preconditions.checkArgument(streamEntry.getCurrentPosition() == 0); + if ((streamEntry.getPipeline().getId().equals(pipelineId)) || + (containerID != -1 && + streamEntry.getBlockID().getContainerID() == containerID)) { + streamEntryIterator.remove(); + } + } + } + } + + List getStreamEntries() { + return streamEntries; + } + + XceiverClientFactory getXceiverClientFactory() { + return xceiverClientFactory; + } + + String getKeyName() { + return keyArgs.getKeyName(); + } + + long getKeyLength() { + return streamEntries.stream().mapToLong( + BlockDataStreamOutputEntry::getCurrentPosition).sum(); + } + /** + * Contact OM to get a new block. Set the new block with the index (e.g. + * first block has index = 0, second has index = 1 etc.) + * + * The returned block is made to new BlockDataStreamOutputEntry to write. 
+ * + * @throws IOException + */ + private void allocateNewBlock() throws IOException { + if (!excludeList.isEmpty()) { + LOG.debug("Allocating block with {}", excludeList); + } + OmKeyLocationInfo subKeyInfo = + omClient.allocateBlock(keyArgs, openID, excludeList); + addKeyLocationInfo(subKeyInfo); + } + + + void commitKey(long offset) throws IOException { + if (keyArgs != null) { + // in test, this could be null + long length = getKeyLength(); + Preconditions.checkArgument(offset == length); + keyArgs.setDataSize(length); + keyArgs.setLocationInfoList(getLocationInfoList()); + // When the key is multipart upload part file upload, we should not + // commit the key, as this is not an actual key, this is just a + // partial key of a large file. + if (keyArgs.getIsMultipartKey()) { + commitUploadPartInfo = + omClient.commitMultipartUploadPart(keyArgs, openID); + } else { + omClient.commitKey(keyArgs, openID); + } + } else { + LOG.warn("Closing KeyDataStreamOutput, but key args is null"); + } + } + + public BlockDataStreamOutputEntry getCurrentStreamEntry() { + if (streamEntries.isEmpty() || streamEntries.size() <= currentStreamIndex) { + return null; + } else { + return streamEntries.get(currentStreamIndex); + } + } + + BlockDataStreamOutputEntry allocateBlockIfNeeded() throws IOException { + BlockDataStreamOutputEntry streamEntry = getCurrentStreamEntry(); + if (streamEntry != null && streamEntry.isClosed()) { + // a stream entry gets closed either by : + // a. If the stream gets full + // b. it has encountered an exception + currentStreamIndex++; + } + if (streamEntries.size() <= currentStreamIndex) { + Preconditions.checkNotNull(omClient); + // allocate a new block, if an exception happens, log an error and + // throw exception to the caller directly, and the write fails. + allocateNewBlock(); + } + // in theory, this condition should never be violated due to the check above + // still do a sanity check.
+ Preconditions.checkArgument(currentStreamIndex < streamEntries.size()); + return streamEntries.get(currentStreamIndex); + } + + void cleanup() { + if (excludeList != null) { + excludeList.clear(); + } + + if (streamEntries != null) { + streamEntries.clear(); + } + } + + public OmMultipartCommitUploadPartInfo getCommitUploadPartInfo() { + return commitUploadPartInfo; + } + + public ExcludeList getExcludeList() { + return excludeList; + } + + boolean isEmpty() { + return streamEntries.isEmpty(); + } + + long computeBufferData() { + long totalDataLen = 0; + for (StreamBuffer b : bufferList) { + totalDataLen += b.position(); + } + return totalDataLen; + } +} diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntry.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntry.java index 7431a171eb33..2501803fc070 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntry.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntry.java @@ -22,6 +22,7 @@ import java.util.Collection; import java.util.Collections; +import org.apache.hadoop.fs.Syncable; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; @@ -139,6 +140,18 @@ public void flush() throws IOException { } } + void hsync() throws IOException { + if (isInitialized()) { + final OutputStream out = getOutputStream(); + if (!(out instanceof Syncable)) { + throw new UnsupportedOperationException( + out.getClass() + " is not " + Syncable.class.getSimpleName()); + } + + ((Syncable)out).hsync(); + } + } + @Override public void close() throws IOException { if (isInitialized()) { diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntryPool.java 
b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntryPool.java index 687bb8474bce..acc70d0dda61 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntryPool.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntryPool.java @@ -23,11 +23,11 @@ import org.apache.hadoop.hdds.scm.OzoneClientConfig; import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; -import org.apache.hadoop.ozone.common.MonotonicClock; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol; +import java.time.Clock; import java.time.ZoneOffset; /** @@ -66,7 +66,7 @@ public ECBlockOutputStreamEntryPool(OzoneClientConfig config, @Override ExcludeList createExcludeList() { return new ExcludeList(getConfig().getExcludeNodesExpiryTime(), - new MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); } @Override diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECKeyOutputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECKeyOutputStream.java index 12440b44cf8f..7d5a06fc5291 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECKeyOutputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECKeyOutputStream.java @@ -23,6 +23,14 @@ import java.util.Arrays; import java.util.List; import java.util.Set; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; 
import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -56,11 +64,16 @@ public final class ECKeyOutputStream extends KeyOutputStream { private OzoneClientConfig config; private ECChunkBuffers ecChunkBufferCache; + private final BlockingQueue ecStripeQueue; + private int chunkIndex; private int ecChunkSize; private final int numDataBlks; private final int numParityBlks; private final ByteBufferPool bufferPool; private final RawErasureEncoder encoder; + private final ExecutorService flushExecutor; + private final Future flushFuture; + private final AtomicLong flushCheckpoint; private enum StripeWriteStatus { SUCCESS, @@ -92,6 +105,16 @@ public List getLocationInfoList() { return blockOutputStreamEntryPool.getLocationInfoList(); } + @VisibleForTesting + public void insertFlushCheckpoint(long version) throws IOException { + addStripeToQueue(new CheckpointDummyStripe(version)); + } + + @VisibleForTesting + public long getFlushCheckpoint() { + return flushCheckpoint.get(); + } + private ECKeyOutputStream(Builder builder) { super(builder.getClientMetrics()); this.config = builder.getClientConfig(); @@ -105,6 +128,8 @@ private ECKeyOutputStream(Builder builder) { this.numParityBlks = builder.getReplicationConfig().getParity(); ecChunkBufferCache = new ECChunkBuffers( ecChunkSize, numDataBlks, numParityBlks, bufferPool); + chunkIndex = 0; + ecStripeQueue = new ArrayBlockingQueue<>(config.getEcStripeQueueSize()); OmKeyInfo info = builder.getOpenHandler().getKeyInfo(); blockOutputStreamEntryPool = new ECBlockOutputStreamEntryPool(config, @@ -119,6 +144,9 @@ private ECKeyOutputStream(Builder builder) { this.writeOffset = 0; this.encoder = CodecUtil.createRawEncoderWithFallback( builder.getReplicationConfig()); + this.flushExecutor = Executors.newSingleThreadExecutor(); + this.flushFuture = this.flushExecutor.submit(this::flushStripeFromQueue); + this.flushCheckpoint = new AtomicLong(0); } /** @@ -165,14 +193,14 @@ public void write(byte[] b, int off, int 
len) throws IOException { } } catch (Exception e) { markStreamClosed(); - throw new IOException(e.getMessage()); + throw e; } writeOffset += len; } - private StripeWriteStatus rewriteStripeToNewBlockGroup() throws IOException { + private void rollbackAndReset(ECChunkBuffers stripe) throws IOException { // Rollback the length/offset updated as part of this failed stripe write. - final ByteBuffer[] dataBuffers = ecChunkBufferCache.getDataBuffers(); + final ByteBuffer[] dataBuffers = stripe.getDataBuffers(); offset -= Arrays.stream(dataBuffers).mapToInt(Buffer::limit).sum(); final ECBlockOutputStreamEntry failedStreamEntry = @@ -185,28 +213,6 @@ private StripeWriteStatus rewriteStripeToNewBlockGroup() throws IOException { failedStreamEntry.getPipeline().getId()); // Let's close the current entry. failedStreamEntry.close(); - - // Let's rewrite the last stripe, so that it will be written to new block - // group. - // TODO: we can improve to write partial stripe failures. In that case, - // we just need to write only available buffers. 
- blockOutputStreamEntryPool.allocateBlockIfNeeded(); - final ECBlockOutputStreamEntry currentStreamEntry = - blockOutputStreamEntryPool.getCurrentStreamEntry(); - for (int i = 0; i < numDataBlks; i++) { - if (dataBuffers[i].limit() > 0) { - handleOutputStreamWrite(i, dataBuffers[i].limit(), false); - } - currentStreamEntry.useNextBlockStream(); - } - return handleParityWrites(); - } - - private void encodeAndWriteParityCells() throws IOException { - generateParityCells(); - if (handleParityWrites() == StripeWriteStatus.FAILED) { - retryStripeWrite(config.getMaxECStripeWriteRetries()); - } } private void logStreamError(List failedStreams, @@ -228,8 +234,8 @@ private void logStreamError(List failedStreams, } } - private StripeWriteStatus handleParityWrites() throws IOException { - writeParityCells(); + private StripeWriteStatus commitStripeWrite(ECChunkBuffers stripe) + throws IOException { ECBlockOutputStreamEntry streamEntry = blockOutputStreamEntryPool.getCurrentStreamEntry(); @@ -246,7 +252,7 @@ private StripeWriteStatus handleParityWrites() throws IOException { // By this time, we should have finished full stripe. So, lets call // executePutBlock for all. 
final boolean isLastStripe = streamEntry.getRemaining() <= 0 || - ecChunkBufferCache.getLastDataCell().limit() < ecChunkSize; + stripe.getLastDataCell().limit() < ecChunkSize; ByteString checksum = streamEntry.calculateChecksum(); streamEntry.executePutBlock(isLastStripe, streamEntry.getCurrentPosition(), checksum); @@ -261,7 +267,7 @@ private StripeWriteStatus handleParityWrites() throws IOException { } streamEntry.updateBlockGroupToAckedPosition( streamEntry.getCurrentPosition()); - ecChunkBufferCache.clear(); + stripe.clear(); if (streamEntry.getRemaining() <= 0) { streamEntry.close(); @@ -340,63 +346,66 @@ private void generateParityCells() throws IOException { } } - private void writeParityCells() { + private void writeDataCells(ECChunkBuffers stripe) throws IOException { + blockOutputStreamEntryPool.allocateBlockIfNeeded(); + ByteBuffer[] dataCells = stripe.getDataBuffers(); + for (int i = 0; i < numDataBlks; i++) { + if (dataCells[i].limit() > 0) { + handleOutputStreamWrite(dataCells[i], false); + } + blockOutputStreamEntryPool.getCurrentStreamEntry().useNextBlockStream(); + } + } + + private void writeParityCells(ECChunkBuffers stripe) { // Move the stream entry cursor to parity block index blockOutputStreamEntryPool .getCurrentStreamEntry().forceToFirstParityBlock(); - ByteBuffer[] parityCells = ecChunkBufferCache.getParityBuffers(); + ByteBuffer[] parityCells = stripe.getParityBuffers(); for (int i = 0; i < numParityBlks; i++) { - handleOutputStreamWrite(numDataBlks + i, parityCells[i].limit(), true); + handleOutputStreamWrite(parityCells[i], true); blockOutputStreamEntryPool.getCurrentStreamEntry().useNextBlockStream(); } } private int handleWrite(byte[] b, int off, int len) throws IOException { - - blockOutputStreamEntryPool.allocateBlockIfNeeded(); - - int currIdx = blockOutputStreamEntryPool - .getCurrentStreamEntry().getCurrentStreamIdx(); - int bufferRem = ecChunkBufferCache.dataBuffers[currIdx].remaining(); + int bufferRem = 
ecChunkBufferCache.dataBuffers[chunkIndex].remaining(); final int writeLen = Math.min(len, Math.min(bufferRem, ecChunkSize)); - int pos = ecChunkBufferCache.addToDataBuffer(currIdx, b, off, writeLen); + int pos = ecChunkBufferCache.addToDataBuffer(chunkIndex, b, off, writeLen); - // if this cell is full, send data to the OutputStream + // if this cell is full, use next buffer if (pos == ecChunkSize) { - handleOutputStreamWrite(currIdx, pos, false); - blockOutputStreamEntryPool.getCurrentStreamEntry().useNextBlockStream(); + chunkIndex++; // if this is last data cell in the stripe, - // compute and write the parity cells - if (currIdx == numDataBlks - 1) { - encodeAndWriteParityCells(); + // compute parity cells and write data + if (chunkIndex == numDataBlks) { + generateParityCells(); + addStripeToQueue(ecChunkBufferCache); + ecChunkBufferCache = new ECChunkBuffers(ecChunkSize, + numDataBlks, numParityBlks, bufferPool); + chunkIndex = 0; } } return writeLen; } - private void handleOutputStreamWrite(int currIdx, int len, boolean isParity) { - ByteBuffer bytesToWrite = isParity ? - ecChunkBufferCache.getParityBuffers()[currIdx - numDataBlks] : - ecChunkBufferCache.getDataBuffers()[currIdx]; + private void handleOutputStreamWrite(ByteBuffer buffer, boolean isParity) { try { // Since it's a full cell, let's write all content from buffer. // At a time we write max cell size in EC. So, it should safe to cast // the len to int to use the super class defined write API. // The len cannot be bigger than cell buffer size. - assert len <= ecChunkSize : " The len: " + len + ". EC chunk size: " - + ecChunkSize; - assert len <= bytesToWrite - .limit() : " The len: " + len + ". 
Chunk buffer limit: " - + bytesToWrite.limit(); + assert buffer.limit() <= ecChunkSize : "The buffer size: " + + buffer.limit() + " should not exceed EC chunk size: " + ecChunkSize; writeToOutputStream(blockOutputStreamEntryPool.getCurrentStreamEntry(), - bytesToWrite.array(), len, 0, isParity); + buffer.array(), buffer.limit(), 0, isParity); } catch (Exception e) { markStreamAsFailed(e); } } - private long writeToOutputStream(ECBlockOutputStreamEntry current, + private void writeToOutputStream(ECBlockOutputStreamEntry current, byte[] b, int writeLen, int off, boolean isParity) throws IOException { try { @@ -414,7 +423,6 @@ private long writeToOutputStream(ECBlockOutputStreamEntry current, .getCurrentStreamIdx(), ioe); handleException(current, ioe); } - return writeLen; } private void handleException(BlockOutputStreamEntry streamEntry, @@ -481,37 +489,95 @@ public void close() throws IOException { try { // If stripe buffer is not empty, encode and flush the stripe. if (ecChunkBufferCache.getFirstDataCell().position() > 0) { - final int index = blockOutputStreamEntryPool.getCurrentStreamEntry() - .getCurrentStreamIdx(); - ByteBuffer lastCell = ecChunkBufferCache.getDataBuffers()[index]; - - // Finish writing the current partial cached chunk - if (lastCell.position() % ecChunkSize != 0) { - handleOutputStreamWrite(index, lastCell.position(), false); - } - - encodeAndWriteParityCells(); + generateParityCells(); + addStripeToQueue(ecChunkBufferCache); } + // Send EOF mark to flush thread. + addStripeToQueue(new EOFDummyStripe()); + + // Wait for all the stripes to be written. 
+ flushFuture.get(); + flushExecutor.shutdownNow(); closeCurrentStreamEntry(); Preconditions.checkArgument(writeOffset == offset, "Expected writeOffset= " + writeOffset + " Expected offset=" + offset); blockOutputStreamEntryPool.commitKey(offset); + } catch (ExecutionException e) { + Throwable cause = e.getCause(); + if (cause instanceof IOException) { + throw (IOException) cause; + } else if (cause instanceof RuntimeException) { + throw (RuntimeException) cause; + } else { + throw new IOException(cause); + } + } catch (InterruptedException e) { + throw new IOException("Flushing thread was interrupted", e); } finally { blockOutputStreamEntryPool.cleanup(); } - ecChunkBufferCache.release(); } - private void retryStripeWrite(int times) throws IOException { - for (int i = 0; i < times; i++) { - if (rewriteStripeToNewBlockGroup() == StripeWriteStatus.SUCCESS) { + private void addStripeToQueue(ECChunkBuffers stripe) throws IOException { + try { + do { + // If flushFuture is done, it means that the flush thread has + // encountered an exception. Call get() to throw that exception here. + if (flushFuture.isDone()) { + flushFuture.get(); + // We should never reach here. 
+ throw new IOException("Flush thread has ended before stream close"); + } + } while (!ecStripeQueue.offer(stripe, 1, TimeUnit.SECONDS)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted while adding stripe to queue", e); + } catch (ExecutionException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } else if (e.getCause() instanceof RuntimeException) { + throw (RuntimeException) e.getCause(); + } else { + throw new IOException(e.getCause()); + } + } + } + + private boolean flushStripeFromQueue() throws IOException { + try { + ECChunkBuffers stripe = ecStripeQueue.take(); + while (!(stripe instanceof EOFDummyStripe)) { + if (stripe instanceof CheckpointDummyStripe) { + flushCheckpoint.set(((CheckpointDummyStripe) stripe).version); + } else { + flushStripeToDatanodes(stripe); + stripe.release(); + } + stripe = ecStripeQueue.take(); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted while polling stripe from queue", e); + } + return true; + } + + private void flushStripeToDatanodes(ECChunkBuffers stripe) + throws IOException { + int maxRetry = config.getMaxECStripeWriteRetries(); + for (int i = 0; i <= maxRetry; i++) { + writeDataCells(stripe); + writeParityCells(stripe); + if (commitStripeWrite(stripe) == StripeWriteStatus.SUCCESS) { return; } + // In case of failure, cleanup before retry + rollbackAndReset(stripe); } throw new IOException("Completed max allowed retries " + - times + " on stripe failures."); + maxRetry + " on stripe failures."); } public static void padBufferToLimit(ByteBuffer buf, int limit) { @@ -580,12 +646,30 @@ private void checkNotClosed() throws IOException { } } + private static class EOFDummyStripe extends ECChunkBuffers { + EOFDummyStripe() { + } + } + + private static class CheckpointDummyStripe extends ECChunkBuffers { + private final long version; + 
CheckpointDummyStripe(long version) { + super(); + this.version = version; + } + } + private static class ECChunkBuffers { private final ByteBuffer[] dataBuffers; private final ByteBuffer[] parityBuffers; private int cellSize; private ByteBufferPool byteBufferPool; + ECChunkBuffers() { + dataBuffers = null; + parityBuffers = null; + } + ECChunkBuffers(int cellSize, int numData, int numParity, ByteBufferPool byteBufferPool) { this.cellSize = cellSize; diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyDataStreamOutput.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyDataStreamOutput.java new file mode 100644 index 000000000000..dc5c3a016d70 --- /dev/null +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyDataStreamOutput.java @@ -0,0 +1,510 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.client.io; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.hadoop.fs.FSExceptionMessages; +import org.apache.hadoop.fs.FileEncryptionInfo; +import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.XceiverClientFactory; +import org.apache.hadoop.hdds.scm.client.HddsClientUtils; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.pipeline.PipelineID; +import org.apache.hadoop.hdds.scm.storage.AbstractDataStreamOutput; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; +import org.apache.hadoop.ozone.om.helpers.OmMultipartCommitUploadPartInfo; +import org.apache.hadoop.ozone.om.helpers.OpenKeySession; +import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; + +/** + * Maintains a list of BlockDataStreamOutputEntry. Writes based on offset. + * + * Note that this may write to multiple containers in one write call. In case + * that first container succeeded but later ones failed, the succeeded writes + * are not rolled back. + * + * TODO : currently does not support multi-thread access. + */ +public class KeyDataStreamOutput extends AbstractDataStreamOutput { + + private OzoneClientConfig config; + + /** + * Defines stream action while calling handleFlushOrClose.
+ */ + enum StreamAction { + FLUSH, CLOSE, FULL + } + + public static final Logger LOG = + LoggerFactory.getLogger(KeyDataStreamOutput.class); + + private boolean closed; + private FileEncryptionInfo feInfo; + + // how much of data is actually written yet to underlying stream + private long offset; + // how much data has been ingested into the stream + private long writeOffset; + + private final BlockDataStreamOutputEntryPool blockDataStreamOutputEntryPool; + + private long clientID; + + @VisibleForTesting + public List getStreamEntries() { + return blockDataStreamOutputEntryPool.getStreamEntries(); + } + + @VisibleForTesting + public XceiverClientFactory getXceiverClientFactory() { + return blockDataStreamOutputEntryPool.getXceiverClientFactory(); + } + + @VisibleForTesting + public List getLocationInfoList() { + return blockDataStreamOutputEntryPool.getLocationInfoList(); + } + + @VisibleForTesting + public long getClientID() { + return clientID; + } + + @SuppressWarnings({"parameternumber", "squid:S00107"}) + public KeyDataStreamOutput( + OzoneClientConfig config, + OpenKeySession handler, + XceiverClientFactory xceiverClientManager, + OzoneManagerProtocol omClient, int chunkSize, + String requestId, ReplicationConfig replicationConfig, + String uploadID, int partNumber, boolean isMultipart, + boolean unsafeByteBufferConversion + ) { + super(HddsClientUtils.getRetryPolicyByException( + config.getMaxRetryCount(), config.getRetryInterval())); + this.config = config; + OmKeyInfo info = handler.getKeyInfo(); + blockDataStreamOutputEntryPool = + new BlockDataStreamOutputEntryPool( + config, + omClient, + requestId, replicationConfig, + uploadID, partNumber, + isMultipart, info, + unsafeByteBufferConversion, + xceiverClientManager, + handler.getId()); + + // Retrieve the file encryption key info, null if file is not in + // encrypted bucket. 
+ this.feInfo = info.getFileEncryptionInfo(); + this.writeOffset = 0; + this.clientID = handler.getId(); + } + + /** + * When a key is opened, it is possible that there are some blocks already + * allocated to it for this open session. In this case, to make use of these + * blocks, we need to add these blocks to stream entries. But, a key's version + * also includes blocks from previous versions, we need to avoid adding these + * old blocks to stream entries, because these old blocks should not be picked + * for write. To do this, the following method checks that, only those + * blocks created in this particular open version are added to stream entries. + * + * @param version the set of blocks that are pre-allocated. + * @param openVersion the version corresponding to the pre-allocation. + * @throws IOException + */ + public void addPreallocateBlocks(OmKeyLocationInfoGroup version, + long openVersion) throws IOException { + blockDataStreamOutputEntryPool.addPreallocateBlocks(version, openVersion); + } + + @Override + public void write(ByteBuffer b, int off, int len) throws IOException { + checkNotClosed(); + if (b == null) { + throw new NullPointerException(); + } + handleWrite(b, off, len, false); + writeOffset += len; + } + + private void handleWrite(ByteBuffer b, int off, long len, boolean retry) + throws IOException { + while (len > 0) { + try { + BlockDataStreamOutputEntry current = + blockDataStreamOutputEntryPool.allocateBlockIfNeeded(); + // length(len) will be in int range if the call is happening through + // write API of blockDataStreamOutput. Length can be in long range + // if it comes via Exception path. + int expectedWriteLen = Math.min((int) len, + (int) current.getRemaining()); + long currentPos = current.getWrittenDataLength(); + // writeLen will be updated based on whether the write was succeeded + // or if it sees an exception, how much the actual write was + // acknowledged. 
+ int writtenLength = + writeToDataStreamOutput(current, retry, len, b, + expectedWriteLen, off, currentPos); + if (current.getRemaining() <= 0) { + // since the current block is already written close the stream. + handleFlushOrClose(StreamAction.FULL); + } + len -= writtenLength; + off += writtenLength; + } catch (Exception e) { + markStreamClosed(); + throw new IOException(e); + } + } + } + + private int writeToDataStreamOutput(BlockDataStreamOutputEntry current, + boolean retry, long len, ByteBuffer b, int writeLen, int off, + long currentPos) throws IOException { + try { + if (retry) { + current.writeOnRetry(len); + } else { + current.write(b, off, writeLen); + offset += writeLen; + } + } catch (IOException ioe) { + // for the current iteration, totalDataWritten - currentPos gives the + // amount of data already written to the buffer + + // In the retryPath, the total data to be written will always be equal + // to or less than the max length of the buffer allocated. + // The len specified here is the combined sum of the data length of + // the buffers + Preconditions.checkState(!retry || len <= config + .getStreamBufferMaxSize()); + int dataWritten = (int) (current.getWrittenDataLength() - currentPos); + writeLen = retry ? (int) len : dataWritten; + // In retry path, the data written is already accounted in offset. + if (!retry) { + offset += writeLen; + } + LOG.debug("writeLen {}, total len {}", writeLen, len); + handleException(current, ioe); + } + return writeLen; + } + + /** + * It performs following actions : + * a. Updates the committed length at datanode for the current stream in + * datanode. + * b. Reads the data from the underlying buffer and writes it the next stream. 
+ * + * @param streamEntry StreamEntry + * @param exception actual exception that occurred + * @throws IOException Throws IOException if Write fails + */ + private void handleException(BlockDataStreamOutputEntry streamEntry, + IOException exception) throws IOException { + Throwable t = HddsClientUtils.checkForException(exception); + Preconditions.checkNotNull(t); + boolean retryFailure = checkForRetryFailure(t); + boolean containerExclusionException = false; + if (!retryFailure) { + containerExclusionException = checkIfContainerToExclude(t); + } + Pipeline pipeline = streamEntry.getPipeline(); + PipelineID pipelineId = pipeline.getId(); + long totalSuccessfulFlushedData = streamEntry.getTotalAckDataLength(); + //set the correct length for the current stream + streamEntry.setCurrentPosition(totalSuccessfulFlushedData); + long containerId = streamEntry.getBlockID().getContainerID(); + Collection failedServers = streamEntry.getFailedServers(); + Preconditions.checkNotNull(failedServers); + ExcludeList excludeList = blockDataStreamOutputEntryPool.getExcludeList(); + long bufferedDataLen = blockDataStreamOutputEntryPool.computeBufferData(); + if (!failedServers.isEmpty()) { + excludeList.addDatanodes(failedServers); + } + + // if the container needs to be excluded , add the container to the + // exclusion list , otherwise add the pipeline to the exclusion list + if (containerExclusionException) { + excludeList.addConatinerId(ContainerID.valueOf(containerId)); + } else { + excludeList.addPipeline(pipelineId); + } + // just clean up the current stream. 
+ streamEntry.cleanup(retryFailure); + + // discard all subsequent blocks in the containers and pipelines which + // are in the exclude list so that, the very next retry should never + // write data on the closed container/pipeline + if (containerExclusionException) { + // discard subsequent pre allocated blocks from the streamEntries list + // from the closed container + blockDataStreamOutputEntryPool + .discardPreallocatedBlocks(streamEntry.getBlockID().getContainerID(), + null); + } else { + // In case there is timeoutException or Watch for commit happening over + // majority or the client connection failure to the leader in the + // pipeline, just discard all the pre allocated blocks on this pipeline. + // Next block allocation will happen with excluding this specific pipeline + // This will ensure if 2 way commit happens , it cannot span over multiple + // blocks + blockDataStreamOutputEntryPool + .discardPreallocatedBlocks(-1, pipelineId); + } + if (bufferedDataLen > 0) { + // If the data is still cached in the underlying stream, we need to + // allocate new block and write this data in the datanode. + handleRetry(exception); + handleWrite(null, 0, bufferedDataLen, true); + // reset the retryCount after handling the exception + resetRetryCount(); + } + } + + private void markStreamClosed() { + blockDataStreamOutputEntryPool.cleanup(); + closed = true; + } + + @Override + public void flush() throws IOException { + checkNotClosed(); + handleFlushOrClose(StreamAction.FLUSH); + } + + /** + * Close or Flush the latest outputStream depending upon the action. + * This function gets called when, while write is going on, the current stream + * gets full or explicit flush or close request is made by client. When the + * stream gets full and we try to close the stream, we might end up hitting + * an exception in the exception handling path, we write the data residing + * in the buffer pool to a new Block.
In cases, as such, when the data gets + * written to new stream , it will be at max half full. In such cases, we + * should just write the data and not close the stream as the block won't be + * completely full. + * + * @param op Flag which decides whether to call close or flush on the + * outputStream. + * @throws IOException In case, flush or close fails with exception. + */ + @SuppressWarnings("squid:S1141") + private void handleFlushOrClose(StreamAction op) throws IOException { + if (!blockDataStreamOutputEntryPool.isEmpty()) { + while (true) { + try { + BlockDataStreamOutputEntry entry = + blockDataStreamOutputEntryPool.getCurrentStreamEntry(); + if (entry != null) { + try { + handleStreamAction(entry, op); + } catch (IOException ioe) { + handleException(entry, ioe); + continue; + } + } + return; + } catch (Exception e) { + markStreamClosed(); + throw e; + } + } + } + } + + private void handleStreamAction(BlockDataStreamOutputEntry entry, + StreamAction op) throws IOException { + Collection failedServers = entry.getFailedServers(); + // failed servers can be null in case there is no data written in + // the stream + if (!failedServers.isEmpty()) { + blockDataStreamOutputEntryPool.getExcludeList().addDatanodes( + failedServers); + } + switch (op) { + case CLOSE: + entry.close(); + break; + case FULL: + if (entry.getRemaining() == 0) { + entry.close(); + } + break; + case FLUSH: + entry.flush(); + break; + default: + throw new IOException("Invalid Operation"); + } + } + + /** + * Commit the key to OM, this will add the blocks as the new key blocks. 
+ * + * @throws IOException + */ + @Override + public void close() throws IOException { + if (closed) { + return; + } + closed = true; + try { + handleFlushOrClose(StreamAction.CLOSE); + if (!isException()) { + Preconditions.checkArgument(writeOffset == offset); + } + blockDataStreamOutputEntryPool.commitKey(offset); + } finally { + blockDataStreamOutputEntryPool.cleanup(); + } + } + + public OmMultipartCommitUploadPartInfo getCommitUploadPartInfo() { + return blockDataStreamOutputEntryPool.getCommitUploadPartInfo(); + } + + public FileEncryptionInfo getFileEncryptionInfo() { + return feInfo; + } + + @VisibleForTesting + public ExcludeList getExcludeList() { + return blockDataStreamOutputEntryPool.getExcludeList(); + } + + /** + * Builder class of KeyDataStreamOutput. + */ + public static class Builder { + private OpenKeySession openHandler; + private XceiverClientFactory xceiverManager; + private OzoneManagerProtocol omClient; + private int chunkSize; + private String requestID; + private String multipartUploadID; + private int multipartNumber; + private boolean isMultipartKey; + private boolean unsafeByteBufferConversion; + private OzoneClientConfig clientConfig; + private ReplicationConfig replicationConfig; + + public Builder setMultipartUploadID(String uploadID) { + this.multipartUploadID = uploadID; + return this; + } + + public Builder setMultipartNumber(int partNumber) { + this.multipartNumber = partNumber; + return this; + } + + public Builder setHandler(OpenKeySession handler) { + this.openHandler = handler; + return this; + } + + public Builder setXceiverClientManager(XceiverClientFactory manager) { + this.xceiverManager = manager; + return this; + } + + public Builder setOmClient(OzoneManagerProtocol client) { + this.omClient = client; + return this; + } + + public Builder setChunkSize(int size) { + this.chunkSize = size; + return this; + } + + public Builder setRequestID(String id) { + this.requestID = id; + return this; + } + + public Builder 
setIsMultipartKey(boolean isMultipart) { + this.isMultipartKey = isMultipart; + return this; + } + + public Builder setConfig(OzoneClientConfig config) { + this.clientConfig = config; + return this; + } + + public Builder enableUnsafeByteBufferConversion(boolean enabled) { + this.unsafeByteBufferConversion = enabled; + return this; + } + + + public Builder setReplicationConfig(ReplicationConfig replConfig) { + this.replicationConfig = replConfig; + return this; + } + + public KeyDataStreamOutput build() { + return new KeyDataStreamOutput( + clientConfig, + openHandler, + xceiverManager, + omClient, + chunkSize, + requestID, + replicationConfig, + multipartUploadID, + multipartNumber, + isMultipartKey, + unsafeByteBufferConversion); + } + + } + + /** + * Verify that the output stream is open. Non blocking; this gives + * the last state of the volatile {@link #closed} field. + * @throws IOException if the connection is closed. + */ + private void checkNotClosed() throws IOException { + if (closed) { + throw new IOException( + ": " + FSExceptionMessages.STREAM_IS_CLOSED + " Key: " + + blockDataStreamOutputEntryPool.getKeyName()); + } + } +} diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyInputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyInputStream.java index 20cb2b5d60e0..ce068f1b3643 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyInputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyInputStream.java @@ -17,29 +17,21 @@ */ package org.apache.hadoop.ozone.client.io; -import java.io.EOFException; import java.io.IOException; -import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.Function; import java.util.stream.Collectors; -import com.google.common.base.Preconditions; -import 
org.apache.hadoop.fs.FSExceptionMessages; import org.apache.hadoop.hdds.client.BlockID; -import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.storage.BlockExtendedInputStream; -import org.apache.hadoop.hdds.scm.storage.BlockInputStream; -import org.apache.hadoop.hdds.scm.storage.ByteArrayReader; -import org.apache.hadoop.hdds.scm.storage.ByteBufferReader; +import org.apache.hadoop.hdds.scm.storage.BlockLocationInfo; import org.apache.hadoop.hdds.scm.storage.ByteReaderStrategy; -import org.apache.hadoop.hdds.scm.storage.ExtendedInputStream; +import org.apache.hadoop.hdds.scm.storage.MultipartInputStream; +import org.apache.hadoop.hdds.scm.storage.PartInputStream; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; @@ -48,42 +40,85 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static java.util.stream.Collectors.groupingBy; + /** * Maintaining a list of BlockInputStream. Read based on offset. */ -public class KeyInputStream extends ExtendedInputStream { +public class KeyInputStream extends MultipartInputStream { private static final Logger LOG = LoggerFactory.getLogger(KeyInputStream.class); - private static final int EOF = -1; - - private String key; - private long length = 0; - private boolean closed = false; - - // List of BlockInputStreams, one for each block in the key - private final List blockStreams; - - // blockOffsets[i] stores the index of the first data byte in - // blockStream w.r.t the key data. - // For example, let’s say the block size is 200 bytes and block[0] stores - // data from indices 0 - 199, block[1] from indices 200 - 399 and so on. - // Then, blockOffset[0] = 0 (the offset of the first byte of data in - // block[0]), blockOffset[1] = 200 and so on. 
- private long[] blockOffsets = null; - - // Index of the blockStream corresponding to the current position of the - // KeyInputStream i.e. offset of the data to be read next - private int blockIndex; + public KeyInputStream(String keyName, + List inputStreams) { + super(keyName, inputStreams); + } - // Tracks the blockIndex corresponding to the last seeked position so that it - // can be reset if a new position is seeked. - private int blockIndexOfPrevPosition; + private static List createStreams( + OmKeyInfo keyInfo, + List blockInfos, + XceiverClientFactory xceiverClientFactory, + boolean verifyChecksum, + Function retryFunction, + BlockInputStreamFactory blockStreamFactory) { + List partStreams = new ArrayList<>(); + for (OmKeyLocationInfo omKeyLocationInfo : blockInfos) { + if (LOG.isDebugEnabled()) { + LOG.debug("Adding stream for accessing {}. The stream will be " + + "initialized later.", omKeyLocationInfo); + } + // We also pass in functional reference which is used to refresh the + // pipeline info for a given OM Key location info. - public KeyInputStream() { - blockStreams = new ArrayList<>(); - blockIndex = 0; + // Append another BlockInputStream to the end of the list. Note that the + // BlockInputStream is only created here and not initialized. The + // BlockInputStream is initialized when a read operation is performed on + // the block for the first time. 
+ BlockExtendedInputStream stream = + blockStreamFactory.create( + keyInfo.getReplicationConfig(), + omKeyLocationInfo, + omKeyLocationInfo.getPipeline(), + omKeyLocationInfo.getToken(), + verifyChecksum, + xceiverClientFactory, + keyBlockID -> { + OmKeyInfo newKeyInfo = retryFunction.apply(keyInfo); + return getPipeline(newKeyInfo, omKeyLocationInfo.getBlockID()); + }); + partStreams.add(stream); + } + return partStreams; + } + + private static Pipeline getPipeline(OmKeyInfo newKeyInfo, BlockID blockID) { + List collect = + newKeyInfo.getLatestVersionLocations() + .getLocationList() + .stream() + .filter(l -> l.getBlockID().equals(blockID)) + .collect(Collectors.toList()); + if (CollectionUtils.isNotEmpty(collect)) { + return collect.get(0).getPipeline(); + } else { + return null; + } + } + + private static LengthInputStream getFromOmKeyInfo( + OmKeyInfo keyInfo, + XceiverClientFactory xceiverClientFactory, + boolean verifyChecksum, + Function retryFunction, + BlockInputStreamFactory blockStreamFactory, + List locationInfos) { + List streams = createStreams(keyInfo, + locationInfos, xceiverClientFactory, verifyChecksum, retryFunction, + blockStreamFactory); + KeyInputStream keyInputStream = + new KeyInputStream(keyInfo.getKeyName(), streams); + return new LengthInputStream(keyInputStream, keyInputStream.getLength()); } /** @@ -93,331 +128,60 @@ public static LengthInputStream getFromOmKeyInfo(OmKeyInfo keyInfo, XceiverClientFactory xceiverClientFactory, boolean verifyChecksum, Function retryFunction, BlockInputStreamFactory blockStreamFactory) { + List keyLocationInfos = keyInfo .getLatestVersionLocations().getBlocksLatestVersionOnly(); - KeyInputStream keyInputStream = new KeyInputStream(); - keyInputStream.initialize(keyInfo, keyLocationInfos, - xceiverClientFactory, verifyChecksum, retryFunction, - blockStreamFactory); - - return new LengthInputStream(keyInputStream, keyInputStream.length); + return getFromOmKeyInfo(keyInfo, xceiverClientFactory, 
verifyChecksum, + retryFunction, blockStreamFactory, keyLocationInfos); } public static List getStreamsFromKeyInfo(OmKeyInfo keyInfo, XceiverClientFactory xceiverClientFactory, boolean verifyChecksum, Function retryFunction, BlockInputStreamFactory blockStreamFactory) { + List keyLocationInfos = keyInfo .getLatestVersionLocations().getBlocksLatestVersionOnly(); - List lengthInputStreams = new ArrayList<>(); - // Iterate through each block info in keyLocationInfos and assign it the - // corresponding part in the partsToBlockMap. Also increment each part's - // length accordingly. - Map> partsToBlocksMap = new HashMap<>(); - Map partsLengthMap = new HashMap<>(); - - for (OmKeyLocationInfo omKeyLocationInfo: keyLocationInfos) { - int partNumber = omKeyLocationInfo.getPartNumber(); - - if (!partsToBlocksMap.containsKey(partNumber)) { - partsToBlocksMap.put(partNumber, new ArrayList<>()); - partsLengthMap.put(partNumber, 0L); - } - // Add Block to corresponding partNumber in partsToBlocksMap - partsToBlocksMap.get(partNumber).add(omKeyLocationInfo); - // Update the part length - partsLengthMap.put(partNumber, - partsLengthMap.get(partNumber) + omKeyLocationInfo.getLength()); - } + // corresponding part in the partsToBlockMap. + Map> partsToBlocksMap = + keyLocationInfos.stream() + .collect(groupingBy(BlockLocationInfo::getPartNumber)); + List lengthInputStreams = new ArrayList<>(); // Create a KeyInputStream for each part. 
- for (Map.Entry> entry : - partsToBlocksMap.entrySet()) { - KeyInputStream keyInputStream = new KeyInputStream(); - keyInputStream.initialize(keyInfo, entry.getValue(), - xceiverClientFactory, verifyChecksum, retryFunction, - blockStreamFactory); - lengthInputStreams.add(new LengthInputStream(keyInputStream, - partsLengthMap.get(entry.getKey()))); + for (List locationInfo : partsToBlocksMap.values()) { + lengthInputStreams.add(getFromOmKeyInfo(keyInfo, xceiverClientFactory, + verifyChecksum, retryFunction, blockStreamFactory, locationInfo)); } - return lengthInputStreams; } - private synchronized void initialize(OmKeyInfo keyInfo, - List blockInfos, - XceiverClientFactory xceiverClientFactory, - boolean verifyChecksum, Function retryFunction, - BlockInputStreamFactory blockStreamFactory) { - this.key = keyInfo.getKeyName(); - this.blockOffsets = new long[blockInfos.size()]; - long keyLength = 0; - for (int i = 0; i < blockInfos.size(); i++) { - OmKeyLocationInfo omKeyLocationInfo = blockInfos.get(i); - if (LOG.isDebugEnabled()) { - LOG.debug("Adding stream for accessing {}. The stream will be " + - "initialized later.", omKeyLocationInfo); - } - - // We also pass in functional reference which is used to refresh the - // pipeline info for a given OM Key location info. 
- addStream(keyInfo.getReplicationConfig(), omKeyLocationInfo, - xceiverClientFactory, - verifyChecksum, keyLocationInfo -> { - OmKeyInfo newKeyInfo = retryFunction.apply(keyInfo); - BlockID blockID = keyLocationInfo.getBlockID(); - List collect = - newKeyInfo.getLatestVersionLocations() - .getLocationList() - .stream() - .filter(l -> l.getBlockID().equals(blockID)) - .collect(Collectors.toList()); - if (CollectionUtils.isNotEmpty(collect)) { - return collect.get(0).getPipeline(); - } else { - return null; - } - }, blockStreamFactory); - - this.blockOffsets[i] = keyLength; - keyLength += omKeyLocationInfo.getLength(); - } - this.length = keyLength; - } - - /** - * Append another BlockInputStream to the end of the list. Note that the - * BlockInputStream is only created here and not initialized. The - * BlockInputStream is initialized when a read operation is performed on - * the block for the first time. - */ - private synchronized void addStream(ReplicationConfig repConfig, - OmKeyLocationInfo blockInfo, - XceiverClientFactory xceiverClientFactory, boolean verifyChecksum, - Function refreshPipelineFunction, - BlockInputStreamFactory blockStreamFactory) { - blockStreams.add(blockStreamFactory.create(repConfig, blockInfo, - blockInfo.getPipeline(), blockInfo.getToken(), - verifyChecksum, xceiverClientFactory, - blockID -> refreshPipelineFunction.apply(blockInfo))); - } - - @VisibleForTesting - public void addStream(BlockInputStream blockInputStream) { - blockStreams.add(blockInputStream); - } - - @VisibleForTesting - public void addStream(BlockExtendedInputStream blockInputStream) { - blockStreams.add(blockInputStream); - } - - /** - * {@inheritDoc} - */ @Override - public synchronized int read() throws IOException { - byte[] buf = new byte[1]; - if (read(buf, 0, 1) == EOF) { - return EOF; - } - return Byte.toUnsignedInt(buf[0]); - } - - /** - * {@inheritDoc} - */ - @Override - public synchronized int read(byte[] b, int off, int len) throws IOException { - 
ByteReaderStrategy strategy = new ByteArrayReader(b, off, len); - int bufferLen = strategy.getTargetLength(); - if (bufferLen == 0) { - return 0; - } - return readWithStrategy(strategy); - } - - @Override - public synchronized int read(ByteBuffer byteBuffer) throws IOException { - ByteReaderStrategy strategy = new ByteBufferReader(byteBuffer); - int bufferLen = strategy.getTargetLength(); - if (bufferLen == 0) { - return 0; - } - return readWithStrategy(strategy); - } - - @Override - protected synchronized int readWithStrategy(ByteReaderStrategy strategy) - throws IOException { - Preconditions.checkArgument(strategy != null); - checkOpen(); - - int buffLen = strategy.getTargetLength(); - int totalReadLen = 0; - while (buffLen > 0) { - // if we are at the last block and have read the entire block, return - if (blockStreams.size() == 0 || - (blockStreams.size() - 1 <= blockIndex && - blockStreams.get(blockIndex) - .getRemaining() == 0)) { - return totalReadLen == 0 ? EOF : totalReadLen; - } - - // Get the current blockStream and read data from it - BlockExtendedInputStream current = blockStreams.get(blockIndex); - int numBytesToRead = (int)Math.min(buffLen, current.getRemaining()); - int numBytesRead = strategy.readFromBlock(current, numBytesToRead); - if (numBytesRead != numBytesToRead) { - // This implies that there is either data loss or corruption in the - // chunk entries. Even EOF in the current stream would be covered in - // this case. - throw new IOException(String.format("Inconsistent read for blockID=%s " - + "length=%d numBytesToRead=%d numBytesRead=%d", - current.getBlockID(), current.getLength(), numBytesToRead, - numBytesRead)); - } - totalReadLen += numBytesRead; - buffLen -= numBytesRead; - if (current.getRemaining() <= 0 && - ((blockIndex + 1) < blockStreams.size())) { - blockIndex += 1; - } - } - return totalReadLen; - } - - /** - * Seeks the KeyInputStream to the specified position. This involves 2 steps: - * 1. 
Updating the blockIndex to the blockStream corresponding to the - * seeked position. - * 2. Seeking the corresponding blockStream to the adjusted position. - * - * For example, let’s say the block size is 200 bytes and block[0] stores - * data from indices 0 - 199, block[1] from indices 200 - 399 and so on. - * Let’s say we seek to position 240. In the first step, the blockIndex - * would be updated to 1 as indices 200 - 399 reside in blockStream[1]. In - * the second step, the blockStream[1] would be seeked to position 40 (= - * 240 - blockOffset[1] (= 200)). - */ - @Override - public synchronized void seek(long pos) throws IOException { - checkOpen(); - if (pos == 0 && length == 0) { - // It is possible for length and pos to be zero in which case - // seek should return instead of throwing exception - return; - } - if (pos < 0 || pos > length) { - throw new EOFException( - "EOF encountered at pos: " + pos + " for key: " + key); - } - - // 1. Update the blockIndex - if (blockIndex >= blockStreams.size()) { - blockIndex = Arrays.binarySearch(blockOffsets, pos); - } else if (pos < blockOffsets[blockIndex]) { - blockIndex = - Arrays.binarySearch(blockOffsets, 0, blockIndex, pos); - } else if (pos >= blockOffsets[blockIndex] + blockStreams - .get(blockIndex).getLength()) { - blockIndex = Arrays - .binarySearch(blockOffsets, blockIndex + 1, - blockStreams.size(), pos); - } - if (blockIndex < 0) { - // Binary search returns -insertionPoint - 1 if element is not present - // in the array. insertionPoint is the point at which element would be - // inserted in the sorted array. We need to adjust the blockIndex - // accordingly so that blockIndex = insertionPoint - 1 - blockIndex = -blockIndex - 2; - } - - // Reset the previous blockStream's position - blockStreams.get(blockIndexOfPrevPosition).seek(0); - - // Reset all the blockStreams above the blockIndex. We do this to reset - // any previous reads which might have updated the blockPosition and - // chunkIndex. 
- for (int index = blockIndex + 1; index < blockStreams.size(); index++) { - blockStreams.get(index).seek(0); - } - // 2. Seek the blockStream to the adjusted position - blockStreams.get(blockIndex).seek(pos - blockOffsets[blockIndex]); - blockIndexOfPrevPosition = blockIndex; - } - - @Override - public synchronized long getPos() throws IOException { - return length == 0 ? 0 : blockOffsets[blockIndex] + - blockStreams.get(blockIndex).getPos(); - } - - @Override - public boolean seekToNewSource(long targetPos) throws IOException { - return false; - } - - @Override - public synchronized int available() throws IOException { - checkOpen(); - long remaining = length - getPos(); - return remaining <= Integer.MAX_VALUE ? (int) remaining : Integer.MAX_VALUE; - } - - @Override - public synchronized void close() throws IOException { - closed = true; - for (ExtendedInputStream blockStream : blockStreams) { - blockStream.close(); - } - } - - /** - * Verify that the input stream is open. Non blocking; this gives - * the last state of the volatile {@link #closed} field. - * @throws IOException if the connection is closed. 
- */ - private void checkOpen() throws IOException { - if (closed) { - throw new IOException( - ": " + FSExceptionMessages.STREAM_IS_CLOSED + " Key: " + key); - } - } - - @VisibleForTesting - public synchronized int getCurrentStreamIndex() { - return blockIndex; - } - - @VisibleForTesting - public long getRemainingOfIndex(int index) throws IOException { - return blockStreams.get(index).getRemaining(); + protected int getNumBytesToRead(ByteReaderStrategy strategy, + PartInputStream current) throws IOException { + return (int) Math.min(strategy.getTargetLength(), current.getRemaining()); } @Override - public synchronized long skip(long n) throws IOException { - if (n <= 0) { - return 0; + protected void checkPartBytesRead(int numBytesToRead, int numBytesRead, + PartInputStream stream) throws IOException { + if (numBytesRead != numBytesToRead) { + // This implies that there is either data loss or corruption in the + // chunk entries. Even EOF in the current stream would be covered in + // this case. 
+ throw new IOException(String.format("Inconsistent read for blockID=%s " + + "length=%d position=%d numBytesToRead=%d numBytesRead=%d", + ((BlockExtendedInputStream) stream).getBlockID(), stream.getLength(), + stream.getPos(), numBytesToRead, numBytesRead)); } - - long toSkip = Math.min(n, length - getPos()); - seek(getPos() + toSkip); - return toSkip; } @Override - public synchronized void unbuffer() { - for (ExtendedInputStream is : blockStreams) { - is.unbuffer(); - } - } - @VisibleForTesting - public List getBlockStreams() { - return blockStreams; + public List getPartStreams() { + return (List) super.getPartStreams(); } } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java index b60b59c39955..47cd76dac49b 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java @@ -27,6 +27,7 @@ import java.util.stream.Collectors; import org.apache.hadoop.fs.FSExceptionMessages; +import org.apache.hadoop.fs.Syncable; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; @@ -62,7 +63,7 @@ * * TODO : currently not support multi-thread access. */ -public class KeyOutputStream extends OutputStream { +public class KeyOutputStream extends OutputStream implements Syncable { private OzoneClientConfig config; @@ -70,7 +71,7 @@ public class KeyOutputStream extends OutputStream { * Defines stream action while calling handleFlushOrClose. 
*/ enum StreamAction { - FLUSH, CLOSE, FULL + FLUSH, HSYNC, CLOSE, FULL } public static final Logger LOG = @@ -440,6 +441,22 @@ public void flush() throws IOException { handleFlushOrClose(StreamAction.FLUSH); } + @Override + public void hflush() throws IOException { + hsync(); + } + + @Override + public void hsync() throws IOException { + checkNotClosed(); + handleFlushOrClose(StreamAction.HSYNC); + //TODO HDDS-7593: send hsyncKey to update length; + // where the hsyncKey op is similar to + // blockOutputStreamEntryPool.commitKey(offset) + // except that hsyncKey only updates the key length + // instead of committing it. + } + /** * Close or Flush the latest outputStream depending upon the action. * This function gets called when while write is going on, the current stream @@ -500,6 +517,9 @@ private void handleStreamAction(BlockOutputStreamEntry entry, case FLUSH: entry.flush(); break; + case HSYNC: + entry.hsync(); + break; default: throw new IOException("Invalid Operation"); } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneCryptoInputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneCryptoInputStream.java index 1d0cb2bb82a2..04b8d3487891 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneCryptoInputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneCryptoInputStream.java @@ -24,7 +24,7 @@ import org.apache.hadoop.crypto.CryptoCodec; import org.apache.hadoop.crypto.CryptoInputStream; import org.apache.hadoop.crypto.CryptoStreamUtils; -import org.apache.hadoop.fs.Seekable; +import org.apache.hadoop.hdds.scm.storage.PartInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,7 +33,7 @@ * Keys in Encrypted Buckets. 
*/ public class OzoneCryptoInputStream extends CryptoInputStream - implements Seekable { + implements PartInputStream { private static final Logger LOG = LoggerFactory.getLogger(OzoneCryptoInputStream.class); @@ -66,6 +66,7 @@ public OzoneCryptoInputStream(LengthInputStream in, this.partIndex = partIndex; } + @Override public long getLength() { return length; } @@ -74,10 +75,6 @@ public int getBufferSize() { return bufferSize; } - public long getRemaining() throws IOException { - return length - getPos(); - } - @Override public int read(byte[] b, int off, int len) throws IOException { // CryptoInputStream reads hadoop.security.crypto.buffer.size number of diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneDataStreamOutput.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneDataStreamOutput.java new file mode 100644 index 000000000000..d40ac2b332ef --- /dev/null +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneDataStreamOutput.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.ozone.client.io; + +import org.apache.hadoop.hdds.scm.storage.ByteBufferStreamOutput; +import org.apache.hadoop.ozone.om.helpers.OmMultipartCommitUploadPartInfo; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * OzoneDataStreamOutput is used to write data into Ozone. + * It uses SCM's {@link KeyDataStreamOutput} for writing the data. + */ +public class OzoneDataStreamOutput implements ByteBufferStreamOutput { + + private final ByteBufferStreamOutput byteBufferStreamOutput; + + /** + * Constructs OzoneDataStreamOutput with KeyDataStreamOutput. + * + * @param byteBufferStreamOutput the underlying ByteBufferStreamOutput + */ + public OzoneDataStreamOutput(ByteBufferStreamOutput byteBufferStreamOutput) { + this.byteBufferStreamOutput = byteBufferStreamOutput; + } + + @Override + public void write(ByteBuffer b, int off, int len) throws IOException { + byteBufferStreamOutput.write(b, off, len); + } + + @Override + public synchronized void flush() throws IOException { + byteBufferStreamOutput.flush(); + } + + @Override + public synchronized void close() throws IOException { + //commitKey can be done here, if needed. + byteBufferStreamOutput.close(); + } + + public OmMultipartCommitUploadPartInfo getCommitUploadPartInfo() { + if (byteBufferStreamOutput instanceof KeyDataStreamOutput) { + return ((KeyDataStreamOutput) + byteBufferStreamOutput).getCommitUploadPartInfo(); + } + // Otherwise return null. 
+ return null; + } + + public ByteBufferStreamOutput getByteBufStreamOutput() { + return byteBufferStreamOutput; + } +} diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneOutputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneOutputStream.java index bf88b6fd38de..bda805a76bc7 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneOutputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneOutputStream.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.client.io; import org.apache.hadoop.crypto.CryptoOutputStream; +import org.apache.hadoop.fs.Syncable; import org.apache.hadoop.ozone.om.helpers.OmMultipartCommitUploadPartInfo; import java.io.IOException; @@ -30,14 +31,20 @@ public class OzoneOutputStream extends OutputStream { private final OutputStream outputStream; + private final Syncable syncable; /** * Constructs OzoneOutputStream with KeyOutputStream. 
* - * @param outputStream + * @param syncable */ - public OzoneOutputStream(OutputStream outputStream) { + public OzoneOutputStream(Syncable syncable) { + this((OutputStream)syncable, syncable); + } + + public OzoneOutputStream(OutputStream outputStream, Syncable syncable) { this.outputStream = outputStream; + this.syncable = syncable; } @Override @@ -61,6 +68,20 @@ public synchronized void close() throws IOException { outputStream.close(); } + public void hsync() throws IOException { + if (syncable != null) { + if (outputStream != null && outputStream != syncable) { + outputStream.flush(); + } + syncable.hsync(); + } else if (outputStream instanceof Syncable) { + ((Syncable)outputStream).hsync(); + } else { + throw new UnsupportedOperationException(outputStream.getClass() + + " is not " + Syncable.class.getSimpleName()); + } + } + public OmMultipartCommitUploadPartInfo getCommitUploadPartInfo() { if (outputStream instanceof KeyOutputStream) { return ((KeyOutputStream) outputStream).getCommitUploadPartInfo(); diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java index 2f6e666fd0db..3554dc2fa474 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java @@ -40,6 +40,7 @@ import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.TenantArgs; import org.apache.hadoop.ozone.client.VolumeArgs; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; import org.apache.hadoop.ozone.client.io.OzoneInputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.om.OMConfigKeys; @@ -331,6 +332,20 @@ OzoneOutputStream createKey(String volumeName, String bucketName, Map metadata) throws IOException; + /** + * 
Writes a key in an existing bucket. + * @param volumeName Name of the Volume + * @param bucketName Name of the Bucket + * @param keyName Name of the Key + * @param size Size of the data + * @param metadata custom key value metadata + * @return {@link OzoneDataStreamOutput} + * + */ + OzoneDataStreamOutput createStreamKey(String volumeName, String bucketName, + String keyName, long size, ReplicationConfig replicationConfig, + Map metadata) + throws IOException; /** * Reads a key from an existing bucket. @@ -500,6 +515,24 @@ OzoneOutputStream createMultipartKey(String volumeName, String bucketName, int partNumber, String uploadID) throws IOException; + /** + * Create a part key for a multipart upload key. + * @param volumeName + * @param bucketName + * @param keyName + * @param size + * @param partNumber + * @param uploadID + * @return OzoneDataStreamOutput + * @throws IOException + */ + OzoneDataStreamOutput createMultipartStreamKey(String volumeName, + String bucketName, + String keyName, long size, + int partNumber, + String uploadID) + throws IOException; + /** * Complete Multipart upload. This will combine all the parts and make the * key visible in ozone. @@ -817,6 +850,11 @@ OzoneOutputStream createFile(String volumeName, String bucketName, String keyName, long size, ReplicationConfig replicationConfig, boolean overWrite, boolean recursive) throws IOException; + @SuppressWarnings("checkstyle:parameternumber") + OzoneDataStreamOutput createStreamFile(String volumeName, String bucketName, + String keyName, long size, ReplicationConfig replicationConfig, + boolean overWrite, boolean recursive) throws IOException; + /** * List the status for a file or a directory and its contents. 
diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java index 42aeba310dd9..74d6a0529aef 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java @@ -94,11 +94,13 @@ import org.apache.hadoop.ozone.client.io.BlockInputStreamFactory; import org.apache.hadoop.ozone.client.io.BlockInputStreamFactoryImpl; import org.apache.hadoop.ozone.client.io.ECKeyOutputStream; +import org.apache.hadoop.ozone.client.io.KeyDataStreamOutput; import org.apache.hadoop.ozone.client.io.KeyInputStream; import org.apache.hadoop.ozone.client.io.KeyOutputStream; import org.apache.hadoop.ozone.client.io.LengthInputStream; -import org.apache.hadoop.ozone.client.io.MultipartCryptoKeyInputStream; +import org.apache.hadoop.hdds.scm.storage.MultipartInputStream; import org.apache.hadoop.ozone.client.io.OzoneCryptoInputStream; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; import org.apache.hadoop.ozone.client.io.OzoneInputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.client.protocol.ClientProtocol; @@ -1213,6 +1215,48 @@ public OzoneOutputStream createKey( return createOutputStream(openKey, requestId); } + @Override + public OzoneDataStreamOutput createStreamKey( + String volumeName, String bucketName, String keyName, long size, + ReplicationConfig replicationConfig, + Map metadata) + throws IOException { + verifyVolumeName(volumeName); + verifyBucketName(bucketName); + if (checkKeyNameEnabled) { + HddsClientUtils.verifyKeyName(keyName); + } + HddsClientUtils.checkNotNull(keyName, replicationConfig); + String requestId = UUID.randomUUID().toString(); + + OmKeyArgs.Builder builder = new OmKeyArgs.Builder() + .setVolumeName(volumeName) + .setBucketName(bucketName) + 
.setKeyName(keyName) + .setDataSize(size) + .setReplicationConfig(replicationConfig) + .addAllMetadata(metadata) + .setAcls(getAclList()); + + if (Boolean.parseBoolean(metadata.get(OzoneConsts.GDPR_FLAG))) { + try { + GDPRSymmetricKey gKey = new GDPRSymmetricKey(new SecureRandom()); + builder.addAllMetadata(gKey.getKeyDetails()); + } catch (Exception e) { + if (e instanceof InvalidKeyException && + e.getMessage().contains("Illegal key size or default parameters")) { + LOG.error("Missing Unlimited Strength Policy jars. Please install " + + "Java Cryptography Extension (JCE) Unlimited Strength " + + "Jurisdiction Policy Files"); + } + throw new IOException(e); + } + } + + OpenKeySession openKey = ozoneManagerClient.openKey(builder.build()); + return createDataStreamOutput(openKey, requestId, replicationConfig); + } + private KeyProvider.KeyVersion getDEK(FileEncryptionInfo feInfo) throws IOException { // check crypto protocol version @@ -1622,6 +1666,70 @@ public OzoneOutputStream createMultipartKey(String volumeName, } } + @Override + public OzoneDataStreamOutput createMultipartStreamKey( + String volumeName, + String bucketName, + String keyName, + long size, + int partNumber, + String uploadID) + throws IOException { + verifyVolumeName(volumeName); + verifyBucketName(bucketName); + if (checkKeyNameEnabled) { + HddsClientUtils.verifyKeyName(keyName); + } + HddsClientUtils.checkNotNull(keyName, uploadID); + Preconditions.checkArgument(partNumber > 0 && partNumber <= 10000, "Part " + + "number should be greater than zero and less than or equal to 10000"); + Preconditions.checkArgument(size >= 0, "size should be greater than or " + + "equal to zero"); + String requestId = UUID.randomUUID().toString(); + + OmKeyArgs keyArgs = new OmKeyArgs.Builder() + .setVolumeName(volumeName) + .setBucketName(bucketName) + .setKeyName(keyName) + .setDataSize(size) + .setIsMultipartKey(true) + .setMultipartUploadID(uploadID) + .setMultipartUploadPartNumber(partNumber) + 
.setAcls(getAclList()) + .build(); + + OpenKeySession openKey = ozoneManagerClient.openKey(keyArgs); + + KeyDataStreamOutput keyOutputStream = + new KeyDataStreamOutput.Builder() + .setHandler(openKey) + .setXceiverClientManager(xceiverClientManager) + .setOmClient(ozoneManagerClient) + .setRequestID(requestId) + .setReplicationConfig(openKey.getKeyInfo().getReplicationConfig()) + .setMultipartNumber(partNumber) + .setMultipartUploadID(uploadID) + .setIsMultipartKey(true) + .enableUnsafeByteBufferConversion(unsafeByteBufferConversion) + .setConfig(clientConfig) + .build(); + keyOutputStream + .addPreallocateBlocks( + openKey.getKeyInfo().getLatestVersionLocations(), + openKey.getOpenVersion()); + + FileEncryptionInfo feInfo = openKey.getKeyInfo().getFileEncryptionInfo(); + if (feInfo != null) { + // todo: need to support file encrypt, + // https://issues.apache.org/jira/browse/HDDS-5892 + throw new UnsupportedOperationException( + "FileEncryptionInfo is not yet supported in " + + "createMultipartStreamKey"); + } else { + return new OzoneDataStreamOutput(keyOutputStream); + } + } + @Override public OmMultipartUploadCompleteInfo completeMultipartUpload( String volumeName, String bucketName, String keyName, String uploadID, @@ -1833,6 +1941,25 @@ private OmKeyArgs prepareOmKeyArgs(String volumeName, String bucketName, .build(); } + @Override + public OzoneDataStreamOutput createStreamFile(String volumeName, + String bucketName, String keyName, long size, + ReplicationConfig replicationConfig, boolean overWrite, boolean recursive) + throws IOException { + OmKeyArgs keyArgs = new OmKeyArgs.Builder() + .setVolumeName(volumeName) + .setBucketName(bucketName) + .setKeyName(keyName) + .setDataSize(size) + .setReplicationConfig(replicationConfig) + .setAcls(getAclList()) + .setLatestVersionLocation(getLatestVersionLocation) + .build(); + OpenKeySession keySession = + ozoneManagerClient.createFile(keyArgs, overWrite, recursive); + return createDataStreamOutput(keySession, 
UUID.randomUUID().toString(), + replicationConfig); + } @Override public List listStatus(String volumeName, String bucketName, @@ -1960,10 +2087,28 @@ private OzoneInputStream createInputStream( keyInfo.getKeyName(), i); cryptoInputStreams.add(ozoneCryptoInputStream); } - return new MultipartCryptoKeyInputStream(keyInfo.getKeyName(), - cryptoInputStreams); + return new OzoneInputStream( + new MultipartInputStream(keyInfo.getKeyName(), cryptoInputStreams)); } } + private OzoneDataStreamOutput createDataStreamOutput(OpenKeySession openKey, + String requestId, ReplicationConfig replicationConfig) + throws IOException { + KeyDataStreamOutput keyOutputStream = + new KeyDataStreamOutput.Builder() + .setHandler(openKey) + .setXceiverClientManager(xceiverClientManager) + .setOmClient(ozoneManagerClient) + .setRequestID(requestId) + .setReplicationConfig(replicationConfig) + .enableUnsafeByteBufferConversion(unsafeByteBufferConversion) + .setConfig(clientConfig) + .build(); + keyOutputStream + .addPreallocateBlocks(openKey.getKeyInfo().getLatestVersionLocations(), + openKey.getOpenVersion()); + return new OzoneDataStreamOutput(keyOutputStream); + } private OzoneOutputStream createOutputStream(OpenKeySession openKey, String requestId) throws IOException { @@ -1995,7 +2140,8 @@ private OzoneOutputStream createOutputStream(OpenKeySession openKey, ); gk.getCipher().init(Cipher.ENCRYPT_MODE, gk.getSecretKey()); return new OzoneOutputStream( - new CipherOutputStream(keyOutputStream, gk.getCipher())); + new CipherOutputStream(keyOutputStream, gk.getCipher()), + keyOutputStream); } } catch (Exception ex) { throw new IOException(ex); diff --git a/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/TestOzoneECClient.java b/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/TestOzoneECClient.java index 208ad82d76f2..237ed34e5bca 100644 --- a/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/TestOzoneECClient.java +++ 
b/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/TestOzoneECClient.java @@ -45,6 +45,7 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.ozone.erasurecode.rawcoder.RSRawErasureCoderFactory; import org.apache.ozone.erasurecode.rawcoder.RawErasureEncoder; +import org.apache.ozone.test.GenericTestUtils; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.junit.After; import org.junit.Assert; @@ -610,29 +611,29 @@ public void test10D4PConfigWithPartialStripe() @Test public void testWriteShouldFailIfMoreThanParityNodesFail() - throws IOException { + throws Exception { testNodeFailuresWhileWriting(new int[] {0, 1, 2}, 3, 2); } @Test public void testWriteShouldSuccessIfLessThanParityNodesFail() - throws IOException { + throws Exception { testNodeFailuresWhileWriting(new int[] {0}, 2, 2); } @Test - public void testWriteShouldSuccessIf4NodesFailed() throws IOException { + public void testWriteShouldSuccessIf4NodesFailed() throws Exception { testNodeFailuresWhileWriting(new int[] {0, 1, 2, 3}, 1, 2); } @Test public void testWriteShouldSuccessWithAdditional1BlockGroupAfterFailure() - throws IOException { + throws Exception { testNodeFailuresWhileWriting(new int[] {0, 1, 2, 3}, 10, 3); } @Test - public void testStripeWriteRetriesOn2Failures() throws IOException { + public void testStripeWriteRetriesOn2Failures() throws Exception { OzoneConfiguration con = new OzoneConfiguration(); con.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, 2, StorageUnit.KB); // Cluster has 15 nodes. 
So, first we will create 3 block groups with @@ -655,7 +656,7 @@ public void testStripeWriteRetriesOn2Failures() throws IOException { } @Test - public void testStripeWriteRetriesOn3Failures() throws IOException { + public void testStripeWriteRetriesOn3Failures() throws Exception { OzoneConfiguration con = new OzoneConfiguration(); con.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, 2, StorageUnit.KB); @@ -677,9 +678,9 @@ public void testStripeWriteRetriesOn3Failures() throws IOException { } // The mocked impl throws IllegalStateException when there are not enough - // nodes in allocateBlock request. But write() converts it to IOException. - @Test(expected = IOException.class) - public void testStripeWriteRetriesOnAllNodeFailures() throws IOException { + // nodes in allocateBlock request. + @Test(expected = IllegalStateException.class) + public void testStripeWriteRetriesOnAllNodeFailures() throws Exception { OzoneConfiguration con = new OzoneConfiguration(); con.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, 2, StorageUnit.KB); @@ -697,7 +698,7 @@ public void testStripeWriteRetriesOnAllNodeFailures() throws IOException { @Test public void testStripeWriteRetriesOn4FailuresWith3RetriesAllowed() - throws IOException { + throws Exception { OzoneConfiguration con = new OzoneConfiguration(); con.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, 2, StorageUnit.KB); con.setInt(OzoneConfigKeys.OZONE_CLIENT_MAX_EC_STRIPE_WRITE_RETRIES, 3); @@ -726,7 +727,7 @@ public void testStripeWriteRetriesOn4FailuresWith3RetriesAllowed() } public void testStripeWriteRetriesOnFailures(OzoneConfiguration con, - int clusterSize, int[] nodesIndexesToMarkFailure) throws IOException { + int clusterSize, int[] nodesIndexesToMarkFailure) throws Exception { close(); MultiNodePipelineBlockAllocator blkAllocator = new MultiNodePipelineBlockAllocator(con, dataBlocks + parityBlocks, @@ -744,6 +745,7 @@ public void testStripeWriteRetriesOnFailures(OzoneConfiguration con, for (int i = 0; 
i < dataBlocks; i++) { out.write(inputChunks[i]); } + waitForFlushingThreadToFinish((ECKeyOutputStream) out.getOutputStream()); Assert.assertTrue( ((MockXceiverClientFactory) factoryStub).getStorages().size() == 5); List failedDNs = new ArrayList<>(); @@ -787,7 +789,7 @@ public void testStripeWriteRetriesOnFailures(OzoneConfiguration con, public void testNodeFailuresWhileWriting(int[] nodesIndexesToMarkFailure, int numChunksToWriteAfterFailure, int numExpectedBlockGrps) - throws IOException { + throws Exception { store.createVolume(volumeName); OzoneVolume volume = store.getVolume(volumeName); volume.createBucket(bucketName); @@ -800,6 +802,7 @@ public void testNodeFailuresWhileWriting(int[] nodesIndexesToMarkFailure, for (int i = 0; i < dataBlocks; i++) { out.write(inputChunks[i]); } + waitForFlushingThreadToFinish((ECKeyOutputStream) out.getOutputStream()); List failedDNs = new ArrayList<>(); List dns = allocator.getClusterDns(); @@ -841,22 +844,22 @@ public void testNodeFailuresWhileWriting(int[] nodesIndexesToMarkFailure, } @Test - public void testExcludeOnDNFailure() throws IOException { + public void testExcludeOnDNFailure() throws Exception { testExcludeFailedDN(IntStream.range(0, 5), IntStream.empty()); } @Test - public void testExcludeOnDNClosed() throws IOException { + public void testExcludeOnDNClosed() throws Exception { testExcludeFailedDN(IntStream.empty(), IntStream.range(0, 5)); } @Test - public void testExcludeOnDNMixed() throws IOException { + public void testExcludeOnDNMixed() throws Exception { testExcludeFailedDN(IntStream.range(0, 3), IntStream.range(3, 5)); } private void testExcludeFailedDN(IntStream failedDNIndex, - IntStream closedDNIndex) throws IOException { + IntStream closedDNIndex) throws Exception { close(); OzoneConfiguration con = new OzoneConfiguration(); MultiNodePipelineBlockAllocator blkAllocator = @@ -882,6 +885,7 @@ private void testExcludeFailedDN(IntStream failedDNIndex, for (int i = 0; i < dataBlocks; i++) { 
out.write(inputChunks[i % dataBlocks]); } + waitForFlushingThreadToFinish((ECKeyOutputStream) out.getOutputStream()); // Then let's mark datanodes with closed container List closedDNs = closedDNIndex @@ -899,6 +903,7 @@ private void testExcludeFailedDN(IntStream failedDNIndex, for (int i = 0; i < dataBlocks; i++) { out.write(inputChunks[i % dataBlocks]); } + waitForFlushingThreadToFinish((ECKeyOutputStream) out.getOutputStream()); // Assert excludeList only includes failedDNs Assert.assertArrayEquals(failedDNs.toArray(new DatanodeDetails[0]), @@ -909,7 +914,7 @@ private void testExcludeFailedDN(IntStream failedDNIndex, @Test public void testLargeWriteOfMultipleStripesWithStripeFailure() - throws IOException { + throws Exception { close(); OzoneConfiguration con = new OzoneConfiguration(); // block size of 3KB could hold 3 full stripes @@ -943,6 +948,7 @@ public void testLargeWriteOfMultipleStripesWithStripeFailure() out.write(inputChunks[i]); } } + waitForFlushingThreadToFinish((ECKeyOutputStream) out.getOutputStream()); List failedDNs = new ArrayList<>(); List dns = allocator.getClusterDns(); @@ -1055,7 +1061,7 @@ public void testPartialStripeWithPartialChunkRetry() @Test public void testDiscardPreAllocatedBlocksPreventRetryExceeds() - throws IOException { + throws Exception { close(); OzoneConfiguration con = new OzoneConfiguration(); int maxRetries = 3; @@ -1114,6 +1120,7 @@ public void testDiscardPreAllocatedBlocksPreventRetryExceeds() out.write(inputChunks[i]); } } + waitForFlushingThreadToFinish((ECKeyOutputStream) out.getOutputStream()); // Make the writes fail to trigger retry List failedDNs = new ArrayList<>(); @@ -1221,4 +1228,12 @@ private List getAllLocationInfoList( } return locationInfoList; } + + private static void waitForFlushingThreadToFinish( + ECKeyOutputStream ecOut) throws Exception { + final long checkpoint = System.currentTimeMillis(); + ecOut.insertFlushCheckpoint(checkpoint); + GenericTestUtils.waitFor(() -> ecOut.getFlushCheckpoint() == 
checkpoint, + 100, 10000); + } } diff --git a/hadoop-ozone/common/pom.xml b/hadoop-ozone/common/pom.xml index 9a1970bebeef..fa8e8f0c434f 100644 --- a/hadoop-ozone/common/pom.xml +++ b/hadoop-ozone/common/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-common - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Common Apache Ozone Common jar diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OFSPath.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OFSPath.java index 636bd1379d8e..3c6bd1fa1083 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OFSPath.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OFSPath.java @@ -21,6 +21,7 @@ import com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.http.ParseException; import org.apache.hadoop.hdds.annotation.InterfaceAudience; @@ -35,6 +36,8 @@ import java.util.StringTokenizer; import static org.apache.hadoop.fs.FileSystem.TRASH_PREFIX; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ENABLE_OFS_SHARED_TMP_DIR; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ENABLE_OFS_SHARED_TMP_DIR_DEFAULT; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_DELIMITER; @@ -63,16 +66,22 @@ public class OFSPath { private String bucketName = ""; private String mountName = ""; private String keyName = ""; + private OzoneConfiguration conf; private static final String OFS_MOUNT_NAME_TMP = "tmp"; // Hard-code the volume name to tmp for the first implementation @VisibleForTesting public static final String OFS_MOUNT_TMP_VOLUMENAME = "tmp"; + private static final String 
OFS_SHARED_TMP_BUCKETNAME = "tmp"; + // Hard-coded bucket name to use when OZONE_OM_ENABLE_OFS_SHARED_TMP_DIR + // enabled; HDDS-7746 to make this name configurable. - public OFSPath(Path path) { + public OFSPath(Path path, OzoneConfiguration conf) { + this.conf = conf; initOFSPath(path.toUri(), false); } - public OFSPath(String pathStr) { + public OFSPath(String pathStr, OzoneConfiguration conf) { + this.conf = conf; if (StringUtils.isEmpty(pathStr)) { return; } @@ -102,7 +111,12 @@ private void initOFSPath(URI uri, boolean endsWithSlash) { // TODO: Make this configurable in the future. volumeName = OFS_MOUNT_TMP_VOLUMENAME; try { - bucketName = getTempMountBucketNameOfCurrentUser(); + if (conf.getBoolean(OZONE_OM_ENABLE_OFS_SHARED_TMP_DIR, + OZONE_OM_ENABLE_OFS_SHARED_TMP_DIR_DEFAULT)) { + bucketName = OFS_SHARED_TMP_BUCKETNAME; + } else { + bucketName = getTempMountBucketNameOfCurrentUser(); + } } catch (IOException ex) { throw new ParseException( "Failed to get temp bucket name for current user."); diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index e72630a32486..d4a843f3b065 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -405,4 +405,9 @@ private OMConfigKeys() { public static final TimeDuration OZONE_OM_CONTAINER_LOCATION_CACHE_TTL_DEFAULT = TimeDuration.valueOf(360, TimeUnit.MINUTES); + + public static final String OZONE_OM_ENABLE_OFS_SHARED_TMP_DIR + = "ozone.om.enable.ofs.shared.tmp.dir"; + public static final boolean OZONE_OM_ENABLE_OFS_SHARED_TMP_DIR_DEFAULT + = false; } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java index b038f894adc2..f5a4c1930c42 100644 --- 
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java @@ -343,9 +343,10 @@ public static OmBucketArgs getFromProtobuf(BucketArgs bucketArgs) { bucketArgs.getOwnerName() : null); // OmBucketArgs ctor already has more arguments, so setting the default // replication config separately. - omBucketArgs.setDefaultReplicationConfig( - new DefaultReplicationConfig(bucketArgs.getDefaultReplicationConfig())); - + if (bucketArgs.hasDefaultReplicationConfig()) { + omBucketArgs.setDefaultReplicationConfig(new DefaultReplicationConfig( + bucketArgs.getDefaultReplicationConfig())); + } if (bucketArgs.hasQuotaInBytes()) { omBucketArgs.setQuotaInBytes(bucketArgs.getQuotaInBytes()); } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolPB.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolPB.java index ff12da1c26b2..9c76bf8238c4 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolPB.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolPB.java @@ -27,7 +27,7 @@ * Protocol used for communication between OMs. 
*/ @ProtocolInfo(protocolName = - "org.apache.hadoop.ozone.om.protocol.OzoneManagerMetadataProtocol", + "org.apache.hadoop.ozone.om.protocol.OMAdminProtocol", protocolVersion = 1) @KerberosInfo( serverPrincipal = OMConfigKeys.OZONE_OM_KERBEROS_PRINCIPAL_KEY) diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketArgs.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketArgs.java index 2e843f84b2df..251033ac2727 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketArgs.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmBucketArgs.java @@ -18,9 +18,13 @@ package org.apache.hadoop.ozone.om.helpers; +import org.apache.hadoop.hdds.client.DefaultReplicationConfig; +import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.junit.Assert; import org.junit.Test; +import static org.apache.hadoop.hdds.client.ReplicationType.EC; + /** * Tests for the OmBucketArgs class. 
*/ @@ -58,4 +62,30 @@ public void testQuotaIsSetFlagsAreCorrectlySet() { Assert.assertEquals(true, argsFromProto.hasQuotaInBytes()); Assert.assertEquals(true, argsFromProto.hasQuotaInNamespace()); } + + @Test + public void testDefaultReplicationConfigIsSetCorrectly() { + OmBucketArgs bucketArgs = OmBucketArgs.newBuilder() + .setBucketName("bucket") + .setVolumeName("volume") + .build(); + + OmBucketArgs argsFromProto = OmBucketArgs.getFromProtobuf( + bucketArgs.getProtobuf()); + + Assert.assertEquals(null, argsFromProto.getDefaultReplicationConfig()); + + bucketArgs = OmBucketArgs.newBuilder() + .setBucketName("bucket") + .setVolumeName("volume") + .setDefaultReplicationConfig(new DefaultReplicationConfig( + EC, new ECReplicationConfig(3, 2))) + .build(); + + argsFromProto = OmBucketArgs.getFromProtobuf( + bucketArgs.getProtobuf()); + + Assert.assertEquals(EC, + argsFromProto.getDefaultReplicationConfig().getType()); + } } diff --git a/hadoop-ozone/csi/pom.xml b/hadoop-ozone/csi/pom.xml index 80e0573a67ce..c989783c7b27 100644 --- a/hadoop-ozone/csi/pom.xml +++ b/hadoop-ozone/csi/pom.xml @@ -20,10 +20,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-csi - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone CSI service Apache Ozone CSI service jar diff --git a/hadoop-ozone/datanode/pom.xml b/hadoop-ozone/datanode/pom.xml index 13f30873698a..f671ded62a33 100644 --- a/hadoop-ozone/datanode/pom.xml +++ b/hadoop-ozone/datanode/pom.xml @@ -19,12 +19,12 @@ org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-datanode Apache Ozone Datanode jar - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT true diff --git a/hadoop-ozone/dev-support/checks/coverage.sh b/hadoop-ozone/dev-support/checks/coverage.sh index dee0db9e1256..c88fe1950b84 100755 --- a/hadoop-ozone/dev-support/checks/coverage.sh +++ b/hadoop-ozone/dev-support/checks/coverage.sh @@ -26,13 +26,15 @@ REPORT_DIR="$DIR/../../../target/coverage" mkdir -p "$REPORT_DIR" 
+JACOCO_VERSION=$(mvn help:evaluate -Dexpression=jacoco.version -q -DforceStdout) + #Install jacoco cli mvn --non-recursive --no-transfer-progress \ org.apache.maven.plugins:maven-dependency-plugin:3.1.2:copy \ - -Dartifact=org.jacoco:org.jacoco.cli:0.8.5:jar:nodeps + -Dartifact=org.jacoco:org.jacoco.cli:${JACOCO_VERSION}:jar:nodeps jacoco() { - java -jar target/dependency/org.jacoco.cli-0.8.5-nodeps.jar "$@" + java -jar target/dependency/org.jacoco.cli-${JACOCO_VERSION}-nodeps.jar "$@" } #Merge all the jacoco.exec files diff --git a/hadoop-ozone/dev-support/intellij/ozone-site-ha.xml b/hadoop-ozone/dev-support/intellij/ozone-site-ha.xml new file mode 100644 index 000000000000..ff7883fc55f8 --- /dev/null +++ b/hadoop-ozone/dev-support/intellij/ozone-site-ha.xml @@ -0,0 +1,174 @@ + + + + hdds.profiler.endpoint.enabled + true + + + ozone.scm.block.client.address + localhost + + + ozone.csi.owner + hadoop + + + ozone.csi.socket + /tmp/csi.sock + + + ozone.scm.client.address + localhost + + + ozone.metadata.dirs + /tmp/metadata + + + ozone.scm.ratis.enable + true + + + ozone.scm.service.ids + scm-group + + + ozone.scm.nodes.scm-group + scm1,scm2,scm3 + + + ozone.scm.address.scm-group.scm1 + localhost + + + ozone.scm.address.scm-group.scm2 + localhost + + + ozone.scm.address.scm-group.scm3 + localhost + + + ozone.scm.client.port.scm-group.scm1 + 19860 + + + ozone.scm.client.port.scm-group.scm2 + 29860 + + + ozone.scm.client.port.scm-group.scm3 + 39860 + + + ozone.scm.datanode.port.scm-group.scm1 + 19861 + + + ozone.scm.datanode.port.scm-group.scm2 + 29861 + + + ozone.scm.datanode.port.scm-group.scm3 + 39861 + + + ozone.scm.block.client.port.scm-group.scm1 + 19863 + + + ozone.scm.block.client.port.scm-group.scm2 + 29863 + + + ozone.scm.block.client.port.scm-group.scm3 + 39863 + + + ozone.scm.security.service.port.scm-group.scm1 + 19961 + + + ozone.scm.security.service.port.scm-group.scm2 + 29961 + + + ozone.scm.security.service.port.scm-group.scm3 + 39961 + + + 
ozone.scm.ratis.port.scm-group.scm1 + 19894 + + + ozone.scm.ratis.port.scm-group.scm2 + 29894 + + + ozone.scm.ratis.port.scm-group.scm3 + 39894 + + + ozone.scm.grpc.port.scm-group.scm1 + 19895 + + + ozone.scm.grpc.port.scm-group.scm2 + 29895 + + + ozone.scm.grpc.port.scm-group.scm3 + 39895 + + + + ozone.om.address + localhost + + + ozone.scm.container.size + 1G + + + hdds.datanode.storage.utilization.critical.threshold + 0.99 + + + hdds.prometheus.endpoint.enabled + true + + + + ozone.recon.address + localhost:9891 + + + ozone.recon.db.dir + /tmp/recon + + + + datanode.replication.port + 0 + + + + ozone.security.enabled + false + + \ No newline at end of file diff --git a/hadoop-ozone/dev-support/intellij/ozone-site.xml b/hadoop-ozone/dev-support/intellij/ozone-site.xml index 4eed6fd84edb..2024fcf9490a 100644 --- a/hadoop-ozone/dev-support/intellij/ozone-site.xml +++ b/hadoop-ozone/dev-support/intellij/ozone-site.xml @@ -71,4 +71,24 @@ datanode.replication.port 0 + + ozone.scm.ratis.enable + false + + + hdds.container.report.interval + 60m + + + hdds.recon.heartbeat.interval + 60s + + + ozone.scm.stale.node.interval + 5m + + + ozone.scm.dead.node.interval + 10m + \ No newline at end of file diff --git a/hadoop-ozone/dev-support/intellij/runConfigurations/Datanode1-ha.xml b/hadoop-ozone/dev-support/intellij/runConfigurations/Datanode1-ha.xml new file mode 100644 index 000000000000..ad1e735d6b4c --- /dev/null +++ b/hadoop-ozone/dev-support/intellij/runConfigurations/Datanode1-ha.xml @@ -0,0 +1,33 @@ + + + + + diff --git a/hadoop-ozone/dev-support/intellij/runConfigurations/Datanode2-ha.xml b/hadoop-ozone/dev-support/intellij/runConfigurations/Datanode2-ha.xml new file mode 100644 index 000000000000..a4edccfcee69 --- /dev/null +++ b/hadoop-ozone/dev-support/intellij/runConfigurations/Datanode2-ha.xml @@ -0,0 +1,33 @@ + + + + + diff --git a/hadoop-ozone/dev-support/intellij/runConfigurations/Datanode2.xml 
b/hadoop-ozone/dev-support/intellij/runConfigurations/Datanode2.xml index 3d3302030d18..040b515b9fac 100644 --- a/hadoop-ozone/dev-support/intellij/runConfigurations/Datanode2.xml +++ b/hadoop-ozone/dev-support/intellij/runConfigurations/Datanode2.xml @@ -18,7 +18,7 @@

AspectJTM - Compiler and Core Tools License

- -

This is a binary-only release.  Source code -is available from -http://eclipse.org/aspectj

- -

The Eclipse Foundation makes available all content in this distribution ("Content"). - Unless otherwise indicated below, the Content is provided to you under the terms and conditions of the - Eclipse Public License Version 1.0 ("EPL"). A copy of the EPL is available - at http://www.eclipse.org/legal/epl-v10.html. - For purposes of the EPL, "Program" will mean the Content.

- -

If you did not receive this Content directly from the Eclipse Foundation, the Content is - being redistributed by another party ("Redistributor") and different terms and conditions may - apply to your use of any object code in the Content. Check the Redistributor's license - that was provided with the Content. If no such license exists, contact the Redistributor. Unless otherwise - indicated below, the terms and conditions of the EPL still apply to any source code in the Content - and such source code may be obtained at http://www.eclipse.org.

- - -

Third Party Content

-

The Content includes items that have been sourced from third parties as set out below. If you - did not receive this Content directly from the Eclipse Foundation, the following is provided - for informational purposes only, and you should look to the Redistributor's license for - terms and conditions of use.

- - -

BCEL v5.1

-

This product contains software developed by the - Apache Software Foundation (http://www.apache.org).

- -

AspectJ includes a modified version of the Apache Jakarta Byte Code Engineering Library (BCEL) v5.1. - BCEL is available at https://commons.apache.org/bcel/. Source - code for the modified version of BCEL is available at Eclipse.org in the AspectJ source tree. This code - is made available under the Apache Software License v1.1

- -

ASM v2.2.1

-

AspectJ includes a binary version of ASM v2.2.1 (http://asm.objectweb.org/) - The source code for ASM is available from the ObjectWeb download site at - http://asm.objectweb.org/download/. -

The ASM license is available at http://asm.objectweb.org/license.html. - The license is also reproduced here: -

- -
Copyright (c) 2000-2005 INRIA, France Telecom
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-
-3. Neither the name of the copyright holders nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-THE POSSIBILITY OF SUCH DAMAGE.
-
- -
- - - - diff --git a/hadoop-ozone/dist/src/main/license/bin/licenses/LICENSE-org.aspectj.txt b/hadoop-ozone/dist/src/main/license/bin/licenses/LICENSE-org.aspectj.txt new file mode 100644 index 000000000000..694f28853edb --- /dev/null +++ b/hadoop-ozone/dist/src/main/license/bin/licenses/LICENSE-org.aspectj.txt @@ -0,0 +1,279 @@ +Per: https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.txt + +Eclipse Public License - v 2.0 + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE +PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION +OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + +a) in the case of the initial Contributor, the initial content +Distributed under this Agreement, and + +b) in the case of each subsequent Contributor: +i) changes to the Program, and +ii) additions to the Program; +where such changes and/or additions to the Program originate from +and are Distributed by that particular Contributor. A Contribution +"originates" from a Contributor if it was added to the Program by +such Contributor itself or anyone acting on such Contributor's behalf. +Contributions do not include changes or additions to the Program that +are not Modified Works. + +"Contributor" means any person or entity that Distributes the Program. + +"Licensed Patents" mean patent claims licensable by a Contributor which +are necessarily infringed by the use or sale of its Contribution alone +or when combined with the Program. + +"Program" means the Contributions Distributed in accordance with this +Agreement. + +"Recipient" means anyone who receives the Program under this Agreement +or any Secondary License (as applicable), including Contributors. 
+ +"Derivative Works" shall mean any work, whether in Source Code or other +form, that is based on (or derived from) the Program and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. + +"Modified Works" shall mean any work in Source Code or other form that +results from an addition to, deletion from, or modification of the +contents of the Program, including, for purposes of clarity any new file +in Source Code form that contains any contents of the Program. Modified +Works shall not include works that contain only declarations, +interfaces, types, classes, structures, or files of the Program solely +in each case in order to link to, bind by name, or subclass the Program +or Modified Works thereof. + +"Distribute" means the acts of a) distributing or b) making available +in any manner that enables the transfer of a copy. + +"Source Code" means the form of a Program preferred for making +modifications, including but not limited to software source code, +documentation source, and configuration files. + +"Secondary License" means either the GNU General Public License, +Version 2.0, or any later versions of that license, including any +exceptions or additional permissions as identified by the initial +Contributor. + +2. GRANT OF RIGHTS + +a) Subject to the terms of this Agreement, each Contributor hereby +grants Recipient a non-exclusive, worldwide, royalty-free copyright +license to reproduce, prepare Derivative Works of, publicly display, +publicly perform, Distribute and sublicense the Contribution of such +Contributor, if any, and such Derivative Works. + +b) Subject to the terms of this Agreement, each Contributor hereby +grants Recipient a non-exclusive, worldwide, royalty-free patent +license under Licensed Patents to make, use, sell, offer to sell, +import and otherwise transfer the Contribution of such Contributor, +if any, in Source Code or other form. 
This patent license shall +apply to the combination of the Contribution and the Program if, at +the time the Contribution is added by the Contributor, such addition +of the Contribution causes such combination to be covered by the +Licensed Patents. The patent license shall not apply to any other +combinations which include the Contribution. No hardware per se is +licensed hereunder. + +c) Recipient understands that although each Contributor grants the +licenses to its Contributions set forth herein, no assurances are +provided by any Contributor that the Program does not infringe the +patent or other intellectual property rights of any other entity. +Each Contributor disclaims any liability to Recipient for claims +brought by any other entity based on infringement of intellectual +property rights or otherwise. As a condition to exercising the +rights and licenses granted hereunder, each Recipient hereby +assumes sole responsibility to secure any other intellectual +property rights needed, if any. For example, if a third party +patent license is required to allow Recipient to Distribute the +Program, it is Recipient's responsibility to acquire that license +before distributing the Program. + +d) Each Contributor represents that to its knowledge it has +sufficient copyright rights in its Contribution, if any, to grant +the copyright license set forth in this Agreement. + +e) Notwithstanding the terms of any Secondary License, no +Contributor makes additional grants to any Recipient (other than +those set forth in this Agreement) as a result of such Recipient's +receipt of the Program under the terms of a Secondary License +(if permitted under the terms of Section 3). + +3. 
REQUIREMENTS + +3.1 If a Contributor Distributes the Program in any form, then: + +a) the Program must also be made available as Source Code, in +accordance with section 3.2, and the Contributor must accompany +the Program with a statement that the Source Code for the Program +is available under this Agreement, and informs Recipients how to +obtain it in a reasonable manner on or through a medium customarily +used for software exchange; and + +b) the Contributor may Distribute the Program under a license +different than this Agreement, provided that such license: +i) effectively disclaims on behalf of all other Contributors all +warranties and conditions, express and implied, including +warranties or conditions of title and non-infringement, and +implied warranties or conditions of merchantability and fitness +for a particular purpose; + +ii) effectively excludes on behalf of all other Contributors all +liability for damages, including direct, indirect, special, +incidental and consequential damages, such as lost profits; + +iii) does not attempt to limit or alter the recipients' rights +in the Source Code under section 3.2; and + +iv) requires any subsequent distribution of the Program by any +party to be under a license that satisfies the requirements +of this section 3. + +3.2 When the Program is Distributed as Source Code: + +a) it must be made available under this Agreement, or if the +Program (i) is combined with other material in a separate file or +files made available under a Secondary License, and (ii) the initial +Contributor attached to the Source Code the notice described in +Exhibit A of this Agreement, then the Program may be made available +under the terms of such Secondary Licenses, and + +b) a copy of this Agreement must be included with each copy of +the Program. 
+ +3.3 Contributors may not remove or alter any copyright, patent, +trademark, attribution notices, disclaimers of warranty, or limitations +of liability ("notices") contained within the Program from any copy of +the Program which they Distribute, provided that Contributors may add +their own appropriate notices. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities +with respect to end users, business partners and the like. While this +license is intended to facilitate the commercial use of the Program, +the Contributor who includes the Program in a commercial product +offering should do so in a manner which does not create potential +liability for other Contributors. Therefore, if a Contributor includes +the Program in a commercial product offering, such Contributor +("Commercial Contributor") hereby agrees to defend and indemnify every +other Contributor ("Indemnified Contributor") against any losses, +damages and costs (collectively "Losses") arising from claims, lawsuits +and other legal actions brought by a third party against the Indemnified +Contributor to the extent caused by the acts or omissions of such +Commercial Contributor in connection with its distribution of the Program +in a commercial product offering. The obligations in this section do not +apply to any claims or Losses relating to any actual or alleged +intellectual property infringement. In order to qualify, an Indemnified +Contributor must: a) promptly notify the Commercial Contributor in +writing of such claim, and b) allow the Commercial Contributor to control, +and cooperate with the Commercial Contributor in, the defense and any +related settlement negotiations. The Indemnified Contributor may +participate in any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial +product offering, Product X. That Contributor is then a Commercial +Contributor. 
If that Commercial Contributor then makes performance +claims, or offers warranties related to Product X, those performance +claims and warranties are such Commercial Contributor's responsibility +alone. Under this section, the Commercial Contributor would have to +defend claims against the other Contributors related to those performance +claims and warranties, and if a court requires any other Contributor to +pay any damages as a result, the Commercial Contributor must pay +those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" +BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF +TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR +PURPOSE. Each Recipient is solely responsible for determining the +appropriateness of using and distributing the Program and assumes all +risks associated with its exercise of rights under this Agreement, +including but not limited to the risks and costs of program errors, +compliance with applicable laws, damage to or loss of data, programs +or equipment, and unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS +SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE +EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + +7. 
GENERAL + +If any provision of this Agreement is invalid or unenforceable under +applicable law, it shall not affect the validity or enforceability of +the remainder of the terms of this Agreement, and without further +action by the parties hereto, such provision shall be reformed to the +minimum extent necessary to make such provision valid and enforceable. + +If Recipient institutes patent litigation against any entity +(including a cross-claim or counterclaim in a lawsuit) alleging that the +Program itself (excluding combinations of the Program with other software +or hardware) infringes such Recipient's patent(s), then such Recipient's +rights granted under Section 2(b) shall terminate as of the date such +litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it +fails to comply with any of the material terms or conditions of this +Agreement and does not cure such failure in a reasonable period of +time after becoming aware of such noncompliance. If all Recipient's +rights under this Agreement terminate, Recipient agrees to cease use +and distribution of the Program as soon as reasonably practicable. +However, Recipient's obligations under this Agreement and any licenses +granted by Recipient relating to the Program shall continue and survive. + +Everyone is permitted to copy and distribute copies of this Agreement, +but in order to avoid inconsistency the Agreement is copyrighted and +may only be modified in the following manner. The Agreement Steward +reserves the right to publish new versions (including revisions) of +this Agreement from time to time. No one other than the Agreement +Steward has the right to modify this Agreement. The Eclipse Foundation +is the initial Agreement Steward. The Eclipse Foundation may assign the +responsibility to serve as the Agreement Steward to a suitable separate +entity. Each new version of the Agreement will be given a distinguishing +version number. 
The Program (including Contributions) may always be +Distributed subject to the version of the Agreement under which it was +received. In addition, after a new version of the Agreement is published, +Contributor may elect to Distribute the Program (including its +Contributions) under the new version. + +Except as expressly stated in Sections 2(a) and 2(b) above, Recipient +receives no rights or licenses to the intellectual property of any +Contributor under this Agreement, whether expressly, by implication, +estoppel or otherwise. All rights in the Program not expressly granted +under this Agreement are reserved. Nothing in this Agreement is intended +to be enforceable by any entity that is not a Contributor or Recipient. +No third-party beneficiary rights are created under this Agreement. + +Exhibit A - Form of Secondary Licenses Notice + +"This Source Code may also be made available under the following +Secondary Licenses when the conditions for such availability set forth +in the Eclipse Public License, v. 2.0 are satisfied: {name license(s), +version(s), and exceptions or additional permissions here}." + +Simply including a copy of this Agreement, including this Exhibit A +is not sufficient to license the Source Code under Secondary Licenses. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to +look for such a notice. + +You may add additional accurate notices of copyright ownership. 
\ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/license/bin/licenses/LICENSE-org.ow2.asm-asm.txt b/hadoop-ozone/dist/src/main/license/bin/licenses/LICENSE-org.ow2.asm-asm.txt index 4d191851af43..8806c7d030f4 100644 --- a/hadoop-ozone/dist/src/main/license/bin/licenses/LICENSE-org.ow2.asm-asm.txt +++ b/hadoop-ozone/dist/src/main/license/bin/licenses/LICENSE-org.ow2.asm-asm.txt @@ -1,4 +1,3 @@ - ASM: a very small and fast Java bytecode manipulation framework Copyright (c) 2000-2011 INRIA, France Telecom All rights reserved. diff --git a/hadoop-ozone/dist/src/main/license/bin/licenses/LICENSE-org.slf4j.txt b/hadoop-ozone/dist/src/main/license/bin/licenses/LICENSE-org.slf4j.txt index 744377c4372c..f687729a0b45 100644 --- a/hadoop-ozone/dist/src/main/license/bin/licenses/LICENSE-org.slf4j.txt +++ b/hadoop-ozone/dist/src/main/license/bin/licenses/LICENSE-org.slf4j.txt @@ -1,4 +1,4 @@ -Copyright (c) 2004-2017 QOS.ch +Copyright (c) 2004-2022 QOS.ch Sarl (Switzerland) All rights reserved. Permission is hereby granted, free of charge, to any person obtaining diff --git a/hadoop-ozone/dist/src/main/license/bin/licenses/NOTICE-ratis-thirtparty-misc.txt b/hadoop-ozone/dist/src/main/license/bin/licenses/NOTICE-ratis-thirdparty-misc.txt similarity index 94% rename from hadoop-ozone/dist/src/main/license/bin/licenses/NOTICE-ratis-thirtparty-misc.txt rename to hadoop-ozone/dist/src/main/license/bin/licenses/NOTICE-ratis-thirdparty-misc.txt index 7e3cbd6fba96..c47cb75fe255 100644 --- a/hadoop-ozone/dist/src/main/license/bin/licenses/NOTICE-ratis-thirtparty-misc.txt +++ b/hadoop-ozone/dist/src/main/license/bin/licenses/NOTICE-ratis-thirdparty-misc.txt @@ -1,5 +1,5 @@ Apache Ratis -Copyright 2017-2019 The Apache Software Foundation +Copyright 2017-2022 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). 
@@ -316,25 +316,3 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------- -This product uses the dropwizard-hadoop-metrics2. - -Copyright 2016 Josh Elser - -Licensed under the Apache License v2.0 - ------------------------------------------------------------------------ -This product uses https://github.com/mbocek/docker-ganglia/ - -Contributed by Michal Bocek - -Licensed under the Apache License v2.0 -https://github.com/mbocek/docker-ganglia/blob/master/LICENSE - ------------------------------------------------------------------------ -This product uses https://github.com/graphite-project/docker-graphite-statsd - -Copyright (c) 2013-2016 Nathan Hopkins - -Licensed under the MIT License - --- diff --git a/hadoop-ozone/dist/src/main/license/jar-report.txt b/hadoop-ozone/dist/src/main/license/jar-report.txt index 0e900808c1fb..c2561d889976 100644 --- a/hadoop-ozone/dist/src/main/license/jar-report.txt +++ b/hadoop-ozone/dist/src/main/license/jar-report.txt @@ -265,3 +265,4 @@ share/ozone/lib/token-provider.jar share/ozone/lib/txw2.jar share/ozone/lib/weld-servlet.Final.jar share/ozone/lib/woodstox-core.jar +share/ozone/lib/zstd-jni.jar diff --git a/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-angular.txt b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-angular.txt index 6f3880f4c290..4cb2ee959e54 100644 --- a/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-angular.txt +++ b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-angular.txt @@ -1,6 +1,6 @@ The MIT License -Copyright (c) 2010-2017 Google, Inc. http://angularjs.org +Copyright (c) 2010-2020 Google, Inc. 
http://angularjs.org Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-bootstrap.txt b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-bootstrap.txt new file mode 100644 index 000000000000..e2640928b3e8 --- /dev/null +++ b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-bootstrap.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2011-2019 Twitter, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-d3.txt b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-d3.txt index c71e3f254c06..ff3f2e5419a8 100644 --- a/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-d3.txt +++ b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-d3.txt @@ -1,4 +1,4 @@ -Copyright (c) 2010-2015, Michael Bostock +Copyright (c) 2010-2016, Michael Bostock All rights reserved. Redistribution and use in source and binary forms, with or without @@ -23,4 +23,4 @@ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, -EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-glyphicons.txt b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-glyphicons.txt new file mode 100644 index 000000000000..aba0312db84f --- /dev/null +++ b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-glyphicons.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2011-2019 GLYPHICONS.com. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-guava.txt b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-guava.txt new file mode 100644 index 000000000000..a6886f3cc6c3 --- /dev/null +++ b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-guava.txt @@ -0,0 +1,13 @@ +Copyright (C) 2007 The Guava Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-jquery.txt b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-jquery.txt index 45930542204f..e3dbacb999ce 100644 --- a/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-jquery.txt +++ b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-jquery.txt @@ -17,4 +17,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-nvd3.txt b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-nvd3.txt index 0955544cdf32..ec98d9434d5b 100644 --- a/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-nvd3.txt +++ b/hadoop-ozone/dist/src/main/license/src/licenses/LICENSE-nvd3.txt @@ -1,10 +1,13 @@ Copyright (c) 2011-2014 Novus Partners, Inc. -Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -file except in compliance with the License. You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software distributed under the - License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/hadoop-ozone/dist/src/main/license/update-jar-report.sh b/hadoop-ozone/dist/src/main/license/update-jar-report.sh index dfd68a264dc6..6cc6238f4040 100755 --- a/hadoop-ozone/dist/src/main/license/update-jar-report.sh +++ b/hadoop-ozone/dist/src/main/license/update-jar-report.sh @@ -30,4 +30,4 @@ cd "$OZONE_DIST_DIR" #sed expression removes the version. Usually license is not changed with version bumps #jacoco and test dependencies are excluded -find . -type f -name "*.jar" | cut -c3- | perl -wpl -e 's/-[0-9]+(.[0-9]+)*(-([0-9a-z]+-)?SNAPSHOT)?+//g; s/\.v\d+\.jar/.jar/g;' | grep -v -e jacoco -e hdds-test-utils | sort > "$SCRIPTDIR"/$REPORT_NAME +find . -type f -name "*.jar" | cut -c3- | perl -wpl -e 's/-[0-9]+(\.[0-9]+)*(-([0-9a-z]+-)?SNAPSHOT)?+//g; s/\.v\d+\.jar/.jar/g;' | grep -v -e jacoco -e hdds-test-utils | sort > "$SCRIPTDIR"/$REPORT_NAME diff --git a/hadoop-ozone/dist/src/main/smoketest/createbucketenv.robot b/hadoop-ozone/dist/src/main/smoketest/createbucketenv.robot index 8a79fd2fca47..1042f520b61c 100644 --- a/hadoop-ozone/dist/src/main/smoketest/createbucketenv.robot +++ b/hadoop-ozone/dist/src/main/smoketest/createbucketenv.robot @@ -30,7 +30,7 @@ Create volume ${result} = Execute ozone sh volume create /${volume} --user hadoop --space-quota 100TB --namespace-quota 100 Should not contain ${result} Failed Create bucket - Execute ozone sh bucket create /${volume}/${bucket} + Execute ozone sh bucket create /${volume}/${bucket} --space-quota 1TB *** Test Cases *** Test ozone shell diff --git a/hadoop-ozone/dist/src/main/smoketest/createmrenv.robot b/hadoop-ozone/dist/src/main/smoketest/createmrenv.robot index 79de78c8b224..d3049bd5ae8e 100644 --- a/hadoop-ozone/dist/src/main/smoketest/createmrenv.robot +++ b/hadoop-ozone/dist/src/main/smoketest/createmrenv.robot @@ -32,7 +32,7 @@ Create volume ${result} = Execute ozone sh volume create /${volume} --user hadoop --space-quota 100TB --namespace-quota 100 Should not contain ${result} Failed Create 
bucket - Execute ozone sh bucket create /${volume}/${bucket} --layout FILE_SYSTEM_OPTIMIZED + Execute ozone sh bucket create /${volume}/${bucket} --space-quota 1TB --layout FILE_SYSTEM_OPTIMIZED *** Test Cases *** Create test volume, bucket and key diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot index f41635724134..c26e163a051b 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot @@ -31,7 +31,7 @@ ${TESTFILE} testfile *** Keywords *** Write keys Execute ozone sh volume create o3://om/${VOLUME} --space-quota 100TB --namespace-quota 100 - Execute ozone sh bucket create o3://om/${VOLUME}/${BUCKET} + Execute ozone sh bucket create o3://om/${VOLUME}/${BUCKET} --space-quota 1TB Execute dd if=/dev/urandom of=${TESTFILE} bs=100000 count=15 Execute ozone sh key put o3://om/${VOLUME}/${BUCKET}/${TESTFILE} ${TESTFILE} diff --git a/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot b/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot index 4487fa998dba..1078d955405a 100644 --- a/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot +++ b/hadoop-ozone/dist/src/main/smoketest/gdpr/gdpr.robot @@ -47,7 +47,7 @@ Test GDPR -g=false Test GDPR(disabled) without explicit options [arguments] ${volume} Execute ozone sh volume create /${volume} --space-quota 100TB --namespace-quota 100 - Execute ozone sh bucket create /${volume}/mybucket1 + Execute ozone sh bucket create /${volume}/mybucket1 --space-quota 1TB ${result} = Execute ozone sh bucket info /${volume}/mybucket1 | jq -r '. 
| select(.name=="mybucket1") | .metadata | .gdprEnabled' Should Be Equal ${result} null Execute ozone sh key put /${volume}/mybucket1/mykey /opt/hadoop/NOTICE.txt @@ -58,7 +58,7 @@ Test GDPR(disabled) without explicit options Test GDPR with --enforcegdpr=true [arguments] ${volume} - Execute ozone sh bucket create --enforcegdpr=true /${volume}/mybucket2 + Execute ozone sh bucket create --enforcegdpr=true /${volume}/mybucket2 --space-quota 1TB ${result} = Execute ozone sh bucket info /${volume}/mybucket2 | jq -r '. | select(.name=="mybucket2") | .metadata | .gdprEnabled' Should Be Equal ${result} true Execute ozone sh key put /${volume}/mybucket2/mykey /opt/hadoop/NOTICE.txt @@ -69,7 +69,7 @@ Test GDPR with --enforcegdpr=true Test GDPR with -g=true [arguments] ${volume} - Execute ozone sh bucket create -g=true /${volume}/mybucket3 + Execute ozone sh bucket create -g=true /${volume}/mybucket3 --space-quota 1TB ${result} = Execute ozone sh bucket info /${volume}/mybucket3 | jq -r '. | select(.name=="mybucket3") | .metadata | .gdprEnabled' Should Be Equal ${result} true Execute ozone sh key put /${volume}/mybucket3/mykey /opt/hadoop/NOTICE.txt @@ -80,7 +80,7 @@ Test GDPR with -g=true Test GDPR with -g=false [arguments] ${volume} - Execute ozone sh bucket create /${volume}/mybucket4 + Execute ozone sh bucket create /${volume}/mybucket4 --space-quota 1TB ${result} = Execute ozone sh bucket info /${volume}/mybucket4 | jq -r '. 
| select(.name=="mybucket4") | .metadata | .gdprEnabled' Should Be Equal ${result} null Execute ozone sh key put /${volume}/mybucket4/mykey /opt/hadoop/NOTICE.txt diff --git a/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot b/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot index 514e0e1dc941..d22a167d80da 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs.robot @@ -70,10 +70,18 @@ Check disk usage after create a file which uses RATIS replication type Should contain ${result} ${expectedDiskUsage} +Put with Streaming + ${result} = Execute ozone fs -D ozone.fs.datastream.enabled=true -put NOTICE.txt ${DEEP_URL}/STREAMING.txt + Should Be Empty ${result} + ${result} = Execute ozone sh key list ${VOLUME}/${BUCKET} | jq -r '.[].name' + Should contain ${result} STREAMING.txt + + List ${result} = Execute ozone fs -ls ${DEEP_URL}/ Should contain ${result} NOTICE.txt Should contain ${result} PUTFILE.txt + Should contain ${result} STREAMING.txt Move Execute ozone fs -mv ${DEEP_URL}/NOTICE.txt ${DEEP_URL}/MOVED.TXT diff --git a/hadoop-ozone/dist/src/main/smoketest/ozonefs/setup.robot b/hadoop-ozone/dist/src/main/smoketest/ozonefs/setup.robot index eb94bd033d5b..e7aca825f35a 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozonefs/setup.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozonefs/setup.robot @@ -41,16 +41,16 @@ Create volumes for FS test Execute And Ignore Error ozone sh volume create ${VOL2} --space-quota 100TB Create buckets for FS test - Execute ozone sh bucket create ${VOLUME}/${BUCKET} --layout FILE_SYSTEM_OPTIMIZED - Execute ozone sh bucket create ${VOLUME}/${BUCKET2} --layout FILE_SYSTEM_OPTIMIZED - Execute ozone sh bucket create ${VOL2}/${BUCKET_IN_VOL2} --layout FILE_SYSTEM_OPTIMIZED + Execute ozone sh bucket create ${VOLUME}/${BUCKET} --space-quota 1TB --layout FILE_SYSTEM_OPTIMIZED + Execute ozone sh bucket create ${VOLUME}/${BUCKET2} --space-quota 1TB 
--layout FILE_SYSTEM_OPTIMIZED + Execute ozone sh bucket create ${VOL2}/${BUCKET_IN_VOL2} --space-quota 1TB --layout FILE_SYSTEM_OPTIMIZED Create links for FS test Execute And Ignore Error ozone sh volume create ${VOLUME}-src --space-quota 100TB Execute And Ignore Error ozone sh volume create ${VOL2}-src --space-quota 100TB - Execute ozone sh bucket create ${VOLUME}-src/${BUCKET}-src --layout FILE_SYSTEM_OPTIMIZED - Execute ozone sh bucket create ${VOLUME}-src/${BUCKET2}-src --layout FILE_SYSTEM_OPTIMIZED - Execute ozone sh bucket create ${VOL2}-src/${BUCKET_IN_VOL2}-src --layout FILE_SYSTEM_OPTIMIZED + Execute ozone sh bucket create ${VOLUME}-src/${BUCKET}-src --space-quota 1TB --layout FILE_SYSTEM_OPTIMIZED + Execute ozone sh bucket create ${VOLUME}-src/${BUCKET2}-src --space-quota 1TB --layout FILE_SYSTEM_OPTIMIZED + Execute ozone sh bucket create ${VOL2}-src/${BUCKET_IN_VOL2}-src --space-quota 1TB --layout FILE_SYSTEM_OPTIMIZED Execute ozone sh bucket link ${VOLUME}-src/${BUCKET}-src ${VOLUME}/${BUCKET} Execute ozone sh bucket link ${VOLUME}-src/${BUCKET2}-src ${VOLUME}/${BUCKET2} Execute ozone sh bucket link ${VOL2}-src/${BUCKET_IN_VOL2}-src ${VOL2}/${BUCKET_IN_VOL2} diff --git a/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot b/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot index 32cda0917e28..df5cdfaafdda 100644 --- a/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot +++ b/hadoop-ozone/dist/src/main/smoketest/security/ozone-secure-fs.robot @@ -19,11 +19,15 @@ Library OperatingSystem Library String Library BuiltIn Resource ../commonlib.robot +Resource ../lib/fs.robot Test Timeout 5 minutes *** Variables *** ${ENDPOINT_URL} http://s3g:9878 ${SCM} scm +${TMP_MOUNT} tmp +${TMP_DIR} tmp +${SCHEME} ofs *** Keywords *** Setup volume names @@ -33,6 +37,17 @@ Setup volume names Set Suite Variable ${volume3} fstest3${random} Set Suite Variable ${volume4} fstest4${random} +Format ofs TMPMOUNT + [arguments] 
${volume} ${path}=${EMPTY} ${om}=${OM_SERVICE_ID} + + ${om_with_trailing} = Run Keyword If '${om}' != '${EMPTY}' Ensure Trailing / ${om} + ... ELSE Set Variable ${EMPTY} + + ${path_with_leading} = Run Keyword If '${path}' != '${EMPTY}' Ensure Leading / ${path} + ... ELSE Set Variable ${EMPTY} + + [return] ofs://${om_with_trailing}${volume}/${path_with_leading} + *** Test Cases *** Create volume bucket with wrong credentials Execute kdestroy @@ -153,3 +168,39 @@ Test native authorizer Execute ozone sh key list /${volume3}/bk1 Execute kdestroy Run Keyword Kinit test user testuser testuser.keytab + +Test tmp mount for shared ofs tmp dir + ${result} = Execute And Ignore Error ozone getconf confKey ozone.om.enable.ofs.shared.tmp.dir + ${contains} = Evaluate "true" in """${result}""" + IF ${contains} == ${True} + Run Keyword Kinit test user testuser testuser.keytab + Execute ozone sh volume create /${TMP_MOUNT} -u testuser + Execute ozone sh bucket create /${TMP_MOUNT}/${TMP_DIR} -u testuser + Execute ozone sh volume addacl /${TMP_MOUNT} -a user:testuser/scm@EXAMPLE.COM:a,user:testuser2/scm@EXAMPLE.COM:rw + Execute ozone sh bucket addacl /${TMP_MOUNT}/${TMP_DIR} -a user:testuser/scm@EXAMPLE.COM:a,user:testuser2/scm@EXAMPLE.COM:rwlc + + ${tmpdirmount} = Format ofs TMPMOUNT ${TMP_MOUNT} + ${result} = Execute ozone fs -put ./NOTICE.txt ${tmpdirmount} + Should Be Empty ${result} + Run Keyword Kinit test user testuser2 testuser2.keytab + ${result} = Execute ozone fs -put ./LICENSE.txt ${tmpdirmount} + Should Be Empty ${result} + + ${result} = Execute ozone fs -ls ${tmpdirmount} + Should contain ${result} NOTICE.txt + Should contain ${result} LICENSE.txt + + + ${result} = Execute And Ignore Error ozone fs -rm -skipTrash ${tmpdirmount}/NOTICE.txt + Should contain ${result} error + ${result} = Execute ozone fs -rm -skipTrash ${tmpdirmount}/LICENSE.txt + Should contain ${result} Deleted + + Run Keyword Kinit test user testuser testuser.keytab + ${result} = Execute ozone fs -rm 
-skipTrash ${tmpdirmount}/NOTICE.txt + Should contain ${result} Deleted + + Execute ozone fs -rm -r -skipTrash ${tmpdirmount} + Execute ozone sh volume delete /${TMP_MOUNT} + END + diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/pom.xml b/hadoop-ozone/fault-injection-test/mini-chaos-tests/pom.xml index 8a2564fa0074..7b08d32ddd58 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/pom.xml +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/pom.xml @@ -20,9 +20,9 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> ozone-fault-injection-test org.apache.ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Mini Ozone Chaos Tests Apache Ozone Mini Ozone Chaos Tests diff --git a/hadoop-ozone/fault-injection-test/network-tests/pom.xml b/hadoop-ozone/fault-injection-test/network-tests/pom.xml index 6113b15841ab..888bd215b65c 100644 --- a/hadoop-ozone/fault-injection-test/network-tests/pom.xml +++ b/hadoop-ozone/fault-injection-test/network-tests/pom.xml @@ -20,7 +20,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone-fault-injection-test - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-network-tests Apache Ozone Network Tests diff --git a/hadoop-ozone/fault-injection-test/pom.xml b/hadoop-ozone/fault-injection-test/pom.xml index 54bfb8880cd7..5c6f60c5ec5e 100644 --- a/hadoop-ozone/fault-injection-test/pom.xml +++ b/hadoop-ozone/fault-injection-test/pom.xml @@ -20,10 +20,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-fault-injection-test - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Fault Injection Tests Apache Ozone Fault Injection Tests pom diff --git a/hadoop-ozone/insight/pom.xml b/hadoop-ozone/insight/pom.xml index bcb911c5eded..314177aa71d8 100644 --- a/hadoop-ozone/insight/pom.xml +++ b/hadoop-ozone/insight/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone - 1.3.0-SNAPSHOT + 
1.4.0-SNAPSHOT ozone-insight - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Insight Tool Apache Ozone Insight Tool jar diff --git a/hadoop-ozone/integration-test/pom.xml b/hadoop-ozone/integration-test/pom.xml index 3e5fe5624503..b0d61ec72b23 100644 --- a/hadoop-ozone/integration-test/pom.xml +++ b/hadoop-ozone/integration-test/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-integration-test - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Integration Tests Apache Ozone Integration Tests jar diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java index b790284cfcfe..789bb88b51b1 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java @@ -237,7 +237,7 @@ public void testDeleteWithLargeSubPathsThanBatchSize() throws Exception { long elapsedRunCount = dirDeletingService.getRunCount() - preRunCount; assertTrue(dirDeletingService.getRunCount() > 1); // Ensure dir deleting speed, here provide a backup value for safe CI - assertTrue(elapsedRunCount == 8 || elapsedRunCount == 9); + assertTrue(elapsedRunCount >= 7); } @Test @@ -283,7 +283,7 @@ public void testDeleteWithMultiLevels() throws Exception { assertTableRowCount(dirTable, 0); assertSubPathsCount(dirDeletingService::getMovedFilesCount, 3); - assertSubPathsCount(dirDeletingService::getMovedDirsCount, 4); + assertSubPathsCount(dirDeletingService::getMovedDirsCount, 2); assertSubPathsCount(dirDeletingService::getDeletedDirsCount, 5); assertTrue(dirDeletingService.getRunCount() > 1); diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java new file mode 100644 index 000000000000..52d4e1b3c3d3 --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.ozone; + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.TestDataUtil; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.junit.AfterClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.rules.Timeout; + +import java.util.concurrent.ThreadLocalRandom; + +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_ROOT; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_DELIMITER; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_SCHEME; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY; + + +/** + * Test HSync. 
+ */ +public class TestHSync { + @Rule + public Timeout timeout = Timeout.seconds(300); + + private static MiniOzoneCluster cluster; + private static OzoneBucket bucket; + + private final OzoneConfiguration conf = new OzoneConfiguration(); + + { + try { + init(); + } catch (Exception e) { + throw new IllegalStateException(e); + } + } + + private void init() throws Exception { + final int chunkSize = 16 << 10; + final int flushSize = 2 * chunkSize; + final int maxFlushSize = 2 * flushSize; + final int blockSize = 2 * maxFlushSize; + final BucketLayout layout = BucketLayout.FILE_SYSTEM_OPTIMIZED; + + conf.setBoolean(OZONE_OM_RATIS_ENABLE_KEY, false); + conf.set(OZONE_DEFAULT_BUCKET_LAYOUT, layout.name()); + cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(5) + .setTotalPipelineNumLimit(10) + .setBlockSize(blockSize) + .setChunkSize(chunkSize) + .setStreamBufferFlushSize(flushSize) + .setStreamBufferMaxSize(maxFlushSize) + .setDataStreamBufferFlushize(maxFlushSize) + .setStreamBufferSizeUnit(StorageUnit.BYTES) + .setDataStreamMinPacketSize(chunkSize) + .setDataStreamStreamWindowSize(5 * chunkSize) + .build(); + cluster.waitForClusterToBeReady(); + + // create a volume and a bucket to be used by OzoneFileSystem + bucket = TestDataUtil.createVolumeAndBucket(cluster, layout); + } + + @AfterClass + public static void teardown() { + if (cluster != null) { + cluster.shutdown(); + } + } + + @Test + public void testO3fsHSync() throws Exception { + // Set the fs.defaultFS + final String rootPath = String.format("%s://%s.%s/", + OZONE_URI_SCHEME, bucket.getName(), bucket.getVolumeName()); + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); + + final Path file = new Path("/file"); + + try (FileSystem fs = FileSystem.get(conf)) { + runTestHSync(fs, file); + } + } + + @Test + public void testOfsHSync() throws Exception { + // Set the fs.defaultFS + final String rootPath = String.format("%s://%s/", + OZONE_OFS_URI_SCHEME, 
conf.get(OZONE_OM_ADDRESS_KEY)); + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); + + final String dir = OZONE_ROOT + bucket.getVolumeName() + + OZONE_URI_DELIMITER + bucket.getName(); + final Path file = new Path(dir, "file"); + + try (FileSystem fs = FileSystem.get(conf)) { + runTestHSync(fs, file); + } + } + + static void runTestHSync(FileSystem fs, Path file) throws Exception { + final byte[] data = new byte[1 << 20]; + ThreadLocalRandom.current().nextBytes(data); + + final FSDataOutputStream stream = fs.create(file, true); + stream.write(data); + stream.hsync(); + + //TODO once OM change has been done, read the file without closing it. + stream.close(); + + final byte[] buffer = new byte[4 << 10]; + int offset = 0; + try (FSDataInputStream in = fs.open(file)) { + for (; ;) { + final int n = in.read(buffer, 0, buffer.length); + if (n <= 0) { + break; + } + for (int i = 0; i < n; i++) { + Assertions.assertEquals(data[offset + i], buffer[i]); + } + offset += n; + } + } + Assertions.assertEquals(data.length, offset); + } +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFileSystem.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFileSystem.java index 58ffd4256e07..10f326362c4a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFileSystem.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFileSystem.java @@ -153,6 +153,7 @@ public TestOzoneFileSystem(boolean setDefaultFs, boolean enableOMRatis) { private static OzoneManagerProtocol writeClient; private static FileSystem fs; private static OzoneFileSystem o3fs; + private static OzoneBucket ozoneBucket; private static String volumeName; private static String bucketName; private static Trash trash; @@ -179,10 +180,9 @@ private void init() throws Exception { writeClient = cluster.getRpcClient().getObjectStore() 
.getClientProxy().getOzoneManagerClient(); // create a volume and a bucket to be used by OzoneFileSystem - OzoneBucket bucket = - TestDataUtil.createVolumeAndBucket(cluster, bucketLayout); - volumeName = bucket.getVolumeName(); - bucketName = bucket.getName(); + ozoneBucket = TestDataUtil.createVolumeAndBucket(cluster, bucketLayout); + volumeName = ozoneBucket.getVolumeName(); + bucketName = ozoneBucket.getName(); String rootPath = String.format("%s://%s.%s/", OzoneConsts.OZONE_URI_SCHEME, bucketName, volumeName); @@ -334,6 +334,30 @@ public void testMakeDirsWithAnExistingDirectoryPath() throws Exception { assertTrue("Shouldn't send error if dir exists", status); } + @Test + public void testMakeDirsWithAnFakeDirectory() throws Exception { + /* + * Op 1. commit a key -> "dir1/dir2/key1" + * Op 2. create dir -> "dir1/testDir", the dir1 is a fake dir, + * "dir1/testDir" can be created normal + */ + + String fakeGrandpaKey = "dir1"; + String fakeParentKey = fakeGrandpaKey + "/dir2"; + String fullKeyName = fakeParentKey + "/key1"; + TestDataUtil.createKey(ozoneBucket, fullKeyName, ""); + + // /dir1/dir2 should not exist + assertFalse(fs.exists(new Path(fakeParentKey))); + + // /dir1/dir2/key2 should be created because has a fake parent directory + Path subdir = new Path(fakeParentKey, "key2"); + assertTrue(fs.mkdirs(subdir)); + // the intermediate directories /dir1 and /dir1/dir2 will be created too + assertTrue(fs.exists(new Path(fakeGrandpaKey))); + assertTrue(fs.exists(new Path(fakeParentKey))); + } + @Test public void testCreateWithInvalidPaths() throws Exception { // Test for path with .. 
@@ -727,6 +751,37 @@ public void testListStatusOnLargeDirectory() throws Exception { } } + @Test + public void testListStatusOnKeyNameContainDelimiter() throws Exception { + /* + * op1: create a key -> "dir1/dir2/key1" + * op2: `ls /` child dir "/dir1/" will be return + * op2: `ls /dir1` child dir "/dir1/dir2/" will be return + * op3: `ls /dir1/dir2` file "/dir1/dir2/key" will be return + * + * the "/dir1", "/dir1/dir2/" are fake directory + * */ + String keyName = "dir1/dir2/key1"; + TestDataUtil.createKey(ozoneBucket, keyName, ""); + FileStatus[] fileStatuses; + + fileStatuses = fs.listStatus(new Path("/")); + assertEquals(1, fileStatuses.length); + assertEquals("/dir1", fileStatuses[0].getPath().toUri().getPath()); + assertTrue(fileStatuses[0].isDirectory()); + + fileStatuses = fs.listStatus(new Path("/dir1")); + assertEquals(1, fileStatuses.length); + assertEquals("/dir1/dir2", fileStatuses[0].getPath().toUri().getPath()); + assertTrue(fileStatuses[0].isDirectory()); + + fileStatuses = fs.listStatus(new Path("/dir1/dir2")); + assertEquals(1, fileStatuses.length); + assertEquals("/dir1/dir2/key1", + fileStatuses[0].getPath().toUri().getPath()); + assertTrue(fileStatuses[0].isFile()); + } + /** * Cleanup files and directories. 
* @@ -1273,6 +1328,24 @@ public void testRenameFileToDir() throws Exception { "file1"))); } + @Test + public void testRenameContainDelimiterFile() throws Exception { + String fakeGrandpaKey = "dir1"; + String fakeParentKey = fakeGrandpaKey + "/dir2"; + String sourceKeyName = fakeParentKey + "/key1"; + String targetKeyName = fakeParentKey + "/key2"; + TestDataUtil.createKey(ozoneBucket, sourceKeyName, ""); + + Path sourcePath = new Path(fs.getUri().toString() + "/" + sourceKeyName); + Path targetPath = new Path(fs.getUri().toString() + "/" + targetKeyName); + assertTrue(fs.rename(sourcePath, targetPath)); + assertFalse(fs.exists(sourcePath)); + assertTrue(fs.exists(targetPath)); + // intermediate directories will not be created + assertFalse(fs.exists(new Path(fakeGrandpaKey))); + assertFalse(fs.exists(new Path(fakeParentKey))); + } + /** * Fails if the (a) parent of dst does not exist or (b) parent is a file. diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFileSystemWithStreaming.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFileSystemWithStreaming.java new file mode 100644 index 000000000000..f2aa52759833 --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFileSystemWithStreaming.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.ozone; + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.TestDataUtil; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.junit.AfterClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.rules.Timeout; + +import java.util.concurrent.ThreadLocalRandom; + +import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_ENABLED; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_ROOT; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_DELIMITER; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_SCHEME; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY; + +/** + * Ozone file system tests with Streaming. 
+ */ +public class TestOzoneFileSystemWithStreaming { + @Rule + public Timeout timeout = Timeout.seconds(300); + + private static MiniOzoneCluster cluster; + private static OzoneBucket bucket; + + private final OzoneConfiguration conf = new OzoneConfiguration(); + + { + try { + init(); + } catch (Exception e) { + throw new IllegalStateException(e); + } + } + + private void init() throws Exception { + final int chunkSize = 16 << 10; + final int flushSize = 2 * chunkSize; + final int maxFlushSize = 2 * flushSize; + final int blockSize = 2 * maxFlushSize; + final BucketLayout layout = BucketLayout.FILE_SYSTEM_OPTIMIZED; + + conf.setBoolean(DFS_CONTAINER_RATIS_DATASTREAM_ENABLED, true); + conf.setBoolean(OZONE_FS_DATASTREAM_ENABLED, true); + conf.setBoolean(OZONE_OM_RATIS_ENABLE_KEY, false); + conf.set(OZONE_DEFAULT_BUCKET_LAYOUT, layout.name()); + cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(5) + .setTotalPipelineNumLimit(10) + .setBlockSize(blockSize) + .setChunkSize(chunkSize) + .setStreamBufferFlushSize(flushSize) + .setStreamBufferMaxSize(maxFlushSize) + .setDataStreamBufferFlushize(maxFlushSize) + .setStreamBufferSizeUnit(StorageUnit.BYTES) + .setDataStreamMinPacketSize(chunkSize) + .setDataStreamStreamWindowSize(5 * chunkSize) + .build(); + cluster.waitForClusterToBeReady(); + + // create a volume and a bucket to be used by OzoneFileSystem + bucket = TestDataUtil.createVolumeAndBucket(cluster, layout); + } + + @AfterClass + public static void teardown() { + if (cluster != null) { + cluster.shutdown(); + } + } + + @Test + public void testO3fsCreateFile() throws Exception { + // Set the fs.defaultFS + final String rootPath = String.format("%s://%s.%s/", + OZONE_URI_SCHEME, bucket.getName(), bucket.getVolumeName()); + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); + + final Path file = new Path("/file"); + + try (FileSystem fs = FileSystem.get(conf)) { + runTestCreateFile(fs, file); + } + } + + @Test + public void 
testOfsCreateFile() throws Exception { + // Set the fs.defaultFS + final String rootPath = String.format("%s://%s/", + OZONE_OFS_URI_SCHEME, conf.get(OZONE_OM_ADDRESS_KEY)); + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); + + final String dir = OZONE_ROOT + bucket.getVolumeName() + + OZONE_URI_DELIMITER + bucket.getName(); + final Path file = new Path(dir, "file"); + + try (FileSystem fs = FileSystem.get(conf)) { + runTestCreateFile(fs, file); + } + } + + static void runTestCreateFile(FileSystem fs, Path file) throws Exception { + final byte[] bytes = new byte[1 << 20]; + ThreadLocalRandom.current().nextBytes(bytes); + + ContractTestUtils.createFile(fs, file, true, bytes); + + final byte[] buffer = new byte[4 << 10]; + int offset = 0; + try (FSDataInputStream in = fs.open(file)) { + for (; ;) { + final int n = in.read(buffer, 0, buffer.length); + if (n <= 0) { + break; + } + for (int i = 0; i < n; i++) { + Assertions.assertEquals(bytes[offset + i], buffer[i]); + } + offset += n; + } + } + Assertions.assertEquals(bytes.length, offset); + } +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java index dbcb8218f340..9f34eb89acee 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java @@ -22,6 +22,7 @@ import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -35,7 +36,6 @@ import org.apache.hadoop.fs.TrashPolicy; import 
org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.hdds.client.DefaultReplicationConfig; import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.client.RatisReplicationConfig; @@ -87,12 +87,13 @@ import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Arrays; +import java.util.BitSet; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; -import java.util.HashMap; import java.util.Optional; import java.util.Random; import java.util.Set; @@ -110,8 +111,14 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_FS_ITERATE_BATCH_SIZE; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_DELIMITER; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ENABLE_OFS_SHARED_TMP_DIR; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.BUCKET_NOT_FOUND; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.VOLUME_NOT_FOUND; +import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.PERMISSION_DENIED; +import static org.apache.hadoop.ozone.security.acl.IAccessAuthorizer.ACLType.READ; +import static org.apache.hadoop.ozone.security.acl.IAccessAuthorizer.ACLType.WRITE; +import static org.apache.hadoop.ozone.security.acl.IAccessAuthorizer.ACLType.DELETE; +import static org.apache.hadoop.ozone.security.acl.IAccessAuthorizer.ACLType.LIST; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -296,7 +303,7 @@ public void testCreateDoesNotAddParentDirKeys() throws Exception { ContractTestUtils.touch(fs, child); OzoneKeyDetails key = getKey(child, 
false); - OFSPath childOFSPath = new OFSPath(child); + OFSPath childOFSPath = new OFSPath(child, conf); Assert.assertEquals(key.getName(), childOFSPath.getKeyName()); // Creating a child should not add parent keys to the bucket @@ -318,6 +325,44 @@ public void testCreateDoesNotAddParentDirKeys() throws Exception { fs.delete(grandparent, true); } + @Test + public void testListStatusWithIntermediateDirWithECEnabled() + throws Exception { + String key = "object-dir/object-name1"; + + // write some test data into bucket + try (OzoneOutputStream outputStream = objectStore.getVolume(volumeName). + getBucket(bucketName).createKey(key, 1, + new ECReplicationConfig("RS-3-2-1024"), + new HashMap<>())) { + outputStream.write(RandomUtils.nextBytes(1)); + } + + List dirs = Arrays.asList(volumeName, bucketName, "object-dir", + "object-name1"); + for (int size = 1; size <= dirs.size(); size++) { + String path = "/" + dirs.subList(0, size).stream() + .collect(Collectors.joining("/")); + Path parent = new Path(path); + // Wait until the filestatus is updated + if (!enabledFileSystemPaths) { + GenericTestUtils.waitFor(() -> { + try { + fs.getFileStatus(parent); + return true; + } catch (IOException e) { + return false; + } + }, 1000, 120000); + } + FileStatus fileStatus = fs.getFileStatus(parent); + Assert.assertEquals((size == dirs.size() - 1 && + !bucketLayout.isFileSystemOptimized()) || size == dirs.size(), + fileStatus.isErasureCoded()); + } + + } + @Test public void testDeleteCreatesFakeParentDir() throws Exception { // TODO: Request for comment. 
@@ -344,7 +389,7 @@ public void testDeleteCreatesFakeParentDir() throws Exception { // Deleting the only child should create the parent dir key if it does // not exist - OFSPath parentOFSPath = new OFSPath(parent); + OFSPath parentOFSPath = new OFSPath(parent, conf); String parentKey = parentOFSPath.getKeyName() + "/"; OzoneKeyDetails parentKeyInfo = getKey(parent, true); Assert.assertEquals(parentKey, parentKeyInfo.getName()); @@ -657,7 +702,7 @@ public void testMkdirOnNonExistentVolumeBucketDir() throws Exception { // Check volume and bucket existence, they should both be created. OzoneVolume ozoneVolume = objectStore.getVolume(volumeNameLocal); OzoneBucket ozoneBucket = ozoneVolume.getBucket(bucketNameLocal); - OFSPath ofsPathDir1 = new OFSPath(dir12); + OFSPath ofsPathDir1 = new OFSPath(dir12, conf); String key = ofsPathDir1.getKeyName() + "/"; OzoneKeyDetails ozoneKeyDetails = ozoneBucket.getKey(key); Assert.assertEquals(key, ozoneKeyDetails.getName()); @@ -927,7 +972,7 @@ private OzoneKeyDetails getKey(Path keyPath, boolean isDirectory) if (isDirectory) { key = key + OZONE_URI_DELIMITER; } - OFSPath ofsPath = new OFSPath(key); + OFSPath ofsPath = new OFSPath(key, conf); String keyInBucket = ofsPath.getKeyName(); return cluster.getClient().getObjectStore().getVolume(volumeName) .getBucket(bucketName).getKey(keyInBucket); @@ -968,7 +1013,7 @@ private void teardownVolumeBucketWithDir(Path bucketPath1) throws IOException { fs.delete(new Path(bucketPath1, "dir1"), true); fs.delete(new Path(bucketPath1, "dir2"), true); - OFSPath ofsPath = new OFSPath(bucketPath1); + OFSPath ofsPath = new OFSPath(bucketPath1, conf); OzoneVolume volume = objectStore.getVolume(ofsPath.getVolumeName()); volume.deleteBucket(ofsPath.getBucketName()); objectStore.deleteVolume(ofsPath.getVolumeName()); @@ -991,7 +1036,7 @@ public void testListStatusRootAndVolumeNonRecursive() throws Exception { Assert.assertEquals(2, fileStatusBucket.length); // listStatus("/volume") Path volume = new 
Path( - OZONE_URI_DELIMITER + new OFSPath(bucketPath1).getVolumeName()); + OZONE_URI_DELIMITER + new OFSPath(bucketPath1, conf).getVolumeName()); FileStatus[] fileStatusVolume = ofs.listStatus(volume); Assert.assertEquals(1, fileStatusVolume.length); Assert.assertEquals(ownerShort, fileStatusVolume[0].getOwner()); @@ -1092,7 +1137,7 @@ public void testListStatusRootAndVolumeRecursive() throws IOException { listStatusCheckHelper(bucketPath1); // listStatus("/volume") Path volume = new Path( - OZONE_URI_DELIMITER + new OFSPath(bucketPath1).getVolumeName()); + OZONE_URI_DELIMITER + new OFSPath(bucketPath1, conf).getVolumeName()); listStatusCheckHelper(volume); // listStatus("/") Path root = new Path(OZONE_URI_DELIMITER); @@ -1178,7 +1223,123 @@ public void testListStatusRootAndVolumeContinuation() throws IOException { } } - /* + @Test + public void testSharedTmpDir() throws IOException { + // Prep + conf.setBoolean(OZONE_OM_ENABLE_OFS_SHARED_TMP_DIR, true); + // Use ClientProtocol to pass in volume ACL, ObjectStore won't do it + ClientProtocol proxy = objectStore.getClientProxy(); + // Get default acl rights for user + OzoneAclConfig aclConfig = conf.getObject(OzoneAclConfig.class); + ACLType userRights = aclConfig.getUserDefaultRights(); + // Construct ACL for world access + // ACL admin owner, world read+write + BitSet aclRights = new BitSet(); + aclRights.set(READ.ordinal()); + aclRights.set(WRITE.ordinal()); + List objectAcls = new ArrayList<>(); + objectAcls.add(new OzoneAcl(ACLIdentityType.WORLD, "", + aclRights, ACCESS)); + objectAcls.add(new OzoneAcl(ACLIdentityType.USER, "admin", userRights, + ACCESS)); + // volume acls have all access to admin and read+write access to world + + // Construct VolumeArgs + VolumeArgs volumeArgs = new VolumeArgs.Builder() + .setAdmin("admin") + .setOwner("admin") + .setAcls(Collections.unmodifiableList(objectAcls)) + .setQuotaInNamespace(1000) + .setQuotaInBytes(Long.MAX_VALUE).build(); + // Sanity check + 
Assert.assertEquals("admin", volumeArgs.getOwner()); + Assert.assertEquals("admin", volumeArgs.getAdmin()); + Assert.assertEquals(Long.MAX_VALUE, volumeArgs.getQuotaInBytes()); + Assert.assertEquals(1000, volumeArgs.getQuotaInNamespace()); + Assert.assertEquals(0, volumeArgs.getMetadata().size()); + Assert.assertEquals(2, volumeArgs.getAcls().size()); + // Create volume "tmp" with world access read+write to access tmp mount + // admin has all access to tmp mount + proxy.createVolume(OFSPath.OFS_MOUNT_TMP_VOLUMENAME, volumeArgs); + + OzoneVolume vol = objectStore.getVolume(OFSPath.OFS_MOUNT_TMP_VOLUMENAME); + Assert.assertNotNull(vol); + + // Begin test + String hashedUsername = OFSPath.getTempMountBucketNameOfCurrentUser(); + + // Expect failure since temp bucket for current user is not created yet + try { + vol.getBucket(hashedUsername); + } catch (OMException ex) { + // Expect BUCKET_NOT_FOUND + if (!ex.getResult().equals(BUCKET_NOT_FOUND)) { + Assert.fail("Temp bucket for current user shouldn't have been created"); + } + } + + // set acls for shared tmp mount under the tmp volume + objectAcls.clear(); + objectAcls.add(new OzoneAcl(ACLIdentityType.USER, "admin", userRights, + ACCESS)); + aclRights.clear(DELETE.ordinal()); + aclRights.set(LIST.ordinal()); + objectAcls.add(new OzoneAcl(ACLIdentityType.WORLD, "", + aclRights, ACCESS)); + objectAcls.add(new OzoneAcl(ACLIdentityType.USER, "admin", userRights, + ACCESS)); + // bucket acls have all access to admin and read+write+list access to world + + BucketArgs bucketArgs = new BucketArgs.Builder() + .setOwner("admin") + .setAcls(Collections.unmodifiableList(objectAcls)) + .setQuotaInNamespace(1000) + .setQuotaInBytes(Long.MAX_VALUE).build(); + + // Create bucket "tmp" with world access read+write+list to tmp directory + // admin has all access to tmp mount + proxy.createBucket(OFSPath.OFS_MOUNT_TMP_VOLUMENAME, + OFSPath.OFS_MOUNT_TMP_VOLUMENAME, bucketArgs); + + // Write under /tmp/ + Path dir1 = new 
Path("/tmp/dir1"); + userOfs.mkdirs(dir1); + + try (FSDataOutputStream stream = userOfs.create(new Path( + "/tmp/dir1/file1"))) { + stream.write(1); + } + + // Verify temp bucket creation + OzoneBucket bucket = vol.getBucket("tmp"); + Assert.assertNotNull(bucket); + // Verify dir1 creation + FileStatus[] fileStatuses = fs.listStatus(new Path("/tmp/")); + Assert.assertEquals(1, fileStatuses.length); + Assert.assertEquals( + "/tmp/dir1", fileStatuses[0].getPath().toUri().getPath()); + // Verify file1 creation + FileStatus[] fileStatusesInDir1 = fs.listStatus(dir1); + Assert.assertEquals(1, fileStatusesInDir1.length); + Assert.assertEquals("/tmp/dir1/file1", + fileStatusesInDir1[0].getPath().toUri().getPath()); + + // Cleanup + userOfs.delete(dir1, true); + try { + userOfs.delete(new Path("/tmp"), true); + } catch (OMException ex) { + // Expect PERMISSION_DENIED, User regularuser1 doesn't have DELETE + // permission for /tmp + if (!ex.getResult().equals(PERMISSION_DENIED)) { + Assert.fail("Temp bucket cannot be deleted by current user"); + } + } + fs.delete(new Path("/tmp"), true); + proxy.deleteVolume(OFSPath.OFS_MOUNT_TMP_VOLUMENAME); + } + + /* * OFS: Test /tmp mount behavior. 
*/ @Test @@ -1195,12 +1356,11 @@ public void testTempMount() throws IOException { // Construct VolumeArgs VolumeArgs volumeArgs = new VolumeArgs.Builder() .setAcls(Collections.singletonList(aclWorldAccess)) - .setQuotaInNamespace(1000) - .setQuotaInBytes(Long.MAX_VALUE).build(); + .setQuotaInNamespace(1000).build(); // Sanity check Assert.assertNull(volumeArgs.getOwner()); Assert.assertNull(volumeArgs.getAdmin()); - Assert.assertEquals(Long.MAX_VALUE, volumeArgs.getQuotaInBytes()); + Assert.assertEquals(-1, volumeArgs.getQuotaInBytes()); Assert.assertEquals(1000, volumeArgs.getQuotaInNamespace()); Assert.assertEquals(0, volumeArgs.getMetadata().size()); Assert.assertEquals(1, volumeArgs.getAcls().size()); @@ -1879,7 +2039,7 @@ public void testBucketDefaultsShouldNotBeInheritedToFileForNonEC() .createFile(vol + "/" + buck + "/test", (short) 3, true, false)) { file.write(new byte[1024]); } - OFSPath ofsPath = new OFSPath(vol + "/" + buck + "/test"); + OFSPath ofsPath = new OFSPath(vol + "/" + buck + "/test", conf); final OzoneBucket bucket = adapter.getBucket(ofsPath, false); final OzoneKeyDetails key = bucket.getKey(ofsPath.getKeyName()); Assert.assertEquals(key.getReplicationConfig().getReplicationType().name(), @@ -1909,7 +2069,7 @@ public void testBucketDefaultsShouldBeInheritedToFileForEC() .createFile(vol + "/" + buck + "/test", (short) 3, true, false)) { file.write(new byte[1024]); } - OFSPath ofsPath = new OFSPath(vol + "/" + buck + "/test"); + OFSPath ofsPath = new OFSPath(vol + "/" + buck + "/test", conf); final OzoneBucket bucket = adapter.getBucket(ofsPath, false); final OzoneKeyDetails key = bucket.getKey(ofsPath.getKeyName()); Assert.assertEquals(ReplicationType.EC.name(), diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMDatanodeProtocolServer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMDatanodeProtocolServer.java new file mode 100644 index 000000000000..cfa34fc445a0 --- 
/dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMDatanodeProtocolServer.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm; + +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer; +import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; +import org.junit.Assert; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.Collections; +import java.util.concurrent.TimeoutException; + +/** + * Test for StorageContainerDatanodeProtocolProtos. + */ +public class TestSCMDatanodeProtocolServer { + + @Test + public void ensureTermAndDeadlineOnCommands() + throws IOException, TimeoutException { + OzoneStorageContainerManager scm = + Mockito.mock(OzoneStorageContainerManager.class); + + ReplicateContainerCommand command = new ReplicateContainerCommand(1L, + Collections.emptyList()); + command.setTerm(5L); + command.setDeadline(1234L); + StorageContainerDatanodeProtocolProtos.SCMCommandProto proto = + SCMDatanodeProtocolServer.getCommandResponse(command, scm); + + Assert.assertEquals(StorageContainerDatanodeProtocolProtos.SCMCommandProto + .Type.replicateContainerCommand, proto.getCommandType()); + Assert.assertEquals(5L, proto.getTerm()); + Assert.assertEquals(1234L, proto.getDeadlineMsSinceEpoch()); + } +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java index df8ee103f6df..2a8a945a0b27 100644 --- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java @@ -392,7 +392,7 @@ private void testECReconstructionCoordinator(List missingIndexes) createKeyAndWriteData(keyString, bucket); ECReconstructionCoordinator coordinator = new ECReconstructionCoordinator(config, certClient, - ECReconstructionMetrics.create()); + null, ECReconstructionMetrics.create()); ECReconstructionMetrics metrics = coordinator.getECReconstructionMetrics(); OzoneKeyDetails key = bucket.getKey(keyString); @@ -569,7 +569,7 @@ public void testECReconstructionCoordinatorShouldCleanupContainersOnFailure() Assert.assertThrows(IOException.class, () -> { ECReconstructionCoordinator coordinator = new ECReconstructionCoordinator(config, certClient, - ECReconstructionMetrics.create()); + null, ECReconstructionMetrics.create()); coordinator.reconstructECContainerGroup(conID, (ECReplicationConfig) containerPipeline.getReplicationConfig(), sourceNodeMap, targetNodeMap); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java index 0eb2e30f1733..e7a4cf03197c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java @@ -320,11 +320,16 @@ abstract class Builder { protected Optional omId = Optional.empty(); protected Boolean randomContainerPort = true; + protected Boolean randomContainerStreamPort = true; + protected Boolean enableContainerDatastream = true; protected Optional datanodeReservedSpace = Optional.empty(); protected Optional chunkSize = Optional.empty(); protected OptionalInt streamBufferSize = OptionalInt.empty(); protected Optional streamBufferFlushSize = 
Optional.empty(); + protected Optional dataStreamBufferFlushSize = Optional.empty(); + protected Optional datastreamWindowSize = Optional.empty(); protected Optional streamBufferMaxSize = Optional.empty(); + protected OptionalInt dataStreamMinPacketSize = OptionalInt.empty(); protected Optional blockSize = Optional.empty(); protected Optional streamBufferSizeUnit = Optional.empty(); protected boolean includeRecon = false; @@ -565,6 +570,21 @@ public Builder setStreamBufferMaxSize(long size) { return this; } + public Builder setDataStreamBufferFlushize(long size) { + dataStreamBufferFlushSize = Optional.of(size); + return this; + } + + public Builder setDataStreamMinPacketSize(int size) { + dataStreamMinPacketSize = OptionalInt.of(size); + return this; + } + + public Builder setDataStreamStreamWindowSize(long size) { + datastreamWindowSize = Optional.of(size); + return this; + } + /** * Sets the block size for stream buffer. * diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 1bdaef3a9b69..b9e338870f3b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -86,10 +86,12 @@ import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_ADDRESS_KEY; import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_DATANODE_ADDRESS_KEY; import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_HTTP_ADDRESS_KEY; +import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_TASK_SAFEMODE_WAIT_THRESHOLD; import static org.apache.hadoop.hdds.scm.ScmConfig.ConfigStrings.HDDS_SCM_INIT_DEFAULT_LAYOUT_VERSION; import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_CONTAINER_IPC_PORT; import static 
org.apache.hadoop.ozone.OzoneConfigKeys.DFS_CONTAINER_IPC_RANDOM_PORT; import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_CONTAINER_RATIS_ADMIN_PORT; +import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT; import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_CONTAINER_RATIS_IPC_PORT; import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_CONTAINER_RATIS_IPC_RANDOM_PORT; import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_CONTAINER_RATIS_SERVER_PORT; @@ -653,6 +655,15 @@ protected void initializeConfiguration() throws IOException { if (!streamBufferMaxSize.isPresent()) { streamBufferMaxSize = Optional.of(2 * streamBufferFlushSize.get()); } + if (!dataStreamBufferFlushSize.isPresent()) { + dataStreamBufferFlushSize = Optional.of((long) 4 * chunkSize.get()); + } + if (!dataStreamMinPacketSize.isPresent()) { + dataStreamMinPacketSize = OptionalInt.of(chunkSize.get() / 4); + } + if (!datastreamWindowSize.isPresent()) { + datastreamWindowSize = Optional.of((long) 8 * chunkSize.get()); + } if (!blockSize.isPresent()) { blockSize = Optional.of(2 * streamBufferMaxSize.get()); } @@ -669,6 +680,13 @@ protected void initializeConfiguration() throws IOException { streamBufferSizeUnit.get().toBytes(streamBufferMaxSize.get()))); clientConfig.setStreamBufferFlushSize(Math.round( streamBufferSizeUnit.get().toBytes(streamBufferFlushSize.get()))); + clientConfig.setDataStreamBufferFlushSize(Math.round( + streamBufferSizeUnit.get().toBytes(dataStreamBufferFlushSize.get()))); + clientConfig.setDataStreamMinPacketSize((int) Math.round( + streamBufferSizeUnit.get() + .toBytes(dataStreamMinPacketSize.getAsInt()))); + clientConfig.setStreamWindowSize(Math.round( + streamBufferSizeUnit.get().toBytes(datastreamWindowSize.get()))); conf.setFromObject(clientConfig); conf.setStorageSize(ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY, @@ -838,8 +856,6 @@ protected List createHddsDatanodes( reservedSpaceString); 
dnConf.set(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR, ratisDir.toString()); - dnConf.set(OzoneConfigKeys.OZONE_CONTAINER_COPY_WORKDIR, - workDir.toString()); if (reconServer != null) { OzoneStorageContainerManager reconScm = reconServer.getReconStorageContainerManager(); @@ -916,6 +932,10 @@ private void configureHddsDatanodes() { randomContainerPort); conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_IPC_RANDOM_PORT, randomContainerPort); + conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_ENABLED, + enableContainerDatastream); + conf.setBoolean(DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT, + randomContainerStreamPort); conf.setFromObject(new ReplicationConfig().setPort(0)); } @@ -949,6 +969,7 @@ protected void configureRecon() throws IOException { conf.set(OZONE_RECON_HTTP_ADDRESS_KEY, "0.0.0.0:0"); conf.set(OZONE_RECON_DATANODE_ADDRESS_KEY, "0.0.0.0:0"); + conf.set(OZONE_RECON_TASK_SAFEMODE_WAIT_THRESHOLD, "10s"); ConfigurationProvider.setConfiguration(conf); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneCluster.java index 09fb5e9ef79d..4d4e9fb8b615 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneCluster.java @@ -216,6 +216,10 @@ public void testContainerRandomPort() throws IOException { ozoneConf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_IPC_RANDOM_PORT, true); ozoneConf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_IPC_RANDOM_PORT, true); + conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_ENABLED, + true); + ozoneConf.setBoolean( + OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT, true); List stateMachines = new ArrayList<>(); try { diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java index 137ae08daded..cac4c1124171 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java @@ -112,6 +112,7 @@ private void addPropertiesNotInXml() { ReconConfigKeys.OZONE_RECON_DATANODE_ADDRESS_KEY, ReconConfigKeys.OZONE_RECON_DATANODE_BIND_HOST_KEY, ReconConfigKeys.OZONE_RECON_PROMETHEUS_HTTP_ENDPOINT, + ReconConfigKeys.OZONE_RECON_TASK_SAFEMODE_WAIT_THRESHOLD, ReconServerConfigKeys.OZONE_RECON_SCM_DB_DIR, ReconServerConfigKeys.OZONE_RECON_METRICS_HTTP_CONNECTION_TIMEOUT, ReconServerConfigKeys @@ -132,7 +133,8 @@ private void addPropertiesNotInXml() { OMConfigKeys.OZONE_RANGER_OM_CONNECTION_REQUEST_TIMEOUT, OMConfigKeys.OZONE_RANGER_HTTPS_ADDRESS_KEY, OMConfigKeys.OZONE_OM_RANGER_HTTPS_ADMIN_API_USER, - OMConfigKeys.OZONE_OM_RANGER_HTTPS_ADMIN_API_PASSWD + OMConfigKeys.OZONE_OM_RANGER_HTTPS_ADMIN_API_PASSWD, + ScmConfigKeys.OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY )); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java index 76776f86a1eb..5952ce5947cc 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java @@ -24,7 +24,9 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.security.KeyPair; +import java.security.cert.CertificateExpiredException; import java.security.cert.X509Certificate; +import java.time.Duration; import java.time.LocalDate; import java.time.LocalDateTime; 
import java.time.temporal.ChronoUnit; @@ -37,6 +39,9 @@ import org.apache.hadoop.hdds.conf.DefaultConfigManager; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.SCMSecurityProtocol; +import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCertResponseProto; +import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.scm.ScmConfig; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.ScmInfo; @@ -51,7 +56,10 @@ import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.x509.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.client.DNCertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; +import org.apache.hadoop.hdds.security.x509.certificates.utils.SelfSignedCertificate; +import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; import org.apache.hadoop.hdds.security.x509.keys.HDDSKeyGenerator; import org.apache.hadoop.hdds.security.x509.keys.KeyCodec; import org.apache.hadoop.hdds.utils.HAUtils; @@ -71,11 +79,13 @@ import org.apache.hadoop.ozone.om.helpers.S3SecretValue; import org.apache.hadoop.ozone.om.protocolPB.OmTransportFactory; import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB; +import org.apache.hadoop.ozone.security.OMCertificateClient; import org.apache.hadoop.ozone.security.OzoneTokenIdentifier; import org.apache.hadoop.security.KerberosAuthException; import org.apache.hadoop.security.SaslRpcServer.AuthMethod; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.client.AuthenticationException; +import 
org.apache.hadoop.security.ssl.KeyStoreTestUtil; import org.apache.hadoop.security.token.Token; import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.GenericTestUtils.LogCapturer; @@ -85,6 +95,7 @@ import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.StringUtils; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_RENEW_GRACE_DURATION; import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; import static org.apache.hadoop.hdds.scm.ScmConfig.ConfigStrings.HDDS_SCM_KERBEROS_KEYTAB_FILE_KEY; import static org.apache.hadoop.hdds.scm.ScmConfig.ConfigStrings.HDDS_SCM_KERBEROS_PRINCIPAL_KEY; @@ -117,6 +128,7 @@ import org.bouncycastle.asn1.x500.RDN; import org.bouncycastle.asn1.x500.X500Name; import org.bouncycastle.asn1.x500.style.BCStyle; +import org.bouncycastle.cert.X509CertificateHolder; import org.bouncycastle.cert.jcajce.JcaX509CertificateHolder; import org.junit.After; import static org.junit.Assert.assertEquals; @@ -126,13 +138,21 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; + +import org.junit.Assert; import org.junit.Before; +import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.junit.rules.Timeout; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import static org.mockito.ArgumentMatchers.anyObject; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.slf4j.event.Level.INFO; /** @@ -195,8 +215,12 @@ public void init() { conf.set(OZONE_METADATA_DIRS, metaDirPath.toString()); conf.setBoolean(OZONE_SECURITY_ENABLED_KEY, true); conf.set(HADOOP_SECURITY_AUTHENTICATION, KERBEROS.name()); + conf.set(HDDS_X509_RENEW_GRACE_DURATION, 
"PT5S"); // 5s workDir = GenericTestUtils.getTestDir(getClass().getSimpleName()); + clusterId = UUID.randomUUID().toString(); + scmId = UUID.randomUUID().toString(); + omId = UUID.randomUUID().toString(); startMiniKdc(); setSecureConfig(); @@ -215,6 +239,9 @@ public void stop() { if (scm != null) { scm.stop(); } + if (om != null) { + om.stop(); + } IOUtils.closeQuietly(om); IOUtils.closeQuietly(omClient); } catch (Exception e) { @@ -372,10 +399,6 @@ public void testAdminAccessControlException() throws Exception { } private void initSCM() throws IOException { - clusterId = UUID.randomUUID().toString(); - scmId = UUID.randomUUID().toString(); - omId = UUID.randomUUID().toString(); - final String path = folder.newFolder().toString(); Path scmPath = Paths.get(path, "scm-meta"); Files.createDirectories(scmPath); @@ -823,6 +846,218 @@ public void testSecureOmInitSuccess() throws Exception { } + /** + * Test successful certificate rotation. + */ + @Test + public void testCertificateRotation() throws Exception { + OMStorage omStorage = new OMStorage(conf); + omStorage.setClusterId(clusterId); + omStorage.setOmId(omId); + OzoneManager.setTestSecureOmFlag(true); + + SecurityConfig securityConfig = new SecurityConfig(conf); + CertificateCodec certCodec = new CertificateCodec(securityConfig, "om"); + OMCertificateClient client = + new OMCertificateClient(securityConfig, omStorage, scmId); + client.init(); + + // save first cert + final int certificateLifetime = 20; // seconds + X509CertificateHolder certHolder = generateX509CertHolder(conf, + new KeyPair(client.getPublicKey(), client.getPrivateKey()), + null, Duration.ofSeconds(certificateLifetime)); + String certId = certHolder.getSerialNumber().toString(); + certCodec.writeCertificate(certHolder); + client.setCertificateId(certId); + omStorage.setOmCertSerialId(certId); + omStorage.forceInitialize(); + + // first renewed cert + X509CertificateHolder newCertHolder = generateX509CertHolder(conf, + null, 
LocalDateTime.now().plus(securityConfig.getRenewalGracePeriod()), + Duration.ofSeconds(certificateLifetime)); + String pemCert = CertificateCodec.getPEMEncodedString(newCertHolder); + SCMGetCertResponseProto responseProto = SCMGetCertResponseProto.newBuilder() + .setResponseCode(SCMSecurityProtocolProtos + .SCMGetCertResponseProto.ResponseCode.success) + .setX509Certificate(pemCert) + .setX509CACertificate(pemCert) + .build(); + SCMSecurityProtocolClientSideTranslatorPB scmClient = + mock(SCMSecurityProtocolClientSideTranslatorPB.class); + when(scmClient.getOMCertChain(anyObject(), anyString())) + .thenReturn(responseProto); + client.setSecureScmClient(scmClient); + + // create Ozone Manager instance, it will start the monitor task + conf.set(OZONE_SCM_CLIENT_ADDRESS_KEY, "localhost"); + om = OzoneManager.createOm(conf); + om.setCertClient(client); + + // check after renew, client will have the new cert ID + String id1 = newCertHolder.getSerialNumber().toString(); + GenericTestUtils.waitFor(() -> + id1.equals(client.getCertificate().getSerialNumber().toString()), + 1000, certificateLifetime * 1000); + + // test the second time certificate rotation + // second renewed cert + newCertHolder = generateX509CertHolder(conf, + null, null, Duration.ofSeconds(certificateLifetime)); + pemCert = CertificateCodec.getPEMEncodedString(newCertHolder); + responseProto = SCMGetCertResponseProto.newBuilder() + .setResponseCode(SCMSecurityProtocolProtos + .SCMGetCertResponseProto.ResponseCode.success) + .setX509Certificate(pemCert) + .setX509CACertificate(pemCert) + .build(); + when(scmClient.getOMCertChain(anyObject(), anyString())) + .thenReturn(responseProto); + String id2 = newCertHolder.getSerialNumber().toString(); + + // check after renew, client will have the new cert ID + GenericTestUtils.waitFor(() -> + id2.equals(client.getCertificate().getSerialNumber().toString()), + 1000, certificateLifetime * 1000); + } + /** + * Test unexpected SCMGetCertResponseProto returned from 
SCM. + */ + @Test + public void testCertificateRotationRecoverableFailure() throws Exception { + LogCapturer omLogs = LogCapturer.captureLogs(OMCertificateClient.LOG); + OMStorage omStorage = new OMStorage(conf); + omStorage.setClusterId(clusterId); + omStorage.setOmId(omId); + OzoneManager.setTestSecureOmFlag(true); + + SecurityConfig securityConfig = new SecurityConfig(conf); + CertificateCodec certCodec = new CertificateCodec(securityConfig, "om"); + OMCertificateClient client = + new OMCertificateClient(securityConfig, omStorage, scmId); + client.init(); + + // save first cert + final int certificateLifetime = 20; // seconds + X509CertificateHolder certHolder = generateX509CertHolder(conf, + new KeyPair(client.getPublicKey(), client.getPrivateKey()), + null, Duration.ofSeconds(certificateLifetime)); + String certId = certHolder.getSerialNumber().toString(); + certCodec.writeCertificate(certHolder); + client.setCertificateId(certId); + omStorage.setOmCertSerialId(certId); + omStorage.forceInitialize(); + + // prepare a mocked scmClient to certificate signing + SCMSecurityProtocolClientSideTranslatorPB scmClient = + mock(SCMSecurityProtocolClientSideTranslatorPB.class); + client.setSecureScmClient(scmClient); + + Duration gracePeriod = securityConfig.getRenewalGracePeriod(); + X509CertificateHolder newCertHolder = generateX509CertHolder(conf, null, + LocalDateTime.now().plus(gracePeriod), + Duration.ofSeconds(certificateLifetime)); + String pemCert = CertificateCodec.getPEMEncodedString(newCertHolder); + // provide an invalid SCMGetCertResponseProto. Without + // setX509CACertificate(pemCert), signAndStoreCert will throw exception. 
+ SCMSecurityProtocolProtos.SCMGetCertResponseProto responseProto = + SCMSecurityProtocolProtos.SCMGetCertResponseProto + .newBuilder().setResponseCode(SCMSecurityProtocolProtos + .SCMGetCertResponseProto.ResponseCode.success) + .setX509Certificate(pemCert) + .build(); + when(scmClient.getOMCertChain(anyObject(), anyString())) + .thenReturn(responseProto); + + // check that new cert ID should not equal to current cert ID + String certId1 = newCertHolder.getSerialNumber().toString(); + Assert.assertFalse(certId1.equals( + client.getCertificate().getSerialNumber().toString())); + + // certificate failed to renew, client still hold the old expired cert. + Thread.sleep(certificateLifetime * 1000); + Assert.assertTrue(certId.equals( + client.getCertificate().getSerialNumber().toString())); + try { + client.getCertificate().checkValidity(); + } catch (Exception e) { + Assert.assertTrue(e instanceof CertificateExpiredException); + } + Assert.assertTrue(omLogs.getOutput().contains( + "Error while signing and storing SCM signed certificate.")); + + // provide a new valid SCMGetCertResponseProto + newCertHolder = generateX509CertHolder(conf, null, null, + Duration.ofSeconds(certificateLifetime)); + pemCert = CertificateCodec.getPEMEncodedString(newCertHolder); + responseProto = SCMSecurityProtocolProtos.SCMGetCertResponseProto + .newBuilder().setResponseCode(SCMSecurityProtocolProtos + .SCMGetCertResponseProto.ResponseCode.success) + .setX509Certificate(pemCert) + .setX509CACertificate(pemCert) + .build(); + when(scmClient.getOMCertChain(anyObject(), anyString())) + .thenReturn(responseProto); + String certId2 = newCertHolder.getSerialNumber().toString(); + + // check after renew, client will have the new cert ID + GenericTestUtils.waitFor(() -> { + String newCertId = client.getCertificate().getSerialNumber().toString(); + return newCertId.equals(certId2); + }, 1000, certificateLifetime * 1000); + } + + /** + * Test the directory rollback failure case. 
+ */ + @Test + @Ignore("Run it locally since it will terminate the process.") + public void testCertificateRotationUnRecoverableFailure() throws Exception { + LogCapturer omLogs = LogCapturer.captureLogs(OzoneManager.getLogger()); + OMStorage omStorage = new OMStorage(conf); + omStorage.setClusterId(clusterId); + omStorage.setOmId(omId); + OzoneManager.setTestSecureOmFlag(true); + + SecurityConfig securityConfig = new SecurityConfig(conf); + CertificateCodec certCodec = new CertificateCodec(securityConfig, "om"); + OMCertificateClient client = + new OMCertificateClient(securityConfig, omStorage, scmId); + client.init(); + + // save first cert + final int certificateLifetime = 20; // seconds + X509CertificateHolder certHolder = generateX509CertHolder(conf, + new KeyPair(client.getPublicKey(), client.getPrivateKey()), + null, Duration.ofSeconds(certificateLifetime)); + String certId = certHolder.getSerialNumber().toString(); + certCodec.writeCertificate(certHolder); + omStorage.setOmCertSerialId(certId); + omStorage.forceInitialize(); + + X509CertificateHolder newCertHolder = generateX509CertHolder(conf, null, + null, Duration.ofSeconds(certificateLifetime)); + DNCertificateClient mockClient = mock(DNCertificateClient.class); + when(mockClient.getCertificate()).thenReturn( + CertificateCodec.getX509Certificate(newCertHolder)); + when(mockClient.timeBeforeExpiryGracePeriod(anyObject())) + .thenReturn(Duration.ZERO); + when(mockClient.renewAndStoreKeyAndCertificate(anyObject())).thenThrow( + new CertificateException("renewAndStoreKeyAndCert failed ", + CertificateException.ErrorCode.ROLLBACK_ERROR)); + + // create Ozone Manager instance, it will start the monitor task + conf.set(OZONE_SCM_CLIENT_ADDRESS_KEY, "localhost"); + om = OzoneManager.createOm(conf); + om.setCertClient(mockClient); + + // check error message during renew + GenericTestUtils.waitFor(() -> omLogs.getOutput().contains( + "OzoneManage shutdown because certificate rollback failure."), + 1000, 
certificateLifetime * 1000); + } + public void validateCertificate(X509Certificate cert) throws Exception { // Assert that we indeed have a self signed certificate. @@ -871,4 +1106,23 @@ private void initializeOmStorage(OMStorage omStorage) throws IOException { } omStorage.initialize(); } + + private static X509CertificateHolder generateX509CertHolder( + OzoneConfiguration conf, KeyPair keyPair, LocalDateTime startDate, + Duration certLifetime) throws Exception { + if (keyPair == null) { + keyPair = KeyStoreTestUtil.generateKeyPair("RSA"); + } + LocalDateTime start = startDate == null ? LocalDateTime.now() : startDate; + LocalDateTime end = start.plus(certLifetime); + return SelfSignedCertificate.newBuilder() + .setBeginDate(start) + .setEndDate(end) + .setClusterID("cluster") + .setKey(keyPair) + .setSubject("localhost") + .setConfiguration(conf) + .setScmID("test") + .build(); + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManager.java index 19c570cd0fce..89a17f7b9528 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManager.java @@ -261,7 +261,7 @@ private void testRpcPermissionWithConf( private void verifyPermissionDeniedException(Exception e, String userName) { String expectedErrorMessage = "Access denied for user " - + userName + ". " + "Superuser privilege is required."; + + userName + ". 
" + "SCM superuser privilege is required."; Assert.assertTrue(e instanceof IOException); Assert.assertEquals(expectedErrorMessage, e.getMessage()); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManagerHelper.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManagerHelper.java index 3ff60412322f..2d031ada23d4 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManagerHelper.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManagerHelper.java @@ -112,7 +112,7 @@ public List getPendingDeletionBlocks(Long containerID) for (Table.KeyValue entry : kvs) { pendingDeletionBlocks - .add(entry.getKey().replace(cData.deletingBlockKeyPrefix(), "")); + .add(entry.getKey().replace(cData.getDeletingBlockKeyPrefix(), "")); } } return pendingDeletionBlocks; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/CertificateClientTestImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/CertificateClientTestImpl.java index 253d45355a3e..aed1a3852233 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/CertificateClientTestImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/CertificateClientTestImpl.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.file.Path; import java.security.KeyPair; import java.security.PrivateKey; import java.security.PublicKey; @@ -25,11 +26,16 @@ import java.security.cert.X509Certificate; import java.time.Duration; import java.time.LocalDateTime; +import java.time.ZoneId; import java.util.Collections; +import java.util.Date; import java.util.List; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.security.ssl.KeyStoresFactory; import 
org.apache.hadoop.hdds.security.x509.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.authority.DefaultApprover; +import org.apache.hadoop.hdds.security.x509.certificate.authority.PKIProfiles.DefaultProfile; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.certificates.utils.SelfSignedCertificate; @@ -37,11 +43,15 @@ import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; import org.apache.hadoop.hdds.security.x509.keys.HDDSKeyGenerator; +import org.apache.hadoop.hdds.security.x509.keys.SecurityUtil; import org.bouncycastle.cert.X509CertificateHolder; import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter; +import org.bouncycastle.pkcs.PKCS10CertificationRequest; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_DEFAULT_DURATION; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_DEFAULT_DURATION_DEFAULT; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_MAX_DURATION; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_MAX_DURATION_DEFAULT; /** * Test implementation for CertificateClient. 
To be used only for test @@ -50,10 +60,16 @@ public class CertificateClientTestImpl implements CertificateClient { - private final SecurityConfig securityConfig; - private final KeyPair keyPair; private final OzoneConfiguration config; - private final X509Certificate x509Certificate; + private final SecurityConfig securityConfig; + private KeyPair keyPair; + private X509Certificate x509Certificate; + private final KeyPair rootKeyPair; + private final X509Certificate rootCert; + private HDDSKeyGenerator keyGen; + private DefaultApprover approver; + private KeyStoresFactory serverKeyStoresFactory; + private KeyStoresFactory clientKeyStoresFactory; public CertificateClientTestImpl(OzoneConfiguration conf) throws Exception { this(conf, true); @@ -62,32 +78,59 @@ public CertificateClientTestImpl(OzoneConfiguration conf) throws Exception { public CertificateClientTestImpl(OzoneConfiguration conf, boolean rootCA) throws Exception { securityConfig = new SecurityConfig(conf); - HDDSKeyGenerator keyGen = - new HDDSKeyGenerator(securityConfig.getConfiguration()); + keyGen = new HDDSKeyGenerator(securityConfig.getConfiguration()); keyPair = keyGen.generateKey(); + rootKeyPair = keyGen.generateKey(); config = conf; LocalDateTime start = LocalDateTime.now(); - String certDurationString = conf.get(HDDS_X509_DEFAULT_DURATION, - HDDS_X509_DEFAULT_DURATION_DEFAULT); - Duration certDuration = Duration.parse(certDurationString); - LocalDateTime end = start.plus(certDuration); + String rootCACertDuration = conf.get(HDDS_X509_MAX_DURATION, + HDDS_X509_MAX_DURATION_DEFAULT); + LocalDateTime end = start.plus(Duration.parse(rootCACertDuration)); + // Generate RootCA certificate SelfSignedCertificate.Builder builder = SelfSignedCertificate.newBuilder() .setBeginDate(start) .setEndDate(end) .setClusterID("cluster1") - .setKey(keyPair) - .setSubject("localhost") + .setKey(rootKeyPair) + .setSubject("rootCA@localhost") .setConfiguration(config) - .setScmID("TestScmId1"); - if (rootCA) { - 
builder.makeCA(); - } - X509CertificateHolder certificateHolder = null; - certificateHolder = builder.build(); - x509Certificate = new JcaX509CertificateConverter().getCertificate( - certificateHolder); + .setScmID("scm1") + .makeCA(); + rootCert = new JcaX509CertificateConverter().getCertificate( + builder.build()); + + // Generate normal certificate, signed by RootCA certificate + approver = new DefaultApprover(new DefaultProfile(), securityConfig); + + CertificateSignRequest.Builder csrBuilder = getCSRBuilder(); + // Get host name. + csrBuilder.setKey(keyPair) + .setConfiguration(config) + .setScmID("scm1") + .setClusterID("cluster1") + .setSubject("localhost") + .setDigitalSignature(true) + .setDigitalEncryption(true); + + start = LocalDateTime.now(); + String certDuration = conf.get(HDDS_X509_DEFAULT_DURATION, + HDDS_X509_DEFAULT_DURATION_DEFAULT); + X509CertificateHolder certificateHolder = + approver.sign(securityConfig, rootKeyPair.getPrivate(), + new X509CertificateHolder(rootCert.getEncoded()), + Date.from(start.atZone(ZoneId.systemDefault()).toInstant()), + Date.from(start.plus(Duration.parse(certDuration)) + .atZone(ZoneId.systemDefault()).toInstant()), + csrBuilder.build(), "scm1", "cluster1"); + x509Certificate = + new JcaX509CertificateConverter().getCertificate(certificateHolder); + + serverKeyStoresFactory = SecurityUtil.getServerKeyStoresFactory( + securityConfig, this, true); + clientKeyStoresFactory = SecurityUtil.getClientKeyStoresFactory( + securityConfig, this, true); } @Override @@ -119,7 +162,7 @@ public X509Certificate getCertificate() { @Override public X509Certificate getCACertificate() { - return x509Certificate; + return rootCert; } @Override @@ -127,6 +170,10 @@ public boolean verifyCertificate(X509Certificate certificate) { return true; } + @Override + public void setCertificateId(String certSerialId) { + } + @Override public byte[] signDataStream(InputStream stream) throws CertificateException { @@ -150,11 +197,29 @@ public boolean 
verifySignature(byte[] data, byte[] signature, return true; } + @Override + public CertificateSignRequest.Builder getCSRBuilder(KeyPair key) + throws CertificateException { + return null; + } + @Override public CertificateSignRequest.Builder getCSRBuilder() { return new CertificateSignRequest.Builder(); } + @Override + public String signAndStoreCertificate(PKCS10CertificationRequest request, + Path certPath) throws CertificateException { + return null; + } + + @Override + public String signAndStoreCertificate(PKCS10CertificationRequest request) + throws CertificateException { + return null; + } + @Override public X509Certificate queryCertificate(String query) { return null; @@ -255,4 +320,53 @@ public boolean processCrl(CRLInfo crl) { return false; } + public void renewKey() throws Exception { + KeyPair newKeyPair = keyGen.generateKey(); + CertificateSignRequest.Builder csrBuilder = getCSRBuilder(); + // Get host name. + csrBuilder.setKey(newKeyPair) + .setConfiguration(config) + .setScmID("scm1") + .setClusterID("cluster1") + .setSubject("localhost") + .setDigitalSignature(true); + + String certDuration = config.get(HDDS_X509_DEFAULT_DURATION, + HDDS_X509_DEFAULT_DURATION_DEFAULT); + Date start = new Date(); + X509CertificateHolder certificateHolder = + approver.sign(securityConfig, rootKeyPair.getPrivate(), + new X509CertificateHolder(rootCert.getEncoded()), start, + new Date(start.getTime() + Duration.parse(certDuration).toMillis()), + csrBuilder.build(), "scm1", "cluster1"); + X509Certificate newX509Certificate = + new JcaX509CertificateConverter().getCertificate(certificateHolder); + + // Save the new private key and certificate to file + // Save certificate and private key to keyStore + keyPair = newKeyPair; + x509Certificate = newX509Certificate; + System.out.println(new Date() + " certificated is renewed"); + } + + @Override + public KeyStoresFactory getServerKeyStoresFactory() { + return serverKeyStoresFactory; + } + + @Override + public KeyStoresFactory 
getClientKeyStoresFactory() { + return clientKeyStoresFactory; + } + + @Override + public void close() throws IOException { + if (serverKeyStoresFactory != null) { + serverKeyStoresFactory.destroy(); + } + + if (clientKeyStoresFactory != null) { + clientKeyStoresFactory.destroy(); + } + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockDataStreamOutput.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockDataStreamOutput.java new file mode 100644 index 000000000000..f232a9298e5f --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestBlockDataStreamOutput.java @@ -0,0 +1,273 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.client.rpc; + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.client.ReplicationType; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.XceiverClientManager; +import org.apache.hadoop.hdds.scm.XceiverClientMetrics; +import org.apache.hadoop.hdds.scm.storage.BlockDataStreamOutput; +import org.apache.hadoop.hdds.scm.storage.ByteBufferStreamOutput; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.io.BlockDataStreamOutputEntry; +import org.apache.hadoop.ozone.client.io.KeyDataStreamOutput; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; +import org.apache.hadoop.ozone.container.ContainerTestHelper; +import org.apache.hadoop.ozone.container.TestHelper; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.Timeout; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; + +/** + * Tests BlockDataStreamOutput class. 
+ */ +public class TestBlockDataStreamOutput { + + /** + * Set a timeout for each test. + */ + @Rule + public Timeout timeout = Timeout.seconds(300); + private static MiniOzoneCluster cluster; + private static OzoneConfiguration conf = new OzoneConfiguration(); + private static OzoneClient client; + private static ObjectStore objectStore; + private static int chunkSize; + private static int flushSize; + private static int maxFlushSize; + private static int blockSize; + private static String volumeName; + private static String bucketName; + private static String keyString; + + /** + * Create a MiniDFSCluster for testing. + *

+ * Ozone is made active by setting OZONE_ENABLED = true + * + * @throws IOException + */ + @BeforeClass + public static void init() throws Exception { + chunkSize = 100; + flushSize = 2 * chunkSize; + maxFlushSize = 2 * flushSize; + blockSize = 2 * maxFlushSize; + + OzoneClientConfig clientConfig = conf.getObject(OzoneClientConfig.class); + conf.setFromObject(clientConfig); + + conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, TimeUnit.SECONDS); + conf.setQuietMode(false); + conf.setStorageSize(OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE, 4, + StorageUnit.MB); + + cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(7) + .setTotalPipelineNumLimit(10) + .setBlockSize(blockSize) + .setChunkSize(chunkSize) + .setStreamBufferFlushSize(flushSize) + .setStreamBufferMaxSize(maxFlushSize) + .setDataStreamBufferFlushize(maxFlushSize) + .setStreamBufferSizeUnit(StorageUnit.BYTES) + .setDataStreamMinPacketSize(chunkSize) + .setDataStreamStreamWindowSize(5 * chunkSize) + .build(); + cluster.waitForClusterToBeReady(); + //the easiest way to create an open container is creating a key + client = OzoneClientFactory.getRpcClient(conf); + objectStore = client.getObjectStore(); + keyString = UUID.randomUUID().toString(); + volumeName = "testblockoutputstream"; + bucketName = volumeName; + objectStore.createVolume(volumeName); + objectStore.getVolume(volumeName).createBucket(bucketName); + } + + static String getKeyName() { + return UUID.randomUUID().toString(); + } + + /** + * Shutdown MiniDFSCluster. 
+ */ + @AfterClass + public static void shutdown() { + if (cluster != null) { + cluster.shutdown(); + } + } + + @Test + public void testHalfChunkWrite() throws Exception { + testWrite(chunkSize / 2); + testWriteWithFailure(chunkSize / 2); + } + + @Test + public void testSingleChunkWrite() throws Exception { + testWrite(chunkSize); + testWriteWithFailure(chunkSize); + } + + @Test + public void testMultiChunkWrite() throws Exception { + testWrite(chunkSize + 50); + testWriteWithFailure(chunkSize + 50); + } + + @Test + public void testMultiBlockWrite() throws Exception { + testWrite(blockSize + 50); + testWriteWithFailure(blockSize + 50); + } + + static void testWrite(int dataLength) throws Exception { + String keyName = getKeyName(); + OzoneDataStreamOutput key = createKey( + keyName, ReplicationType.RATIS, dataLength); + final byte[] data = ContainerTestHelper.generateData(dataLength, false); + key.write(ByteBuffer.wrap(data)); + // now close the stream, It will update the key length. + key.close(); + validateData(keyName, data); + } + + private void testWriteWithFailure(int dataLength) throws Exception { + String keyName = getKeyName(); + OzoneDataStreamOutput key = createKey( + keyName, ReplicationType.RATIS, dataLength); + byte[] data = + ContainerTestHelper.getFixedLengthString(keyString, dataLength) + .getBytes(UTF_8); + ByteBuffer b = ByteBuffer.wrap(data); + key.write(b); + KeyDataStreamOutput keyDataStreamOutput = + (KeyDataStreamOutput) key.getByteBufStreamOutput(); + ByteBufferStreamOutput stream = + keyDataStreamOutput.getStreamEntries().get(0).getByteBufStreamOutput(); + Assert.assertTrue(stream instanceof BlockDataStreamOutput); + TestHelper.waitForContainerClose(key, cluster); + key.write(b); + key.close(); + String dataString = new String(data, UTF_8); + validateData(keyName, dataString.concat(dataString).getBytes(UTF_8)); + } + + @Test + public void testPutBlockAtBoundary() throws Exception { + int dataLength = 500; + XceiverClientMetrics metrics = + 
XceiverClientManager.getXceiverClientMetrics(); + long putBlockCount = metrics.getContainerOpCountMetrics( + ContainerProtos.Type.PutBlock); + long pendingPutBlockCount = metrics.getPendingContainerOpCountMetrics( + ContainerProtos.Type.PutBlock); + String keyName = getKeyName(); + OzoneDataStreamOutput key = createKey( + keyName, ReplicationType.RATIS, 0); + byte[] data = + ContainerTestHelper.getFixedLengthString(keyString, dataLength) + .getBytes(UTF_8); + key.write(ByteBuffer.wrap(data)); + Assert.assertTrue( + metrics.getPendingContainerOpCountMetrics(ContainerProtos.Type.PutBlock) + <= pendingPutBlockCount + 1); + key.close(); + // Since data length is 500 , first putBlock will be at 400(flush boundary) + // and the other at 500 + Assert.assertTrue( + metrics.getContainerOpCountMetrics(ContainerProtos.Type.PutBlock) + == putBlockCount + 2); + validateData(keyName, data); + } + + + static OzoneDataStreamOutput createKey(String keyName, ReplicationType type, + long size) throws Exception { + return TestHelper.createStreamKey( + keyName, type, size, objectStore, volumeName, bucketName); + } + static void validateData(String keyName, byte[] data) throws Exception { + TestHelper.validateData( + keyName, data, objectStore, volumeName, bucketName); + } + + + @Test + public void testMinPacketSize() throws Exception { + String keyName = getKeyName(); + XceiverClientMetrics metrics = + XceiverClientManager.getXceiverClientMetrics(); + OzoneDataStreamOutput key = createKey(keyName, ReplicationType.RATIS, 0); + long writeChunkCount = + metrics.getContainerOpCountMetrics(ContainerProtos.Type.WriteChunk); + byte[] data = + ContainerTestHelper.getFixedLengthString(keyString, chunkSize / 2) + .getBytes(UTF_8); + key.write(ByteBuffer.wrap(data)); + // minPacketSize= 100, so first write of 50 wont trigger a writeChunk + Assert.assertEquals(writeChunkCount, + metrics.getContainerOpCountMetrics(ContainerProtos.Type.WriteChunk)); + key.write(ByteBuffer.wrap(data)); + 
Assert.assertEquals(writeChunkCount + 1, + metrics.getContainerOpCountMetrics(ContainerProtos.Type.WriteChunk)); + // now close the stream, It will update the key length. + key.close(); + String dataString = new String(data, UTF_8); + validateData(keyName, dataString.concat(dataString).getBytes(UTF_8)); + } + + @Test + public void testTotalAckDataLength() throws Exception { + int dataLength = 400; + String keyName = getKeyName(); + OzoneDataStreamOutput key = createKey( + keyName, ReplicationType.RATIS, 0); + byte[] data = + ContainerTestHelper.getFixedLengthString(keyString, dataLength) + .getBytes(UTF_8); + KeyDataStreamOutput keyDataStreamOutput = + (KeyDataStreamOutput) key.getByteBufStreamOutput(); + BlockDataStreamOutputEntry stream = + keyDataStreamOutput.getStreamEntries().get(0); + key.write(ByteBuffer.wrap(data)); + key.close(); + Assert.assertEquals(dataLength, stream.getTotalAckDataLength()); + } + +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineStream.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineStream.java new file mode 100644 index 000000000000..ad9eca6af70a --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineStream.java @@ -0,0 +1,219 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.client.rpc; + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.client.ReplicationType; +import org.apache.hadoop.hdds.conf.DatanodeRatisServerConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.ratis.conf.RatisClientConfig; +import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.ozone.HddsDatanodeService; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.io.KeyDataStreamOutput; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; +import org.apache.hadoop.ozone.container.ContainerTestHelper; +import org.apache.hadoop.ozone.container.TestHelper; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.Timeout; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.time.Duration; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_COMMAND_STATUS_REPORT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL; +import static 
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; + +/** + * Tests the containerStateMachine stream handling. + */ +public class TestContainerStateMachineStream { + + /** + * Set a timeout for each test. + */ + @Rule + public Timeout timeout = Timeout.seconds(300); + + private MiniOzoneCluster cluster; + private OzoneConfiguration conf = new OzoneConfiguration(); + private OzoneClient client; + private ObjectStore objectStore; + private String volumeName; + private String bucketName; + + private static final int CHUNK_SIZE = 100; + private static final int FLUSH_SIZE = 2 * CHUNK_SIZE; + private static final int MAX_FLUSH_SIZE = 2 * FLUSH_SIZE; + private static final int BLOCK_SIZE = 2 * MAX_FLUSH_SIZE; + + /** + * Create a MiniDFSCluster for testing. + * + * @throws IOException + */ + @Before + public void setup() throws Exception { + conf = new OzoneConfiguration(); + + OzoneClientConfig clientConfig = conf.getObject(OzoneClientConfig.class); + clientConfig.setStreamBufferFlushDelay(false); + conf.setFromObject(clientConfig); + + conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 200, + TimeUnit.MILLISECONDS); + conf.setTimeDuration(HDDS_COMMAND_STATUS_REPORT_INTERVAL, 200, + TimeUnit.MILLISECONDS); + conf.setTimeDuration(HDDS_PIPELINE_REPORT_INTERVAL, 200, + TimeUnit.MILLISECONDS); + conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 30, TimeUnit.SECONDS); + conf.setTimeDuration(OZONE_SCM_PIPELINE_DESTROY_TIMEOUT, 1, + TimeUnit.SECONDS); + + RatisClientConfig ratisClientConfig = + conf.getObject(RatisClientConfig.class); + ratisClientConfig.setWriteRequestTimeout(Duration.ofSeconds(10)); + ratisClientConfig.setWatchRequestTimeout(Duration.ofSeconds(10)); + conf.setFromObject(ratisClientConfig); + + DatanodeRatisServerConfig ratisServerConfig = + conf.getObject(DatanodeRatisServerConfig.class); + 
ratisServerConfig.setRequestTimeOut(Duration.ofSeconds(3)); + ratisServerConfig.setWatchTimeOut(Duration.ofSeconds(10)); + conf.setFromObject(ratisServerConfig); + + RatisClientConfig.RaftConfig raftClientConfig = + conf.getObject(RatisClientConfig.RaftConfig.class); + raftClientConfig.setRpcRequestTimeout(Duration.ofSeconds(3)); + raftClientConfig.setRpcWatchRequestTimeout(Duration.ofSeconds(10)); + conf.setFromObject(raftClientConfig); + + conf.setLong(OzoneConfigKeys.DFS_RATIS_SNAPSHOT_THRESHOLD_KEY, 1); + conf.setQuietMode(false); + cluster = + MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(3) + .setHbInterval(200) + .setDataStreamMinPacketSize(1024) + .setBlockSize(BLOCK_SIZE) + .setChunkSize(CHUNK_SIZE) + .setStreamBufferFlushSize(FLUSH_SIZE) + .setStreamBufferMaxSize(MAX_FLUSH_SIZE) + .setStreamBufferSizeUnit(StorageUnit.BYTES) + .build(); + cluster.waitForClusterToBeReady(); + cluster.waitForPipelineTobeReady(HddsProtos.ReplicationFactor.ONE, 60000); + //the easiest way to create an open container is creating a key + client = OzoneClientFactory.getRpcClient(conf); + objectStore = client.getObjectStore(); + + volumeName = "testcontainerstatemachinestream"; + bucketName = "teststreambucket"; + objectStore.createVolume(volumeName); + objectStore.getVolume(volumeName).createBucket(bucketName); + + } + + /** + * Shutdown MiniDFSCluster. 
+ */ + @After + public void shutdown() { + if (cluster != null) { + cluster.shutdown(); + } + } + + @Test + public void testContainerStateMachineForStreaming() throws Exception { + long size = CHUNK_SIZE + 1; + + OzoneDataStreamOutput key = TestHelper.createStreamKey( + "ozone-stream-test.txt", ReplicationType.RATIS, size, objectStore, + volumeName, bucketName); + + byte[] data = ContainerTestHelper.generateData((int) size, true); + key.write(ByteBuffer.wrap(data)); + key.flush(); + + KeyDataStreamOutput streamOutput = + (KeyDataStreamOutput) key.getByteBufStreamOutput(); + List locationInfoList = + streamOutput.getLocationInfoList(); + + key.close(); + + OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0); + HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, + cluster); + + long bytesUsed = dn.getDatanodeStateMachine() + .getContainer().getContainerSet() + .getContainer(omKeyLocationInfo.getContainerID()). + getContainerData().getBytesUsed(); + + Assert.assertTrue(bytesUsed == size); + } + + + @Test + public void testContainerStateMachineForStreamingSmallFile() + throws Exception { + long size = CHUNK_SIZE - 1; + + OzoneDataStreamOutput key = TestHelper.createStreamKey( + "ozone-stream-test-small-file.txt", ReplicationType.RATIS, size, + objectStore, volumeName, bucketName); + + byte[] data = ContainerTestHelper.generateData((int) size, true); + key.write(ByteBuffer.wrap(data)); + key.flush(); + + KeyDataStreamOutput streamOutput = + (KeyDataStreamOutput) key.getByteBufStreamOutput(); + List locationInfoList = + streamOutput.getLocationInfoList(); + key.close(); + OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0); + HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, + cluster); + + long bytesUsed = dn.getDatanodeStateMachine() + .getContainer().getContainerSet() + .getContainer(omKeyLocationInfo.getContainerID()). 
+ getContainerData().getBytesUsed(); + + Assert.assertTrue(bytesUsed == size); + } + +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java index e638ab5e0204..068bdc721236 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestECKeyOutputStream.java @@ -399,19 +399,25 @@ public void testWriteShouldSucceedWhenDNKilled() throws Exception { try (OzoneOutputStream out = bucket.createKey(keyName, 1024, new ECReplicationConfig(3, 2, ECReplicationConfig.EcCodec.RS, chunkSize), new HashMap<>())) { + ECKeyOutputStream ecOut = (ECKeyOutputStream) out.getOutputStream(); out.write(inputData); // Kill a node from first pipeline - nodeToKill = - ((ECKeyOutputStream) out.getOutputStream()).getStreamEntries() - .get(0).getPipeline().getFirstNode(); + nodeToKill = ecOut.getStreamEntries() + .get(0).getPipeline().getFirstNode(); cluster.shutdownHddsDatanode(nodeToKill); out.write(inputData); - // Check the second blockGroup pipeline to make sure that the failed not - // is not selected. - Assert.assertFalse( - ((ECKeyOutputStream) out.getOutputStream()).getStreamEntries() - .get(1).getPipeline().getNodes().contains(nodeToKill)); + + // Wait for flushing thread to finish its work. + final long checkpoint = System.currentTimeMillis(); + ecOut.insertFlushCheckpoint(checkpoint); + GenericTestUtils.waitFor(() -> ecOut.getFlushCheckpoint() == checkpoint, + 100, 10000); + + // Check the second blockGroup pipeline to make sure that the failed + // node is not selected. 
+ Assert.assertFalse(ecOut.getStreamEntries() + .get(1).getPipeline().getNodes().contains(nodeToKill)); } try (OzoneInputStream is = bucket.readKey(keyName)) { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java index 6fa5324af606..7ddb6e13d3b6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java @@ -268,7 +268,7 @@ private void testBlockCountOnFailures(OmKeyInfo omKeyInfo) throws Exception { .getContainer(containerId1)).getContainerData(); try (DBHandle containerDb1 = BlockUtils.getDB(containerData1, conf)) { BlockData blockData1 = containerDb1.getStore().getBlockDataTable().get( - containerData1.blockKey(locationList.get(0).getBlockID() + containerData1.getBlockKey(locationList.get(0).getBlockID() .getLocalID())); // The first Block could have 1 or 2 chunkSize of data int block1NumChunks = blockData1.getChunks().size(); @@ -287,7 +287,7 @@ private void testBlockCountOnFailures(OmKeyInfo omKeyInfo) throws Exception { .getContainer(containerId2)).getContainerData(); try (DBHandle containerDb2 = BlockUtils.getDB(containerData2, conf)) { BlockData blockData2 = containerDb2.getStore().getBlockDataTable().get( - containerData2.blockKey(locationList.get(1).getBlockID() + containerData2.getBlockKey(locationList.get(1).getBlockID() .getLocalID())); // The second Block should have 0.5 chunkSize of data Assert.assertEquals(block2ExpectedChunkCount, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneAtRestEncryption.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneAtRestEncryption.java index 
563e628f94c7..9a649b07f868 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneAtRestEncryption.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneAtRestEncryption.java @@ -57,7 +57,7 @@ import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.OzoneKeyDetails; import org.apache.hadoop.ozone.client.OzoneVolume; -import org.apache.hadoop.ozone.client.io.MultipartCryptoKeyInputStream; +import org.apache.hadoop.hdds.scm.storage.MultipartInputStream; import org.apache.hadoop.ozone.client.io.OzoneInputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.om.OMMetadataManager; @@ -497,7 +497,9 @@ public void testMultipartUploadWithEncryption(OzoneBucket bucket, // Create an input stream to read the data OzoneInputStream inputStream = bucket.readKey(keyName); - Assert.assertTrue(inputStream instanceof MultipartCryptoKeyInputStream); + + Assert.assertTrue(inputStream.getInputStream() + instanceof MultipartInputStream); // Test complete read byte[] completeRead = new byte[keySize]; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientMultipartUploadWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientMultipartUploadWithFSO.java index 262b444cc9ec..d26802497159 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientMultipartUploadWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneClientMultipartUploadWithFSO.java @@ -17,7 +17,10 @@ package org.apache.hadoop.ozone.client.rpc; +import java.util.HashMap; import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.hdds.client.DefaultReplicationConfig; +import org.apache.hadoop.hdds.client.ECReplicationConfig; import 
org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationFactor; import org.apache.hadoop.hdds.client.ReplicationType; @@ -26,6 +29,7 @@ import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.OzoneTestUtils; +import org.apache.hadoop.ozone.client.BucketArgs; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; @@ -52,12 +56,14 @@ import static org.apache.hadoop.hdds.StringUtils.string2Bytes; import static org.apache.hadoop.hdds.client.ReplicationFactor.THREE; +import org.apache.hadoop.ozone.om.helpers.QuotaUtil; import org.apache.hadoop.ozone.om.request.OMRequestTestUtils; import org.apache.hadoop.ozone.om.request.file.OMFileRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.ozone.test.GenericTestUtils; import org.junit.AfterClass; import org.junit.Assert; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; @@ -98,6 +104,11 @@ public class TestOzoneClientMultipartUploadWithFSO { */ @Rule public Timeout timeout = new Timeout(300000); + private String volumeName; + private String bucketName; + private String keyName; + private OzoneVolume volume; + private OzoneBucket bucket; /** * Create a MiniOzoneCluster for testing. 
@@ -129,7 +140,7 @@ public static void shutdown() throws IOException { */ static void startCluster(OzoneConfiguration conf) throws Exception { cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(3) + .setNumDatanodes(5) .setTotalPipelineNumLimit(10) .setScmId(scmId) .build(); @@ -150,52 +161,35 @@ static void shutdownCluster() throws IOException { cluster.shutdown(); } } - - @Test - public void testInitiateMultipartUploadWithReplicationInformationSet() throws - IOException { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); + + @Before + public void preTest() throws Exception { + volumeName = UUID.randomUUID().toString(); + bucketName = UUID.randomUUID().toString(); + keyName = UUID.randomUUID().toString(); store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); + volume = store.getVolume(volumeName); volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - OmMultipartInfo multipartInfo = bucket.initiateMultipartUpload(keyName, - RATIS, ONE); + bucket = volume.getBucket(bucketName); + } - Assert.assertNotNull(multipartInfo); - String uploadID = multipartInfo.getUploadID(); - Assert.assertEquals(volumeName, multipartInfo.getVolumeName()); - Assert.assertEquals(bucketName, multipartInfo.getBucketName()); - Assert.assertEquals(keyName, multipartInfo.getKeyName()); - Assert.assertNotNull(multipartInfo.getUploadID()); + @Test + public void testInitiateMultipartUploadWithReplicationInformationSet() throws + IOException { + String uploadID = initiateMultipartUpload(bucket, keyName, + ReplicationType.RATIS, ONE); // Call initiate multipart upload for the same key again, this should // generate a new uploadID. 
- multipartInfo = bucket.initiateMultipartUpload(keyName, - RATIS, ONE); - - Assert.assertNotNull(multipartInfo); - Assert.assertEquals(volumeName, multipartInfo.getVolumeName()); - Assert.assertEquals(bucketName, multipartInfo.getBucketName()); - Assert.assertEquals(keyName, multipartInfo.getKeyName()); - Assert.assertNotEquals(multipartInfo.getUploadID(), uploadID); - Assert.assertNotNull(multipartInfo.getUploadID()); + String uploadIDNew = initiateMultipartUpload(bucket, keyName, + ReplicationType.RATIS, ONE); + Assert.assertNotEquals(uploadIDNew, uploadID); } @Test public void testInitiateMultipartUploadWithDefaultReplication() throws IOException { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); OmMultipartInfo multipartInfo = bucket.initiateMultipartUpload(keyName); Assert.assertNotNull(multipartInfo); @@ -219,24 +213,9 @@ public void testInitiateMultipartUploadWithDefaultReplication() throws @Test public void testUploadPartWithNoOverride() throws IOException { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); String sampleData = "sample Value"; - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - OmMultipartInfo multipartInfo = bucket.initiateMultipartUpload(keyName, - RATIS, ONE); - - Assert.assertNotNull(multipartInfo); - String uploadID = multipartInfo.getUploadID(); - Assert.assertEquals(volumeName, multipartInfo.getVolumeName()); - Assert.assertEquals(bucketName, multipartInfo.getBucketName()); - Assert.assertEquals(keyName, multipartInfo.getKeyName()); - 
Assert.assertNotNull(multipartInfo.getUploadID()); + String uploadID = initiateMultipartUpload(bucket, keyName, + ReplicationType.RATIS, ONE); OzoneOutputStream ozoneOutputStream = bucket.createMultipartKey(keyName, sampleData.length(), 1, uploadID); @@ -251,53 +230,18 @@ public void testUploadPartWithNoOverride() throws IOException { } @Test - public void testUploadPartOverrideWithRatis() throws IOException { - - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); + public void testUploadPartOverrideWithRatis() throws Exception { String sampleData = "sample Value"; - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - OmMultipartInfo multipartInfo = bucket.initiateMultipartUpload(keyName, - ReplicationType.RATIS, THREE); - - Assert.assertNotNull(multipartInfo); - String uploadID = multipartInfo.getUploadID(); - Assert.assertEquals(volumeName, multipartInfo.getVolumeName()); - Assert.assertEquals(bucketName, multipartInfo.getBucketName()); - Assert.assertEquals(keyName, multipartInfo.getKeyName()); - Assert.assertNotNull(multipartInfo.getUploadID()); + String uploadID = initiateMultipartUpload(bucket, keyName, + ReplicationType.RATIS, THREE); int partNumber = 1; - - OzoneOutputStream ozoneOutputStream = bucket.createMultipartKey(keyName, - sampleData.length(), partNumber, uploadID); - ozoneOutputStream.write(string2Bytes(sampleData), 0, sampleData.length()); - ozoneOutputStream.close(); - - OmMultipartCommitUploadPartInfo commitUploadPartInfo = ozoneOutputStream - .getCommitUploadPartInfo(); - - Assert.assertNotNull(commitUploadPartInfo); - String partName = commitUploadPartInfo.getPartName(); - Assert.assertNotNull(commitUploadPartInfo.getPartName()); + String partName = uploadPart(bucket, keyName, uploadID, partNumber, + 
sampleData.getBytes(UTF_8)); //Overwrite the part by creating part key with same part number. - sampleData = "sample Data Changed"; - ozoneOutputStream = bucket.createMultipartKey(keyName, - sampleData.length(), partNumber, uploadID); - ozoneOutputStream.write(string2Bytes(sampleData), 0, "name".length()); - ozoneOutputStream.close(); - - commitUploadPartInfo = ozoneOutputStream - .getCommitUploadPartInfo(); - - Assert.assertNotNull(commitUploadPartInfo); - Assert.assertNotNull(commitUploadPartInfo.getPartName()); + String partNameNew = uploadPart(bucket, keyName, uploadID, partNumber, + "name".getBytes(UTF_8)); // PartName should be same from old part Name. // AWS S3 for same content generates same partName during upload part. @@ -308,20 +252,94 @@ public void testUploadPartOverrideWithRatis() throws IOException { // of content in ozone s3. This will make S3 Mpu completeMPU pass when // comparing part names and large file uploads work using aws cp. Assert.assertEquals("Part names should be same", partName, - commitUploadPartInfo.getPartName()); + partNameNew); + + // old part bytes written needs discard and have only + // new part bytes in quota for this bucket + long byteWritten = "name".length() * 3; // data written with replication + Assert.assertEquals(volume.getBucket(bucketName).getUsedBytes(), + byteWritten); } @Test - public void testMultipartUploadWithPartsLessThanMinSize() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); + public void testUploadTwiceWithEC() throws IOException { + bucketName = UUID.randomUUID().toString(); + bucket = getOzoneECBucket(bucketName); - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); + byte[] data = generateData(81920, (byte) 97); + // perform upload and complete + OmMultipartInfo 
multipartInfo = bucket.initiateMultipartUpload(keyName); + + String uploadID = multipartInfo.getUploadID(); + int partNumber = 1; + + String partName = uploadPart(bucket, keyName, uploadID, partNumber, + data); + + Map partsMap = new HashMap<>(); + partsMap.put(partNumber, partName); + bucket.completeMultipartUpload(keyName, uploadID, partsMap); + + long replicatedSize = QuotaUtil.getReplicatedSize(data.length, + bucket.getReplicationConfig()); + Assert.assertEquals(volume.getBucket(bucketName).getUsedBytes(), + replicatedSize); + + //upload same key again + multipartInfo = bucket.initiateMultipartUpload(keyName); + uploadID = multipartInfo.getUploadID(); + partName = uploadPart(bucket, keyName, uploadID, partNumber, + data); + + partsMap = new HashMap<>(); + partsMap.put(partNumber, partName); + bucket.completeMultipartUpload(keyName, uploadID, partsMap); + + // used sized should remain same, overwrite previous upload + Assert.assertEquals(volume.getBucket(bucketName).getUsedBytes(), + replicatedSize); + } + + @Test + public void testUploadAbortWithEC() throws IOException { + byte[] data = generateData(81920, (byte) 97); + + bucketName = UUID.randomUUID().toString(); + bucket = getOzoneECBucket(bucketName); + + // perform upload and abort + OmMultipartInfo multipartInfo = bucket.initiateMultipartUpload(keyName); + + String uploadID = multipartInfo.getUploadID(); + int partNumber = 1; + uploadPart(bucket, keyName, uploadID, partNumber, data); + + long replicatedSize = QuotaUtil.getReplicatedSize(data.length, + bucket.getReplicationConfig()); + Assert.assertEquals(volume.getBucket(bucketName).getUsedBytes(), + replicatedSize); + + bucket.abortMultipartUpload(keyName, uploadID); + + // used size should become zero after aport upload + Assert.assertEquals(volume.getBucket(bucketName).getUsedBytes(), 0); + } + + private OzoneBucket getOzoneECBucket(String myBucket) + throws IOException { + final BucketArgs.Builder bucketArgs = BucketArgs.newBuilder(); + 
bucketArgs.setDefaultReplicationConfig( + new DefaultReplicationConfig(ReplicationType.EC, + new ECReplicationConfig(3, 2, ECReplicationConfig.EcCodec.RS, + 1024))); + + volume.createBucket(myBucket, bucketArgs.build()); + return volume.getBucket(myBucket); + } + + @Test + public void testMultipartUploadWithPartsLessThanMinSize() throws Exception { // Initiate multipart upload String uploadID = initiateMultipartUpload(bucket, keyName, RATIS, ONE); @@ -345,15 +363,6 @@ public void testMultipartUploadWithPartsLessThanMinSize() throws Exception { @Test public void testMultipartUploadWithPartsMisMatchWithListSizeDifferent() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - String uploadID = initiateMultipartUpload(bucket, keyName, RATIS, ONE); @@ -369,15 +378,6 @@ public void testMultipartUploadWithPartsMisMatchWithListSizeDifferent() @Test public void testMultipartUploadWithPartsMisMatchWithIncorrectPartName() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - String uploadID = initiateMultipartUpload(bucket, keyName, RATIS, ONE); @@ -393,15 +393,6 @@ public void testMultipartUploadWithPartsMisMatchWithIncorrectPartName() @Test public void testMultipartUploadWithMissingParts() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); - - store.createVolume(volumeName); 
- OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - String uploadID = initiateMultipartUpload(bucket, keyName, RATIS, ONE); @@ -417,22 +408,11 @@ public void testMultipartUploadWithMissingParts() throws Exception { @Test public void testCommitPartAfterCompleteUpload() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); String parentDir = "a/b/c/d/"; - String keyName = parentDir + UUID.randomUUID().toString(); + keyName = parentDir + UUID.randomUUID().toString(); + String uploadID = initiateMultipartUpload(bucket, keyName, RATIS, ONE); - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - - OmMultipartInfo omMultipartInfo = bucket.initiateMultipartUpload(keyName, - RATIS, ONE); - - Assert.assertNotNull(omMultipartInfo.getUploadID()); - - String uploadID = omMultipartInfo.getUploadID(); + Assert.assertEquals(volume.getBucket(bucketName).getUsedNamespace(), 4); // upload part 1. byte[] data = generateData(5 * 1024 * 1024, @@ -447,7 +427,7 @@ public void testCommitPartAfterCompleteUpload() throws Exception { // Do not close output stream for part 2. 
ozoneOutputStream = bucket.createMultipartKey(keyName, - data.length, 2, omMultipartInfo.getUploadID()); + data.length, 2, uploadID); ozoneOutputStream.write(data, 0, data.length); Map partsMap = new LinkedHashMap<>(); @@ -481,44 +461,26 @@ public void testCommitPartAfterCompleteUpload() throws Exception { @Test public void testAbortUploadFail() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - OzoneTestUtils.expectOmException(NO_SUCH_MULTIPART_UPLOAD_ERROR, () -> bucket.abortMultipartUpload(keyName, "random")); } @Test public void testAbortUploadFailWithInProgressPartUpload() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); String parentDir = "a/b/c/d/"; - String keyName = parentDir + UUID.randomUUID().toString(); - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); + keyName = parentDir + UUID.randomUUID().toString(); - OmMultipartInfo omMultipartInfo = bucket.initiateMultipartUpload(keyName, + String uploadID = initiateMultipartUpload(bucket, keyName, RATIS, ONE); - Assert.assertNotNull(omMultipartInfo.getUploadID()); - // Do not close output stream. byte[] data = "data".getBytes(UTF_8); OzoneOutputStream ozoneOutputStream = bucket.createMultipartKey(keyName, - data.length, 1, omMultipartInfo.getUploadID()); + data.length, 1, uploadID); ozoneOutputStream.write(data, 0, data.length); // Abort before completing part upload. 
- bucket.abortMultipartUpload(keyName, omMultipartInfo.getUploadID()); + bucket.abortMultipartUpload(keyName, uploadID); try { ozoneOutputStream.close(); @@ -532,15 +494,8 @@ public void testAbortUploadFailWithInProgressPartUpload() throws Exception { @Test public void testAbortUploadSuccessWithOutAnyParts() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); String parentDir = "a/b/c/d/"; - String keyName = parentDir + UUID.randomUUID().toString(); - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); + keyName = parentDir + UUID.randomUUID().toString(); String uploadID = initiateMultipartUpload(bucket, keyName, RATIS, ONE); @@ -549,15 +504,9 @@ public void testAbortUploadSuccessWithOutAnyParts() throws Exception { @Test public void testAbortUploadSuccessWithParts() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); String parentDir = "a/b/c/d/"; - String keyName = parentDir + UUID.randomUUID().toString(); + keyName = parentDir + UUID.randomUUID().toString(); - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); OzoneManager ozoneManager = cluster.getOzoneManager(); String buckKey = ozoneManager.getMetadataManager() .getBucketKey(volume.getName(), bucket.getName()); @@ -572,8 +521,7 @@ public void testAbortUploadSuccessWithParts() throws Exception { OMMetadataManager metadataMgr = cluster.getOzoneManager().getMetadataManager(); - String multipartKey = verifyUploadedPart(volumeName, bucketName, keyName, - uploadID, partName, metadataMgr); + String multipartKey = verifyUploadedPart(uploadID, partName, metadataMgr); bucket.abortMultipartUpload(keyName, uploadID); @@ -594,15 +542,8 @@ 
public void testAbortUploadSuccessWithParts() throws Exception { @Test public void testListMultipartUploadParts() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); String parentDir = "a/b/c/d/e/f/"; - String keyName = parentDir + "file-ABC"; - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); + keyName = parentDir + "file-ABC"; Map partsMap = new TreeMap<>(); String uploadID = initiateMultipartUpload(bucket, keyName, RATIS, @@ -629,14 +570,13 @@ public void testListMultipartUploadParts() throws Exception { Assert.assertEquals(3, ozoneMultipartUploadPartListParts.getPartInfoList().size()); - verifyPartNamesInDB(volumeName, bucketName, keyName, partsMap, + verifyPartNamesInDB(partsMap, ozoneMultipartUploadPartListParts, uploadID); Assert.assertFalse(ozoneMultipartUploadPartListParts.isTruncated()); } - private void verifyPartNamesInDB(String volumeName, String bucketName, - String keyName, Map partsMap, + private void verifyPartNamesInDB(Map partsMap, OzoneMultipartUploadPartListParts ozoneMultipartUploadPartListParts, String uploadID) throws IOException { @@ -699,15 +639,6 @@ private String verifyPartNames(Map partsMap, int index, @Test public void testListMultipartUploadPartsWithContinuation() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - Map partsMap = new TreeMap<>(); String uploadID = initiateMultipartUpload(bucket, keyName, RATIS, ONE); @@ -763,15 +694,6 @@ public void testListMultipartUploadPartsWithContinuation() @Test public void testListPartsInvalidPartMarker() 
throws Exception { try { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - bucket.listParts(keyName, "random", -1, 2); Assert.fail("Should throw exception as partNumber is an invalid number!"); } catch (IllegalArgumentException ex) { @@ -783,15 +705,6 @@ public void testListPartsInvalidPartMarker() throws Exception { @Test public void testListPartsInvalidMaxParts() throws Exception { try { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - bucket.listParts(keyName, "random", 1, -1); Assert.fail("Should throw exception as max parts is an invalid number!"); } catch (IllegalArgumentException ex) { @@ -803,16 +716,6 @@ public void testListPartsInvalidMaxParts() throws Exception { @Test public void testListPartsWithPartMarkerGreaterThanPartCount() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - - String uploadID = initiateMultipartUpload(bucket, keyName, RATIS, ONE); uploadPart(bucket, keyName, uploadID, 1, @@ -840,14 +743,6 @@ public void testListPartsWithPartMarkerGreaterThanPartCount() public void testListPartsWithInvalidUploadID() throws Exception { OzoneTestUtils .expectOmException(NO_SUCH_MULTIPART_UPLOAD_ERROR, () -> 
{ - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); - String keyName = UUID.randomUUID().toString(); - - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); OzoneMultipartUploadPartListParts ozoneMultipartUploadPartListParts = bucket.listParts(keyName, "random", 100, 2); }); @@ -855,8 +750,6 @@ public void testListPartsWithInvalidUploadID() throws Exception { @Test public void testListMultipartUpload() throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); String dirName = "dir1/dir2/dir3"; String key1 = "dir1" + "/key1"; String key2 = "dir1/dir2" + "/key2"; @@ -866,11 +759,6 @@ public void testListMultipartUpload() throws Exception { keys.add(key2); keys.add(key3); - store.createVolume(volumeName); - OzoneVolume volume = store.getVolume(volumeName); - volume.createBucket(bucketName); - OzoneBucket bucket = volume.getBucket(bucketName); - // Initiate multipart upload String uploadID1 = initiateMultipartUpload(bucket, key1, RATIS, ONE); @@ -931,8 +819,7 @@ public void testListMultipartUpload() throws Exception { Assert.assertEquals(0, expectedList.size()); } - private String verifyUploadedPart(String volumeName, String bucketName, - String keyName, String uploadID, String partName, + private String verifyUploadedPart(String uploadID, String partName, OMMetadataManager metadataMgr) throws IOException { OzoneManager ozoneManager = cluster.getOzoneManager(); String buckKey = ozoneManager.getMetadataManager() @@ -974,14 +861,14 @@ private String verifyUploadedPart(String volumeName, String bucketName, } private String getMultipartOpenKey(String multipartUploadID, - String volumeName, String bucketName, String keyName, + String volName, String buckName, String kName, OMMetadataManager omMetadataManager) throws IOException { - String 
fileName = OzoneFSUtils.getFileName(keyName); - final long volumeId = omMetadataManager.getVolumeId(volumeName); - final long bucketId = omMetadataManager.getBucketId(volumeName, - bucketName); - long parentID = getParentID(volumeName, bucketName, keyName, + String fileName = OzoneFSUtils.getFileName(kName); + final long volumeId = omMetadataManager.getVolumeId(volName); + final long bucketId = omMetadataManager.getBucketId(volName, + buckName); + long parentID = getParentID(volName, buckName, kName, omMetadataManager); String multipartKey = omMetadataManager.getMultipartKey(volumeId, bucketId, @@ -990,32 +877,36 @@ private String getMultipartOpenKey(String multipartUploadID, return multipartKey; } - private long getParentID(String volumeName, String bucketName, - String keyName, OMMetadataManager omMetadataManager) throws IOException { - Iterator pathComponents = Paths.get(keyName).iterator(); - final long volumeId = omMetadataManager.getVolumeId(volumeName); - final long bucketId = omMetadataManager.getBucketId(volumeName, - bucketName); + private long getParentID(String volName, String buckName, + String kName, OMMetadataManager omMetadataManager) throws IOException { + Iterator pathComponents = Paths.get(kName).iterator(); + final long volumeId = omMetadataManager.getVolumeId(volName); + final long bucketId = omMetadataManager.getBucketId(volName, + buckName); return OMFileRequest.getParentID(volumeId, bucketId, pathComponents, - keyName, omMetadataManager); + kName, omMetadataManager); } - private String initiateMultipartUpload(OzoneBucket bucket, String keyName, + private String initiateMultipartUpload(OzoneBucket oBucket, String kName, ReplicationType replicationType, ReplicationFactor replicationFactor) - throws Exception { - OmMultipartInfo multipartInfo = bucket.initiateMultipartUpload(keyName, + throws IOException { + OmMultipartInfo multipartInfo = oBucket.initiateMultipartUpload(kName, replicationType, replicationFactor); + 
Assert.assertNotNull(multipartInfo); String uploadID = multipartInfo.getUploadID(); - Assert.assertNotNull(uploadID); + Assert.assertEquals(volumeName, multipartInfo.getVolumeName()); + Assert.assertEquals(bucketName, multipartInfo.getBucketName()); + Assert.assertEquals(kName, multipartInfo.getKeyName()); + Assert.assertNotNull(multipartInfo.getUploadID()); return uploadID; } - private String uploadPart(OzoneBucket bucket, String keyName, String - uploadID, int partNumber, byte[] data) throws Exception { + private String uploadPart(OzoneBucket oBucket, String kName, String + uploadID, int partNumber, byte[] data) throws IOException { - OzoneOutputStream ozoneOutputStream = bucket.createMultipartKey(keyName, + OzoneOutputStream ozoneOutputStream = oBucket.createMultipartKey(kName, data.length, partNumber, uploadID); ozoneOutputStream.write(data, 0, data.length); @@ -1030,17 +921,17 @@ private String uploadPart(OzoneBucket bucket, String keyName, String return omMultipartCommitUploadPartInfo.getPartName(); } - private void completeMultipartUpload(OzoneBucket bucket, String keyName, + private void completeMultipartUpload(OzoneBucket oBucket, String kName, String uploadID, Map partsMap) throws Exception { - OmMultipartUploadCompleteInfo omMultipartUploadCompleteInfo = bucket - .completeMultipartUpload(keyName, uploadID, partsMap); + OmMultipartUploadCompleteInfo omMultipartUploadCompleteInfo = oBucket + .completeMultipartUpload(kName, uploadID, partsMap); Assert.assertNotNull(omMultipartUploadCompleteInfo); - Assert.assertEquals(omMultipartUploadCompleteInfo.getBucket(), bucket + Assert.assertEquals(omMultipartUploadCompleteInfo.getBucket(), oBucket .getName()); - Assert.assertEquals(omMultipartUploadCompleteInfo.getVolume(), bucket + Assert.assertEquals(omMultipartUploadCompleteInfo.getVolume(), oBucket .getVolumeName()); - Assert.assertEquals(omMultipartUploadCompleteInfo.getKey(), keyName); + Assert.assertEquals(omMultipartUploadCompleteInfo.getKey(), kName); 
Assert.assertNotNull(omMultipartUploadCompleteInfo.getHash()); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java index a6566141d797..361b6fdcf12d 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientAbstract.java @@ -89,6 +89,7 @@ import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerLocationUtil; +import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmFailoverProxyUtil; import org.apache.hadoop.ozone.om.OzoneManager; @@ -104,6 +105,7 @@ import org.apache.hadoop.ozone.om.helpers.OmMultipartInfo; import org.apache.hadoop.ozone.om.helpers.OmMultipartUploadCompleteInfo; import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; +import org.apache.hadoop.ozone.om.helpers.OzoneFSUtils; import org.apache.hadoop.ozone.om.helpers.QuotaUtil; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.BucketLayout; @@ -203,6 +205,7 @@ static void startCluster(OzoneConfiguration conf) throws Exception { .setTotalPipelineNumLimit(10) .setScmId(scmId) .setClusterId(clusterId) + .setDataStreamMinPacketSize(1024) .build(); cluster.waitForClusterToBeReady(); ozClient = OzoneClientFactory.getRpcClient(conf); @@ -342,12 +345,7 @@ public void testSetAndClrQuota() throws Exception { OzoneVolume volume = null; store.createVolume(volumeName); - store.getVolume(volumeName).setQuota(OzoneQuota.parseQuota( - "10GB", "10000")); 
store.getVolume(volumeName).createBucket(bucketName); - volume = store.getVolume(volumeName); - Assert.assertEquals(10 * GB, volume.getQuotaInBytes()); - Assert.assertEquals(10000L, volume.getQuotaInNamespace()); OzoneBucket bucket = store.getVolume(volumeName).getBucket(bucketName); Assert.assertEquals(OzoneConsts.QUOTA_RESET, bucket.getQuotaInBytes()); Assert.assertEquals(OzoneConsts.QUOTA_RESET, bucket.getQuotaInNamespace()); @@ -366,6 +364,12 @@ public void testSetAndClrQuota() throws Exception { store.getVolume(volumeName).getBucket(bucketName2); Assert.assertEquals(1024L, ozoneBucket2.getQuotaInBytes()); + store.getVolume(volumeName).setQuota(OzoneQuota.parseQuota( + "10GB", "10000")); + volume = store.getVolume(volumeName); + Assert.assertEquals(10 * GB, volume.getQuotaInBytes()); + Assert.assertEquals(10000L, volume.getQuotaInNamespace()); + LambdaTestUtils.intercept(IOException.class, "Can not clear bucket" + " spaceQuota because volume spaceQuota is not cleared.", () -> ozoneBucket.clearSpaceQuota()); @@ -404,8 +408,6 @@ public void testSetBucketQuotaIllegal() throws Exception { String volumeName = UUID.randomUUID().toString(); String bucketName = UUID.randomUUID().toString(); store.createVolume(volumeName); - store.getVolume(volumeName).setQuota(OzoneQuota.parseQuota( - "10GB", "1000")); store.getVolume(volumeName).createBucket(bucketName); // test bucket set quota 0 @@ -951,14 +953,15 @@ public void testCheckUsedBytesQuota() throws IOException { volume = store.getVolume(volumeName); volume.createBucket(bucketName); OzoneBucket bucket = volume.getBucket(bucketName); + bucket.setQuota(OzoneQuota.parseQuota("1 B", "100")); // Test bucket quota. 
- store.getVolume(volumeName).setQuota( - OzoneQuota.parseQuota(Long.MAX_VALUE + " B", "100")); bucketName = UUID.randomUUID().toString(); volume.createBucket(bucketName); bucket = volume.getBucket(bucketName); bucket.setQuota(OzoneQuota.parseQuota("1 B", "100")); + store.getVolume(volumeName).setQuota( + OzoneQuota.parseQuota(Long.MAX_VALUE + " B", "100")); // Test bucket quota: write key. // The remaining quota does not satisfy a block size, so the write fails. @@ -1081,6 +1084,7 @@ private void bucketUsedBytesTestHelper(BucketLayout bucketLayout) static Stream bucketLayouts() { return Stream.of( BucketLayout.OBJECT_STORE, + BucketLayout.LEGACY, BucketLayout.FILE_SYSTEM_OPTIMIZED ); } @@ -1171,6 +1175,101 @@ private void bucketQuotaTestHelper(int keyLength, ReplicationConfig repConfig) store.getVolume(volumeName).getBucket(bucketName).getUsedBytes()); } + @ParameterizedTest + @MethodSource("bucketLayouts") + public void testBucketUsedNamespace(BucketLayout layout) throws IOException { + String volumeName = UUID.randomUUID().toString(); + String bucketName = UUID.randomUUID().toString(); + String value = "sample value"; + int valueLength = value.getBytes(UTF_8).length; + store.createVolume(volumeName); + OzoneVolume volume = store.getVolume(volumeName); + BucketArgs bucketArgs = BucketArgs.newBuilder() + .setBucketLayout(layout) + .build(); + volume.createBucket(bucketName, bucketArgs); + OzoneBucket bucket = volume.getBucket(bucketName); + String keyName1 = UUID.randomUUID().toString(); + String keyName2 = UUID.randomUUID().toString(); + + writeKey(bucket, keyName1, ONE, value, valueLength); + Assert.assertEquals(1L, getBucketUsedNamespace(volumeName, bucketName)); + // Test create a file twice will not increase usedNamespace twice + writeKey(bucket, keyName1, ONE, value, valueLength); + Assert.assertEquals(1L, getBucketUsedNamespace(volumeName, bucketName)); + writeKey(bucket, keyName2, ONE, value, valueLength); + Assert.assertEquals(2L, 
getBucketUsedNamespace(volumeName, bucketName)); + bucket.deleteKey(keyName1); + Assert.assertEquals(1L, getBucketUsedNamespace(volumeName, bucketName)); + bucket.deleteKey(keyName2); + Assert.assertEquals(0L, getBucketUsedNamespace(volumeName, bucketName)); + + RpcClient client = new RpcClient(cluster.getConf(), null); + String directoryName1 = UUID.randomUUID().toString(); + String directoryName2 = UUID.randomUUID().toString(); + + client.createDirectory(volumeName, bucketName, directoryName1); + Assert.assertEquals(1L, getBucketUsedNamespace(volumeName, bucketName)); + // Test create a directory twice will not increase usedNamespace twice + client.createDirectory(volumeName, bucketName, directoryName2); + Assert.assertEquals(2L, getBucketUsedNamespace(volumeName, bucketName)); + client.deleteKey(volumeName, bucketName, + OzoneFSUtils.addTrailingSlashIfNeeded(directoryName1), false); + Assert.assertEquals(1L, getBucketUsedNamespace(volumeName, bucketName)); + client.deleteKey(volumeName, bucketName, + OzoneFSUtils.addTrailingSlashIfNeeded(directoryName2), false); + Assert.assertEquals(0L, getBucketUsedNamespace(volumeName, bucketName)); + + String multiComponentsDir = "dir1/dir2/dir3/dir4"; + client.createDirectory(volumeName, bucketName, multiComponentsDir); + Assert.assertEquals(OzoneFSUtils.getFileCount(multiComponentsDir), + getBucketUsedNamespace(volumeName, bucketName)); + } + + @ParameterizedTest + @MethodSource("bucketLayouts") + public void testMissingParentBucketUsedNamespace(BucketLayout layout) + throws IOException { + // when will put a key that contain not exist directory only FSO buckets + // and LEGACY buckets with ozone.om.enable.filesystem.paths set to true + // will create missing directories. 
+ String volumeName = UUID.randomUUID().toString(); + String bucketName = UUID.randomUUID().toString(); + String value = "sample value"; + int valueLength = value.getBytes(UTF_8).length; + store.createVolume(volumeName); + OzoneVolume volume = store.getVolume(volumeName); + BucketArgs bucketArgs = BucketArgs.newBuilder() + .setBucketLayout(layout) + .build(); + volume.createBucket(bucketName, bucketArgs); + OzoneBucket bucket = volume.getBucket(bucketName); + + if (layout.equals(BucketLayout.LEGACY)) { + OzoneConfiguration conf = cluster.getConf(); + conf.setBoolean(OMConfigKeys.OZONE_OM_ENABLE_FILESYSTEM_PATHS, true); + cluster.setConf(conf); + } + + // the directory "/dir1", ""/dir1/dir2/", "/dir1/dir2/dir3/" + // will be created automatically + String missingParentKeyName = "dir1/dir2/dir3/file1"; + writeKey(bucket, missingParentKeyName, ONE, value, valueLength); + if (layout.equals(BucketLayout.OBJECT_STORE)) { + // for OBJECT_STORE bucket, missing parent will not be + // created automatically + Assert.assertEquals(1, getBucketUsedNamespace(volumeName, bucketName)); + } else { + Assert.assertEquals(OzoneFSUtils.getFileCount(missingParentKeyName), + getBucketUsedNamespace(volumeName, bucketName)); + } + } + + private long getBucketUsedNamespace(String volume, String bucket) + throws IOException { + return store.getVolume(volume).getBucket(bucket).getUsedNamespace(); + } + @Test public void testVolumeUsedNamespace() throws IOException { String volumeName = UUID.randomUUID().toString(); @@ -1211,8 +1310,8 @@ public void testVolumeUsedNamespace() throws IOException { Assert.assertEquals(0L, volumeWithLinkedBucket.getUsedNamespace()); // Reset volume quota, the original usedNamespace needs to remain the same - store.getVolume(volumeName).setQuota(OzoneQuota.parseQuota( - 100 + " GB", "100")); + store.getVolume(volumeName).setQuota(OzoneQuota.parseNameSpaceQuota( + "100")); Assert.assertEquals(1L, store.getVolume(volumeName).getUsedNamespace()); @@ -1223,7 +1322,7 @@ 
public void testVolumeUsedNamespace() throws IOException { } @Test - public void testBucketUsedNamespace() throws IOException { + public void testBucketQuotaInNamespace() throws IOException { String volumeName = UUID.randomUUID().toString(); String bucketName = UUID.randomUUID().toString(); String key1 = UUID.randomUUID().toString(); @@ -1790,7 +1889,8 @@ public void testZReadKeyWithUnhealthyContainerReplica() throws Exception { KeyValueContainerData cData = (KeyValueContainerData) container.getContainerData(); try (DBHandle db = BlockUtils.getDB(cData, cluster.getConf())) { - db.getStore().getMetadataTable().put(cData.bcsIdKey(), newBCSID); + db.getStore().getMetadataTable().put(cData.getBcsIdKey(), + newBCSID); } container.updateBlockCommitSequenceId(newBCSID); index++; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientWithRatis.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientWithRatis.java index 94bfaefcc014..ca26ca177cc6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientWithRatis.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneRpcClientWithRatis.java @@ -18,32 +18,48 @@ package org.apache.hadoop.ozone.client.rpc; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; import java.util.Arrays; import java.util.HashMap; import java.util.UUID; +import java.util.concurrent.ThreadLocalRandom; +import org.apache.hadoop.hdds.client.DefaultReplicationConfig; +import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.ozone.OzoneConfigKeys; 
+import org.apache.hadoop.ozone.client.BucketArgs; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.OzoneKeyDetails; +import org.apache.hadoop.ozone.client.OzoneMultipartUploadPartListParts; import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; import org.apache.hadoop.ozone.client.io.OzoneInputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.common.OzoneChecksumException; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; +import org.apache.hadoop.ozone.om.helpers.OmMultipartInfo; +import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; import org.junit.Assert; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hdds.client.ReplicationFactor.THREE; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.fail; /** @@ -155,4 +171,117 @@ public void testGetKeyAndFileWithNetworkTopology() throws IOException { } } } + + @Test + public void testMultiPartUploadWithStream() throws IOException { + String volumeName = UUID.randomUUID().toString(); + String bucketName = UUID.randomUUID().toString(); + String keyName = UUID.randomUUID().toString(); + + byte[] sampleData = new byte[1024 * 8]; + + int valueLength = sampleData.length; + + getStore().createVolume(volumeName); + OzoneVolume volume = getStore().getVolume(volumeName); + volume.createBucket(bucketName); + OzoneBucket bucket = volume.getBucket(bucketName); + + ReplicationConfig replicationConfig = + ReplicationConfig.fromTypeAndFactor( + ReplicationType.RATIS, + 
THREE); + + OmMultipartInfo multipartInfo = bucket.initiateMultipartUpload(keyName, + replicationConfig); + + assertNotNull(multipartInfo); + String uploadID = multipartInfo.getUploadID(); + Assert.assertEquals(volumeName, multipartInfo.getVolumeName()); + Assert.assertEquals(bucketName, multipartInfo.getBucketName()); + Assert.assertEquals(keyName, multipartInfo.getKeyName()); + assertNotNull(multipartInfo.getUploadID()); + + OzoneDataStreamOutput ozoneStreamOutput = bucket.createMultipartStreamKey( + keyName, valueLength, 1, uploadID); + ozoneStreamOutput.write(ByteBuffer.wrap(sampleData), 0, + valueLength); + ozoneStreamOutput.close(); + + OzoneMultipartUploadPartListParts parts = + bucket.listParts(keyName, uploadID, 0, 1); + + Assert.assertEquals(parts.getPartInfoList().size(), 1); + + OzoneMultipartUploadPartListParts.PartInfo partInfo = + parts.getPartInfoList().get(0); + Assert.assertEquals(valueLength, partInfo.getSize()); + + } + + @Test + public void testUploadWithStreamAndMemoryMappedBuffer() throws IOException { + // create a local dir + final String dir = GenericTestUtils.getTempPath( + getClass().getSimpleName()); + GenericTestUtils.assertDirCreation(new File(dir)); + + // create a local file + final int chunkSize = 1024; + final byte[] data = new byte[8 * chunkSize]; + ThreadLocalRandom.current().nextBytes(data); + final File file = new File(dir, "data"); + try (FileOutputStream out = new FileOutputStream(file)) { + out.write(data); + } + + // create a volume + final String volumeName = "vol-" + UUID.randomUUID(); + getStore().createVolume(volumeName); + final OzoneVolume volume = getStore().getVolume(volumeName); + + // create a bucket + final String bucketName = "buck-" + UUID.randomUUID(); + final BucketArgs bucketArgs = BucketArgs.newBuilder() + .setDefaultReplicationConfig( + new DefaultReplicationConfig(ReplicationType.RATIS, THREE)) + .build(); + volume.createBucket(bucketName, bucketArgs); + final OzoneBucket bucket = 
volume.getBucket(bucketName); + + // upload a key from the local file using memory-mapped buffers + final String keyName = "key-" + UUID.randomUUID(); + try (RandomAccessFile raf = new RandomAccessFile(file, "r"); + OzoneDataStreamOutput out = bucket.createStreamKey( + keyName, data.length)) { + final FileChannel channel = raf.getChannel(); + long off = 0; + for (long len = raf.length(); len > 0;) { + final long writeLen = Math.min(len, chunkSize); + final ByteBuffer mapped = channel.map(FileChannel.MapMode.READ_ONLY, + off, writeLen); + out.write(mapped); + off += writeLen; + len -= writeLen; + } + } + + // verify the key details + final OzoneKeyDetails keyDetails = bucket.getKey(keyName); + Assertions.assertEquals(keyName, keyDetails.getName()); + Assertions.assertEquals(data.length, keyDetails.getDataSize()); + + // verify the key content + final byte[] buffer = new byte[data.length]; + try (OzoneInputStream in = keyDetails.getContent()) { + for (int off = 0; off < data.length;) { + final int n = in.read(buffer, off, data.length - off); + if (n < 0) { + break; + } + off += n; + } + } + Assertions.assertArrayEquals(data, buffer); + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestValidateBCSIDOnRestart.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestValidateBCSIDOnRestart.java index 7f00825e34e6..ac964834fc13 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestValidateBCSIDOnRestart.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestValidateBCSIDOnRestart.java @@ -227,7 +227,7 @@ public void testValidateBCSIDOnDnRestart() throws Exception { // modify the bcsid for the container in the ROCKS DB thereby inducing // corruption db.getStore().getMetadataTable() - .put(keyValueContainerData.bcsIdKey(), 0L); + .put(keyValueContainerData.getBcsIdKey(), 0L); } // after the restart, there will be 
a mismatch in BCSID of what is recorded // in the and what is there in RockSDB and hence the container would be diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestChunkInputStream.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestChunkInputStream.java index c9a9c6f2b5db..13c8a5911b83 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestChunkInputStream.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestChunkInputStream.java @@ -60,7 +60,7 @@ public void testChunkReadBuffers() throws Exception { KeyInputStream keyInputStream = getKeyInputStream(keyName); BlockInputStream block0Stream = - (BlockInputStream)keyInputStream.getBlockStreams().get(0); + (BlockInputStream)keyInputStream.getPartStreams().get(0); block0Stream.initialize(); ChunkInputStream chunk0Stream = block0Stream.getChunkStreams().get(0); @@ -119,7 +119,7 @@ private void testCloseReleasesBuffers() throws Exception { try (KeyInputStream keyInputStream = getKeyInputStream(keyName)) { BlockInputStream block0Stream = - (BlockInputStream) keyInputStream.getBlockStreams().get(0); + (BlockInputStream) keyInputStream.getPartStreams().get(0); block0Stream.initialize(); ChunkInputStream chunk0Stream = block0Stream.getChunkStreams().get(0); @@ -144,7 +144,7 @@ public void testBufferRelease() throws Exception { try (KeyInputStream keyInputStream = getKeyInputStream(keyName)) { BlockInputStream block0Stream = - (BlockInputStream)keyInputStream.getBlockStreams().get(0); + (BlockInputStream)keyInputStream.getPartStreams().get(0); block0Stream.initialize(); ChunkInputStream chunk0Stream = block0Stream.getChunkStreams().get(0); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestKeyInputStream.java 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestKeyInputStream.java index f0c684486b17..3899cd6cad76 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestKeyInputStream.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/read/TestKeyInputStream.java @@ -161,7 +161,7 @@ public void testInputStreams() throws Exception { int expectedNumBlockStreams = BufferUtils.getNumberOfBins( dataLength, BLOCK_SIZE); List blockStreams = - keyInputStream.getBlockStreams(); + keyInputStream.getPartStreams(); Assert.assertEquals(expectedNumBlockStreams, blockStreams.size()); int readBlockLength = 0; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestECContainerRecovery.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestECContainerRecovery.java index 411ad538b1c7..1f08d0e101fe 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestECContainerRecovery.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestECContainerRecovery.java @@ -49,6 +49,7 @@ import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionCoordinator; import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionSupervisor; import org.apache.ozone.test.GenericTestUtils; +import org.apache.ozone.test.tag.Flaky; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -254,6 +255,7 @@ public void testContainerRecoveryOverReplicationProcessing() waitForContainerCount(5, container.containerID(), scm); } + @Flaky("HDDS-7617") @Test public void testECContainerRecoveryWithTimedOutRecovery() throws Exception { byte[] inputData = getInputBytes(3); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java index dae6e383f844..14cd1b66f428 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java @@ -46,8 +46,11 @@ import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.io.BlockDataStreamOutputEntry; import org.apache.hadoop.ozone.client.io.BlockOutputStreamEntry; +import org.apache.hadoop.ozone.client.io.KeyDataStreamOutput; import org.apache.hadoop.ozone.client.io.KeyOutputStream; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; import org.apache.hadoop.ozone.client.io.OzoneInputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.container.common.impl.ContainerData; @@ -134,8 +137,23 @@ public static OzoneOutputStream createKey(String keyName, } org.apache.hadoop.hdds.client.ReplicationFactor factor = org.apache.hadoop.hdds.client.ReplicationFactor.THREE; + ReplicationConfig config = + ReplicationConfig.fromTypeAndFactor(type, factor); return objectStore.getVolume(volumeName).getBucket(bucketName) - .createKey(keyName, size, type, factor, new HashMap<>()); + .createKey(keyName, size, config, new HashMap<>()); + } + + public static OzoneDataStreamOutput createStreamKey(String keyName, + ReplicationType type, long size, ObjectStore objectStore, + String volumeName, String bucketName) throws Exception { + org.apache.hadoop.hdds.client.ReplicationFactor factor = + type == ReplicationType.STAND_ALONE ? 
+ org.apache.hadoop.hdds.client.ReplicationFactor.ONE : + org.apache.hadoop.hdds.client.ReplicationFactor.THREE; + ReplicationConfig config = + ReplicationConfig.fromTypeAndFactor(type, factor); + return objectStore.getVolume(volumeName).getBucket(bucketName) + .createStreamKey(keyName, size, config, new HashMap<>()); } public static OzoneOutputStream createKey(String keyName, @@ -143,8 +161,10 @@ public static OzoneOutputStream createKey(String keyName, org.apache.hadoop.hdds.client.ReplicationFactor factor, long size, ObjectStore objectStore, String volumeName, String bucketName) throws Exception { + ReplicationConfig config = + ReplicationConfig.fromTypeAndFactor(type, factor); return objectStore.getVolume(volumeName).getBucket(bucketName) - .createKey(keyName, size, type, factor, new HashMap<>()); + .createKey(keyName, size, config, new HashMap<>()); } public static OzoneOutputStream createKey(String keyName, @@ -187,6 +207,24 @@ public static void waitForContainerClose(OzoneOutputStream outputStream, waitForContainerClose(cluster, containerIdList.toArray(new Long[0])); } + + public static void waitForContainerClose(OzoneDataStreamOutput outputStream, + MiniOzoneCluster cluster) throws Exception { + KeyDataStreamOutput keyOutputStream = + (KeyDataStreamOutput) outputStream.getByteBufStreamOutput(); + List streamEntryList = + keyOutputStream.getStreamEntries(); + List containerIdList = new ArrayList<>(); + for (BlockDataStreamOutputEntry entry : streamEntryList) { + long id = entry.getBlockID().getContainerID(); + if (!containerIdList.contains(id)) { + containerIdList.add(id); + } + } + Assert.assertTrue(!containerIdList.isEmpty()); + waitForContainerClose(cluster, containerIdList.toArray(new Long[0])); + } + public static void waitForPipelineClose(OzoneOutputStream outputStream, MiniOzoneCluster cluster, boolean waitForContainerCreation) throws Exception { diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java index b76092122cab..e8ac072c5d0e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java @@ -441,7 +441,7 @@ private void verifyBlocksCreated( .getContainer(blockID.getContainerID()).getContainerData(); try (DBHandle db = BlockUtils.getDB(cData, conf)) { Assertions.assertNotNull(db.getStore().getBlockDataTable() - .get(cData.blockKey(blockID.getLocalID()))); + .get(cData.getBlockKey(blockID.getLocalID()))); } }, omKeyLocationInfoGroups); } @@ -459,12 +459,13 @@ private void verifyBlocksDeleted( Table blockDataTable = db.getStore().getBlockDataTable(); - String blockKey = cData.blockKey(blockID.getLocalID()); + String blockKey = cData.getBlockKey(blockID.getLocalID()); BlockData blockData = blockDataTable.get(blockKey); Assertions.assertNull(blockData); - String deletingKey = cData.deletingBlockKey(blockID.getLocalID()); + String deletingKey = cData.getDeletingBlockKey( + blockID.getLocalID()); Assertions.assertNull(blockDataTable.get(deletingKey)); } containerIdsWithDeletedBlocks.add(blockID.getContainerID()); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java index f410f4d7fe43..19fe829efe1a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java +++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.pipeline.MockPipeline; import org.apache.hadoop.hdds.security.token.ContainerTokenIdentifier; @@ -37,6 +39,7 @@ import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.apache.hadoop.ozone.container.replication.SimpleContainerDownloader; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.ozone.test.GenericTestUtils; @@ -53,17 +56,27 @@ import org.slf4j.LoggerFactory; import java.io.File; +import java.nio.file.Path; import java.security.cert.CertificateExpiredException; +import java.time.Duration; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Date; +import java.util.List; import java.util.UUID; import java.util.concurrent.TimeUnit; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_KEY_DIR_NAME; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_KEY_DIR_NAME_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_KEY_LEN; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SECURITY_SSL_KEYSTORE_RELOAD_INTERVAL; +import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SECURITY_SSL_TRUSTSTORE_RELOAD_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_DEFAULT_DURATION; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_RENEW_GRACE_DURATION; import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY; @@ -88,6 +101,7 @@ public class TestOzoneContainerWithTLS { private ContainerTokenSecretManager secretManager; private CertificateClientTestImpl caClient; private boolean containerTokenEnabled; + private int certLifetime = 15 * 1000; // 15s public TestOzoneContainerWithTLS(boolean enableToken) { this.containerTokenEnabled = enableToken; @@ -120,24 +134,30 @@ public void setup() throws Exception { conf.setBoolean(HddsConfigKeys.HDDS_GRPC_TLS_TEST_CERT, true); conf.setInt(HDDS_KEY_LEN, 1024); - conf.set(HDDS_X509_DEFAULT_DURATION, "PT5S"); // 5s + + // certificate lives for 10s + conf.set(HDDS_X509_DEFAULT_DURATION, + Duration.ofMillis(certLifetime).toString()); + conf.set(HDDS_X509_RENEW_GRACE_DURATION, "PT2S"); + conf.set(HDDS_SECURITY_SSL_KEYSTORE_RELOAD_INTERVAL, "1s"); + conf.set(HDDS_SECURITY_SSL_TRUSTSTORE_RELOAD_INTERVAL, "1s"); long expiryTime = conf.getTimeDuration( - HddsConfigKeys.HDDS_BLOCK_TOKEN_EXPIRY_TIME, - HddsConfigKeys.HDDS_BLOCK_TOKEN_EXPIRY_TIME_DEFAULT, + HddsConfigKeys.HDDS_BLOCK_TOKEN_EXPIRY_TIME, "1s", TimeUnit.MILLISECONDS); caClient = new CertificateClientTestImpl(conf, false); secretManager = new ContainerTokenSecretManager(new SecurityConfig(conf), - expiryTime, caClient.getCertificate(). 
- getSerialNumber().toString()); + expiryTime, caClient.getCertificate().getSerialNumber().toString()); } @Test(expected = CertificateExpiredException.class) public void testCertificateLifetime() throws Exception { // Sleep to wait for certificate expire - Thread.sleep(5000); - caClient.getCertificate().checkValidity(); + LocalDateTime now = LocalDateTime.now(); + now = now.plusSeconds(certLifetime / 1000); + caClient.getCertificate().checkValidity(Date.from( + now.atZone(ZoneId.systemDefault()).toInstant())); } @Test @@ -170,13 +190,134 @@ public void testCreateOzoneContainer() throws Exception { if (containerTokenEnabled) { secretManager.start(caClient); client.connect(); - createSecureContainerForTesting(client, containerId, + createSecureContainer(client, containerId, secretManager.generateToken( UserGroupInformation.getCurrentUser().getUserName(), ContainerID.valueOf(containerId))); } else { client.connect(); - createContainerForTesting(client, containerId); + createContainer(client, containerId); + } + } finally { + if (container != null) { + container.stop(); + } + } + } + + @Test + public void testContainerDownload() throws Exception { + DatanodeDetails dn = MockDatanodeDetails.createDatanodeDetails( + UUID.randomUUID().toString(), "localhost", "0.0.0.0", + "/default-rack"); + Pipeline pipeline = MockPipeline.createSingleNodePipeline(); + conf.set(HDDS_DATANODE_DIR_KEY, tempFolder.newFolder().getPath()); + conf.setInt(OzoneConfigKeys.DFS_CONTAINER_IPC_PORT, + pipeline.getFirstNode().getPort(DatanodeDetails.Port.Name.STANDALONE) + .getValue()); + conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_IPC_RANDOM_PORT, false); + + OzoneContainer container = null; + try { + container = new OzoneContainer(dn, conf, getContext(dn), caClient); + + // Set scmId and manually start ozone container. 
+ container.start(UUID.randomUUID().toString()); + + if (containerTokenEnabled) { + secretManager.start(caClient); + } + + // Create containers + long containerId = ContainerTestHelper.getTestContainerID(); + List containerIdList = new ArrayList<>(); + XceiverClientGrpc client = new XceiverClientGrpc(pipeline, conf, + Collections.singletonList(caClient.getCACertificate())); + client.connect(); + if (containerTokenEnabled) { + Token token = secretManager.generateToken( + UserGroupInformation.getCurrentUser().getUserName(), + ContainerID.valueOf(containerId)); + createSecureContainer(client, containerId, token); + closeSecureContainer(client, containerId, token); + } else { + createContainer(client, containerId); + closeContainer(client, containerId); + } + containerIdList.add(containerId++); + + // Wait certificate to expire + GenericTestUtils.waitFor(() -> + caClient.getCertificate().getNotAfter().before(new Date()), + 100, certLifetime); + + List sourceDatanodes = new ArrayList<>(); + sourceDatanodes.add(dn); + if (containerTokenEnabled) { + // old client still function well after certificate expired + Token token = secretManager.generateToken( + UserGroupInformation.getCurrentUser().getUserName(), + ContainerID.valueOf(containerId)); + createSecureContainer(client, containerId, token); + closeSecureContainer(client, containerId++, token); + } else { + createContainer(client, containerId); + closeContainer(client, containerId++); + } + + // Download newly created container will fail because of cert expired + GenericTestUtils.LogCapturer logCapture = GenericTestUtils.LogCapturer + .captureLogs(SimpleContainerDownloader.LOG); + SimpleContainerDownloader downloader = + new SimpleContainerDownloader(conf, caClient); + Path file = downloader.getContainerDataFromReplicas( + containerId, sourceDatanodes, null); + downloader.close(); + Assert.assertNull(file); + Assert.assertTrue(logCapture.getOutput().contains( + "java.security.cert.CertificateExpiredException")); + + // 
Renew the certificate + caClient.renewKey(); + + // old client still function well after certificate renewed + if (containerTokenEnabled) { + Token token = secretManager.generateToken( + UserGroupInformation.getCurrentUser().getUserName(), + ContainerID.valueOf(containerId)); + createSecureContainer(client, containerId, token); + closeSecureContainer(client, containerId++, token); + } + + // Wait keyManager and trustManager to reload + Thread.sleep(2000); + + // old client still function well after certificate reload + if (containerTokenEnabled) { + Token token = secretManager.generateToken( + UserGroupInformation.getCurrentUser().getUserName(), + ContainerID.valueOf(containerId)); + createSecureContainer(client, containerId, token); + closeSecureContainer(client, containerId++, token); + } else { + createContainer(client, containerId); + closeContainer(client, containerId++); + } + + // Download container should succeed after key and cert renewed + for (Long cId : containerIdList) { + downloader = new SimpleContainerDownloader(conf, caClient); + try { + file = downloader.getContainerDataFromReplicas(cId, sourceDatanodes, + null); + downloader.close(); + Assert.assertNotNull(file); + } finally { + if (downloader != null) { + downloader.close(); + } + client.close(); + } } } finally { if (container != null) { @@ -185,27 +326,47 @@ public void testCreateOzoneContainer() throws Exception { } } - public static void createContainerForTesting(XceiverClientSpi client, + public static void createContainer(XceiverClientSpi client, long containerID) throws Exception { - ContainerProtos.ContainerCommandRequestProto request = - ContainerTestHelper.getCreateContainerRequest( - containerID, client.getPipeline()); - ContainerProtos.ContainerCommandResponseProto response = - client.sendCommand(request); + ContainerCommandRequestProto request = ContainerTestHelper + .getCreateContainerRequest(containerID, client.getPipeline()); + ContainerCommandResponseProto response = 
client.sendCommand(request); Assert.assertNotNull(response); + Assert.assertTrue(response.getResult() == ContainerProtos.Result.SUCCESS); } - public static void createSecureContainerForTesting(XceiverClientSpi client, + public static void createSecureContainer(XceiverClientSpi client, long containerID, Token token) throws Exception { - ContainerProtos.ContainerCommandRequestProto request = + ContainerCommandRequestProto request = ContainerTestHelper.getCreateContainerSecureRequest( containerID, client.getPipeline(), token); - ContainerProtos.ContainerCommandResponseProto response = + ContainerCommandResponseProto response = client.sendCommand(request); Assert.assertNotNull(response); + Assert.assertTrue(response.getResult() == ContainerProtos.Result.SUCCESS); } + public static void closeContainer(XceiverClientSpi client, + long containerID) throws Exception { + ContainerCommandRequestProto request = ContainerTestHelper + .getCloseContainer(client.getPipeline(), containerID); + ContainerCommandResponseProto response = client.sendCommand(request); + Assert.assertNotNull(response); + Assert.assertTrue(response.getResult() == ContainerProtos.Result.SUCCESS); + } + + public static void closeSecureContainer(XceiverClientSpi client, + long containerID, Token token) + throws Exception { + ContainerCommandRequestProto request = + ContainerTestHelper.getCloseContainer(client.getPipeline(), + containerID, token); + ContainerCommandResponseProto response = + client.sendCommand(request); + Assert.assertNotNull(response); + Assert.assertTrue(response.getResult() == ContainerProtos.Result.SUCCESS); + } private StateContext getContext(DatanodeDetails datanodeDetails) { DatanodeStateMachine stateMachine = Mockito.mock( diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestContainerServer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestContainerServer.java index 1cafedc32b75..18ca87994052 
100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestContainerServer.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestContainerServer.java @@ -87,7 +87,8 @@ public class TestContainerServer { public static void setup() { DefaultMetricsSystem.setMiniClusterMode(true); CONF.set(HddsConfigKeys.HDDS_METADATA_DIR_NAME, TEST_DIR); - caClient = new DNCertificateClient(new SecurityConfig(CONF)); + caClient = new DNCertificateClient(new SecurityConfig(CONF), + null, null, null, null); } @Test diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestSecureContainerServer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestSecureContainerServer.java index 73453acebc72..62e7ecbda682 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestSecureContainerServer.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/server/TestSecureContainerServer.java @@ -216,6 +216,10 @@ static XceiverServerRatis newXceiverServerRatis( DatanodeDetails dn, OzoneConfiguration conf) throws IOException { conf.setInt(OzoneConfigKeys.DFS_CONTAINER_RATIS_IPC_PORT, dn.getPort(DatanodeDetails.Port.Name.RATIS).getValue()); + conf.setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_ENABLED, + true); + conf.setBoolean( + OzoneConfigKeys.DFS_CONTAINER_RATIS_DATASTREAM_RANDOM_PORT, true); final String dir = TEST_DIR + dn.getUuid(); conf.set(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR, dir); final ContainerDispatcher dispatcher = createDispatcher(dn, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestKeyManagerImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestKeyManagerImpl.java index 492173b71cf9..d23fc7463332 100644 --- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestKeyManagerImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestKeyManagerImpl.java @@ -800,19 +800,20 @@ public void testLookupKeyWithLocation() throws IOException { List locationList = keySession.getKeyInfo().getLatestVersionLocations().getLocationList(); Assert.assertEquals(1, locationList.size()); + long containerID = locationList.get(0).getContainerID(); locationInfoList.add( new OmKeyLocationInfo.Builder().setPipeline(pipeline) - .setBlockID(new BlockID(locationList.get(0).getContainerID(), + .setBlockID(new BlockID(containerID, locationList.get(0).getLocalID())).build()); keyArgs.setLocationInfoList(locationInfoList); writeClient.commitKey(keyArgs, keySession.getId()); - ContainerInfo containerInfo = new ContainerInfo.Builder().setContainerID(1L) - .setPipelineID(pipeline.getId()).build(); + ContainerInfo containerInfo = new ContainerInfo.Builder() + .setContainerID(containerID).setPipelineID(pipeline.getId()).build(); List containerWithPipelines = Arrays.asList( new ContainerWithPipeline(containerInfo, pipeline)); when(mockScmContainerClient.getContainerWithPipelineBatch( - Arrays.asList(1L))).thenReturn(containerWithPipelines); + Arrays.asList(containerID))).thenReturn(containerWithPipelines); OmKeyInfo key = keyManager.lookupKey(keyArgs, null); Assert.assertEquals(key.getKeyName(), keyName); @@ -1273,6 +1274,77 @@ public void testListStatus() throws IOException { } } + @Test + public void testGetFileStatus() throws IOException { + // create a key + String keyName = RandomStringUtils.randomAlphabetic(5); + OmKeyArgs keyArgs = createBuilder() + .setKeyName(keyName) + .setLatestVersionLocation(true) + .build(); + writeClient.createFile(keyArgs, false, false); + OpenKeySession keySession = writeClient.createFile(keyArgs, true, true); + keyArgs.setLocationInfoList( + keySession.getKeyInfo().getLatestVersionLocations().getLocationList()); + 
writeClient.commitKey(keyArgs, keySession.getId()); + OzoneFileStatus ozoneFileStatus = keyManager.getFileStatus(keyArgs); + Assert.assertEquals(keyName, ozoneFileStatus.getKeyInfo().getFileName()); + } + + @Test + public void testGetFileStatusWithFakeDir() throws IOException { + String parentDir = "dir1"; + String fileName = "file1"; + String keyName1 = parentDir + OZONE_URI_DELIMITER + fileName; + // "dir1.file1" used to confirm that it will not affect + // the creation of fake directory "dir1" + String keyName2 = parentDir + "." + fileName; + OzoneFileStatus ozoneFileStatus; + + // create a key "dir1/key1" + OmKeyArgs keyArgs = createBuilder().setKeyName(keyName1).build(); + OpenKeySession keySession = writeClient.openKey(keyArgs); + keyArgs.setLocationInfoList( + keySession.getKeyInfo().getLatestVersionLocations().getLocationList()); + writeClient.commitKey(keyArgs, keySession.getId()); + + // create a key "dir1.key" + keyArgs = createBuilder().setKeyName(keyName2).build(); + keySession = writeClient.createFile(keyArgs, true, true); + keyArgs.setLocationInfoList( + keySession.getKeyInfo().getLatestVersionLocations().getLocationList()); + writeClient.commitKey(keyArgs, keySession.getId()); + + // verify key "dir1/key1" and "dir1.key1" can be found in the bucket, and + // "dir1" can not be found in the bucket + Assert.assertNull(metadataManager.getKeyTable(getDefaultBucketLayout()) + .get(metadataManager.getOzoneKey(VOLUME_NAME, BUCKET_NAME, parentDir))); + Assert.assertNotNull(metadataManager.getKeyTable(getDefaultBucketLayout()) + .get(metadataManager.getOzoneKey(VOLUME_NAME, BUCKET_NAME, keyName1))); + Assert.assertNotNull(metadataManager.getKeyTable(getDefaultBucketLayout()) + .get(metadataManager.getOzoneKey(VOLUME_NAME, BUCKET_NAME, keyName2))); + + // get a non-existing "dir1", since the key is prefixed "dir1/key1", + // a fake "/dir1" will be returned + keyArgs = createBuilder().setKeyName(parentDir).build(); + ozoneFileStatus = 
keyManager.getFileStatus(keyArgs); + Assert.assertEquals(parentDir, ozoneFileStatus.getKeyInfo().getFileName()); + Assert.assertTrue(ozoneFileStatus.isDirectory()); + + // get a non-existing "dir", since the key is not prefixed "dir1/key1", + // a `OMException` will be thrown + keyArgs = createBuilder().setKeyName("dir").build(); + OmKeyArgs finalKeyArgs = keyArgs; + Assert.assertThrows(OMException.class, () -> keyManager.getFileStatus( + finalKeyArgs)); + + // get a file "dir1/key1" + keyArgs = createBuilder().setKeyName(keyName1).build(); + ozoneFileStatus = keyManager.getFileStatus(keyArgs); + Assert.assertEquals(fileName, ozoneFileStatus.getKeyInfo().getFileName()); + Assert.assertTrue(ozoneFileStatus.isFile()); + } + @Test public void testRefreshPipeline() throws Exception { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java index 4259a5e9e9b7..d4e0ea68ce72 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java @@ -45,9 +45,11 @@ import org.junit.rules.TemporaryFolder; import java.io.BufferedReader; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; +import java.io.PrintStream; import java.time.LocalDateTime; import java.util.List; import java.util.ArrayList; @@ -65,6 +67,7 @@ public class TestLDBCli { private DBScanner dbScanner; private DBStore dbStore = null; private List keyNames; + private static final String DEFAULT_ENCODING = UTF_8.name(); @Rule public TemporaryFolder folder = new TemporaryFolder(); @@ -82,6 +85,7 @@ public void shutdown() throws Exception { if (dbStore != null) { dbStore.close(); } + System.setOut(System.out); // Restore the static fields in DBScanner DBScanner.setContainerId(-1); 
DBScanner.setDnDBSchemaVersion("V2"); @@ -118,6 +122,17 @@ public void testOMDB() throws Exception { Assert.assertTrue(getKeyNames(dbScanner).contains("key5")); Assert.assertFalse(getKeyNames(dbScanner).contains("key6")); + final ByteArrayOutputStream outputStreamCaptor = + new ByteArrayOutputStream(); + System.setOut(new PrintStream(outputStreamCaptor, false, DEFAULT_ENCODING)); + DBScanner.setShowCount(true); + dbScanner.call(); + Assert.assertEquals("5", + outputStreamCaptor.toString(DEFAULT_ENCODING).trim()); + System.setOut(System.out); + DBScanner.setShowCount(false); + + DBScanner.setLimit(1); Assert.assertEquals(1, getKeyNames(dbScanner).size()); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestObjectStoreWithLegacyFS.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestObjectStoreWithLegacyFS.java index fb10a346e93f..05570549a61b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestObjectStoreWithLegacyFS.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestObjectStoreWithLegacyFS.java @@ -127,46 +127,36 @@ public void testFlatKeyStructureWithOBS() throws Exception { cluster.getOzoneManager().getMetadataManager() .getKeyTable(BucketLayout.OBJECT_STORE); - String seekKey = "dir"; - String dbKey = cluster.getOzoneManager().getMetadataManager() - .getOzoneKey(volumeName, bucketName, seekKey); - - GenericTestUtils - .waitFor(() -> assertKeyCount(keyTable, dbKey, 1, keyName), 500, - 60000); + GenericTestUtils.waitFor(() -> isKeyExist(keyTable, keyName), + 500, 60000); ozoneBucket.renameKey(keyName, "dir1/NewKey-1"); - GenericTestUtils - .waitFor(() -> assertKeyCount(keyTable, dbKey, 1, "dir1/NewKey-1"), 500, - 60000); + // RenameKey changes keyTable cache, so we need to + // wait for the transaction to be flushed to db + GenericTestUtils.waitFor(() -> !isKeyExist(keyTable, keyName), + 500, 60000); + // When the old key is 
removed, new key should exist + Assert.assertTrue(isKeyExist(keyTable, "dir1/NewKey-1")); } - private boolean assertKeyCount( - Table keyTable, - String dbKey, int expectedCnt, String keyName) { - int countKeys = 0; - try { - TableIterator> - itr = keyTable.iterator(); - itr.seek(dbKey); - while (itr.hasNext()) { - - Table.KeyValue keyValue = itr.next(); - if (!keyValue.getKey().startsWith(dbKey)) { - break; + private boolean isKeyExist(Table keyTable, + String keyName) { + String dbKey = cluster.getOzoneManager().getMetadataManager() + .getOzoneKey(volumeName, bucketName, keyName); + try (TableIterator> + iterator = keyTable.iterator()) { + iterator.seek(dbKey); + if (iterator.hasNext()) { + Table.KeyValue kv = iterator.next(); + if (kv.getKey().equals(dbKey)) { + return true; } - countKeys++; - Assert.assertTrue(keyValue.getKey().endsWith(keyName)); } } catch (IOException ex) { - LOG.info("Test failed with: " + ex.getMessage(), ex); - Assert.fail("Test failed with: " + ex.getMessage()); - } - if (countKeys != expectedCnt) { - LOG.info("Couldn't find KeyName:{} in KeyTable, retrying...", keyName); + LOG.error("Error while iterating key table", ex); } - return countKeys == expectedCnt; + return false; } @Test diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java index fd43a0fa26d5..b920f5734420 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java @@ -16,39 +16,40 @@ */ package org.apache.hadoop.ozone.om; -import java.util.ArrayList; -import java.util.UUID; - import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.MiniOzoneCluster; -import org.apache.hadoop.ozone.OzoneTestUtils; import org.apache.hadoop.ozone.TestDataUtil; -import 
org.apache.hadoop.ozone.client.BucketArgs; +import org.apache.hadoop.ozone.audit.AuditEventStatus; +import org.apache.hadoop.ozone.audit.OMAction; +import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneBucket; -import org.apache.hadoop.ozone.client.OzoneVolume; -import org.apache.hadoop.ozone.client.VolumeArgs; +import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes; import org.apache.hadoop.ozone.security.acl.IAccessAuthorizer; import org.apache.hadoop.ozone.security.acl.IOzoneObj; import org.apache.hadoop.ozone.security.acl.OzoneObjInfo; import org.apache.hadoop.ozone.security.acl.RequestContext; +import org.apache.hadoop.ozone.audit.AuditLogTestUtils; import org.apache.ozone.test.GenericTestUtils; - -import org.apache.commons.lang3.RandomStringUtils; -import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_AUTHORIZER_CLASS; -import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; -import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS; -import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD; import org.junit.AfterClass; -import static org.junit.Assert.assertTrue; - import org.junit.Before; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; -import org.junit.rules.ExpectedException; import org.junit.rules.Timeout; +import java.util.ArrayList; +import java.util.UUID; + +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_AUTHORIZER_CLASS; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD; +import static org.apache.hadoop.ozone.audit.AuditLogTestUtils.verifyAuditLog; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import 
static org.junit.jupiter.api.Assertions.assertThrows; + /** * Test for Ozone Manager ACLs. */ @@ -65,15 +66,11 @@ public class TestOmAcls { private static boolean keyAclAllow = true; private static boolean prefixAclAllow = true; private static MiniOzoneCluster cluster = null; - private static OMMetrics omMetrics; - private static OzoneConfiguration conf; - private static String clusterId; - private static String scmId; - private static String omId; private static GenericTestUtils.LogCapturer logCapturer; - @Rule - public ExpectedException exception = ExpectedException.none(); + static { + AuditLogTestUtils.enableAuditLog(); + } /** * Create a MiniDFSCluster for testing. @@ -82,10 +79,10 @@ public class TestOmAcls { */ @BeforeClass public static void init() throws Exception { - conf = new OzoneConfiguration(); - clusterId = UUID.randomUUID().toString(); - scmId = UUID.randomUUID().toString(); - omId = UUID.randomUUID().toString(); + OzoneConfiguration conf = new OzoneConfiguration(); + String clusterId = UUID.randomUUID().toString(); + String scmId = UUID.randomUUID().toString(); + String omId = UUID.randomUUID().toString(); conf.setBoolean(OZONE_ACL_ENABLED, true); conf.setClass(OZONE_ACL_AUTHORIZER_CLASS, OzoneAccessAuthorizerTest.class, IAccessAuthorizer.class); @@ -96,14 +93,10 @@ public static void init() throws Exception { .setOmId(omId) .build(); cluster.waitForClusterToBeReady(); - omMetrics = cluster.getOzoneManager().getMetrics(); logCapturer = GenericTestUtils.LogCapturer.captureLogs(OzoneManager.getLogger()); } - /** - * Shutdown MiniDFSCluster. - */ @AfterClass public static void shutdown() { if (cluster != null) { @@ -111,11 +104,10 @@ public static void shutdown() { } } - /** - * Reset ACL. 
- */ @Before - public void resetAcl() { + public void setup() { + logCapturer.clearOutput(); + TestOmAcls.volumeAclAllow = true; TestOmAcls.bucketAclAllow = true; TestOmAcls.keyAclAllow = true; @@ -123,72 +115,101 @@ public void resetAcl() { } @Test - public void testBucketCreationPermissionDenied() throws Exception { + public void testCreateVolumePermissionDenied() { + TestOmAcls.volumeAclAllow = false; - String volumeName = RandomStringUtils.randomAlphabetic(5).toLowerCase(); - String bucketName = RandomStringUtils.randomAlphabetic(5).toLowerCase(); + OMException exception = assertThrows(OMException.class, + () -> TestDataUtil.createVolumeAndBucket(cluster)); + assertEquals(ResultCodes.PERMISSION_DENIED, exception.getResult()); - VolumeArgs createVolumeArgs = VolumeArgs.newBuilder() - .setOwner("user" + RandomStringUtils.randomNumeric(5)) - .setAdmin("admin" + RandomStringUtils.randomNumeric(5)) - .build(); + assertTrue(logCapturer.getOutput() + .contains("doesn't have CREATE permission to access volume")); + verifyAuditLog(OMAction.CREATE_VOLUME, AuditEventStatus.FAILURE); + } - cluster.getClient().getObjectStore().createVolume(volumeName, - createVolumeArgs); - OzoneVolume volume = - cluster.getClient().getObjectStore().getVolume(volumeName); + @Test + public void testReadVolumePermissionDenied() throws Exception { + OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(cluster); + TestOmAcls.volumeAclAllow = false; + ObjectStore objectStore = cluster.getClient().getObjectStore(); + OMException exception = assertThrows(OMException.class, () -> + objectStore.getVolume(bucket.getVolumeName())); + assertEquals(ResultCodes.PERMISSION_DENIED, exception.getResult()); + + assertTrue(logCapturer.getOutput() + .contains("doesn't have READ permission to access volume")); + verifyAuditLog(OMAction.READ_VOLUME, AuditEventStatus.FAILURE); + } + @Test + public void testCreateBucketPermissionDenied() { TestOmAcls.bucketAclAllow = false; - 
OzoneTestUtils.expectOmException(ResultCodes.PERMISSION_DENIED, - () -> volume.createBucket(bucketName)); + + OMException exception = assertThrows(OMException.class, + () -> TestDataUtil.createVolumeAndBucket(cluster)); + assertEquals(ResultCodes.PERMISSION_DENIED, exception.getResult()); assertTrue(logCapturer.getOutput() - .contains("doesn't have CREATE permission to access bucket")); + .contains("doesn't have CREATE permission to access bucket")); + verifyAuditLog(OMAction.CREATE_BUCKET, AuditEventStatus.FAILURE); } @Test - public void testFailureInKeyOp() throws Exception { - final VolumeArgs createVolumeArgs; - + public void testReadBucketPermissionDenied() throws Exception { OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(cluster); - logCapturer.clearOutput(); + TestOmAcls.bucketAclAllow = false; + ObjectStore objectStore = cluster.getClient().getObjectStore(); + OMException exception = assertThrows(OMException.class, + () -> objectStore.getVolume( + bucket.getVolumeName()).getBucket(bucket.getName()) + ); + + assertEquals(ResultCodes.PERMISSION_DENIED, exception.getResult()); + assertTrue(logCapturer.getOutput() + .contains("doesn't have READ permission to access bucket")); + verifyAuditLog(OMAction.READ_BUCKET, AuditEventStatus.FAILURE); + } + @Test + public void testCreateKeyPermissionDenied() throws Exception { TestOmAcls.keyAclAllow = false; - OzoneTestUtils.expectOmException(ResultCodes.PERMISSION_DENIED, - () -> TestDataUtil.createKey(bucket, "testKey", "testcontent")); + OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(cluster); + + OMException exception = assertThrows(OMException.class, + () -> TestDataUtil.createKey(bucket, "testKey", "testcontent")); + assertEquals(ResultCodes.PERMISSION_DENIED, exception.getResult()); assertTrue(logCapturer.getOutput().contains("doesn't have CREATE " + - "permission to access key")); + "permission to access key")); } @Test - public void testSetACLPermissionDenied() throws Exception { - - String 
volumeName = RandomStringUtils.randomAlphabetic(5).toLowerCase(); - String bucketName = RandomStringUtils.randomAlphabetic(5).toLowerCase(); + public void testReadKeyPermissionDenied() throws Exception { + OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(cluster); + TestDataUtil.createKey(bucket, "testKey", "testcontent"); - VolumeArgs createVolumeArgs = VolumeArgs.newBuilder() - .setOwner("user" + RandomStringUtils.randomNumeric(5)) - .setAdmin("admin" + RandomStringUtils.randomNumeric(5)) - .build(); - BucketArgs createBucketArgs = BucketArgs.newBuilder() - .setOwner("user" + RandomStringUtils.randomNumeric(5)) - .build(); + TestOmAcls.keyAclAllow = false; + OMException exception = assertThrows(OMException.class, + () -> TestDataUtil.getKey(bucket, "testKey")); - cluster.getClient().getObjectStore().createVolume(volumeName, - createVolumeArgs); - OzoneVolume volume = - cluster.getClient().getObjectStore().getVolume(volumeName); - volume.createBucket(bucketName, createBucketArgs); + assertEquals(ResultCodes.PERMISSION_DENIED, exception.getResult()); + assertTrue(logCapturer.getOutput().contains("doesn't have READ " + + "permission to access key")); + verifyAuditLog(OMAction.READ_KEY, AuditEventStatus.FAILURE); + } - OzoneBucket bucket = volume.getBucket(bucketName); + @Test + public void testSetACLPermissionDenied() throws Exception { + OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(cluster); TestOmAcls.bucketAclAllow = false; - OzoneTestUtils.expectOmException(ResultCodes.PERMISSION_DENIED, - () -> bucket.setAcl(new ArrayList<>())); + OMException exception = assertThrows(OMException.class, + () -> bucket.setAcl(new ArrayList<>())); + assertEquals(ResultCodes.PERMISSION_DENIED, exception.getResult()); assertTrue(logCapturer.getOutput() .contains("doesn't have WRITE_ACL permission to access bucket")); + verifyAuditLog(OMAction.SET_ACL, AuditEventStatus.FAILURE); } /** diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmMetrics.java index 9f6141d8f91e..8d6fb8f9fe0b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmMetrics.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmMetrics.java @@ -65,6 +65,7 @@ import org.junit.Before; import org.junit.Rule; import org.junit.Test; +import org.junit.jupiter.api.Assertions; import org.junit.rules.Timeout; import org.mockito.Mockito; @@ -316,8 +317,20 @@ public void testKeyOps() throws Exception { writeClient.commitKey(keyArgs, keySession.getId()); writeClient.deleteKey(keyArgs); + keyArgs = createKeyArgs(volumeName, bucketName, + new ECReplicationConfig("rs-6-4-1024K")); + try { + keySession = writeClient.openKey(keyArgs); + writeClient.commitKey(keyArgs, keySession.getId()); + } catch (Exception e) { + //Expected Failure in preExecute due to not enough datanode + Assertions.assertTrue(e.getMessage() + .contains("No enough datanodes to choose")); + } + omMetrics = getMetrics("OMMetrics"); assertCounter("NumKeys", 2L, omMetrics); + assertCounter("NumBlockAllocationFails", 1L, omMetrics); // inject exception to test for Failure Metrics on the read path Mockito.doThrow(exception).when(mockKm).lookupKey(any(), any()); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestSecureOzoneManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestSecureOzoneManager.java index 6347a4f9657e..1871fb1bc217 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestSecureOzoneManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestSecureOzoneManager.java @@ -22,10 +22,10 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import 
org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; -import org.apache.hadoop.hdds.security.x509.certificate.client.OMCertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.hdds.security.x509.keys.KeyCodec; import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.security.OMCertificateClient; import org.apache.hadoop.security.ssl.KeyStoreTestUtil; import org.apache.ozone.test.LambdaTestUtils; import org.bouncycastle.cert.X509CertificateHolder; @@ -121,8 +121,8 @@ public void testSecureOmInitFailures() throws Exception { // Case 1: When keypair as well as certificate is missing. Initial keypair // boot-up. Get certificate will fail when SCM is not running. SecurityConfig securityConfig = new SecurityConfig(conf); - CertificateClient client = new OMCertificateClient(securityConfig, - omStorage.getOmCertSerialId()); + CertificateClient client = + new OMCertificateClient(securityConfig, omStorage, scmId); Assert.assertEquals(CertificateClient.InitResponse.GETCERT, client.init()); privateKey = client.getPrivateKey(); publicKey = client.getPublicKey(); @@ -131,8 +131,7 @@ public void testSecureOmInitFailures() throws Exception { Assert.assertNull(client.getCertificate()); // Case 2: If key pair already exist than response should be RECOVER. 
- client = new OMCertificateClient(securityConfig, - omStorage.getOmCertSerialId()); + client = new OMCertificateClient(securityConfig, omStorage, scmId); Assert.assertEquals(CertificateClient.InitResponse.RECOVER, client.init()); Assert.assertNotNull(client.getPrivateKey()); Assert.assertNotNull(client.getPublicKey()); @@ -168,17 +167,15 @@ public void testSecureOmInitFailures() throws Exception { securityConfig.getSignatureAlgo()); certCodec.writeCertificate(new X509CertificateHolder( x509Certificate.getEncoded())); - client = new OMCertificateClient(securityConfig, - x509Certificate.getSerialNumber().toString()); omStorage.setOmCertSerialId(x509Certificate.getSerialNumber().toString()); + client = new OMCertificateClient(securityConfig, omStorage, scmId); Assert.assertEquals(CertificateClient.InitResponse.FAILURE, client.init()); Assert.assertNull(client.getPrivateKey()); Assert.assertNull(client.getPublicKey()); Assert.assertNotNull(client.getCertificate()); // Case 6: When private key and certificate is present. - client = new OMCertificateClient(securityConfig, - x509Certificate.getSerialNumber().toString()); + client = new OMCertificateClient(securityConfig, omStorage, scmId); FileUtils.deleteQuietly(Paths.get(securityConfig.getKeyLocation(COMPONENT) .toString(), securityConfig.getPublicKeyFileName()).toFile()); keyCodec.writePrivateKey(privateKey); @@ -188,8 +185,7 @@ public void testSecureOmInitFailures() throws Exception { Assert.assertNotNull(client.getCertificate()); // Case 7 When keypair and certificate is present. 
- client = new OMCertificateClient(securityConfig, - x509Certificate.getSerialNumber().toString()); + client = new OMCertificateClient(securityConfig, omStorage, scmId); Assert.assertEquals(CertificateClient.InitResponse.SUCCESS, client.init()); Assert.assertNotNull(client.getPrivateKey()); Assert.assertNotNull(client.getPublicKey()); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/parser/TestOzoneHARatisLogParser.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/parser/TestOzoneHARatisLogParser.java index 00e591ef74e5..aae760a94a9b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/parser/TestOzoneHARatisLogParser.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/parser/TestOzoneHARatisLogParser.java @@ -30,39 +30,34 @@ import org.apache.hadoop.ozone.segmentparser.OMRatisLogParser; import org.apache.hadoop.ozone.segmentparser.SCMRatisLogParser; import org.apache.ozone.test.GenericTestUtils; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.apache.ozone.test.tag.Flaky; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.PrintStream; import java.util.UUID; -import org.junit.Rule; -import org.junit.rules.Timeout; - import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; /** * Test Ozone OM and SCM HA Ratis log parser. */ -public class TestOzoneHARatisLogParser { - - /** - * Set a timeout for each test. 
- */ - @Rule - public Timeout timeout = Timeout.seconds(300); +@Flaky("HDDS-7008") +@Timeout(300) +class TestOzoneHARatisLogParser { private MiniOzoneHAClusterImpl cluster = null; private final ByteArrayOutputStream out = new ByteArrayOutputStream(); private final ByteArrayOutputStream err = new ByteArrayOutputStream(); - @Before - public void setup() throws Exception { + @BeforeEach + void setup() throws Exception { String clusterId = UUID.randomUUID().toString(); String scmId = UUID.randomUUID().toString(); String omServiceId = "omServiceId1"; @@ -91,8 +86,8 @@ private void performFewRequests(ObjectStore objectStore) throws Exception { UUID.randomUUID().toString()); } - @After - public void destroy() throws Exception { + @AfterEach + void destroy() throws Exception { if (cluster != null) { cluster.shutdown(); } @@ -102,7 +97,7 @@ public void destroy() throws Exception { } @Test - public void testRatisLogParsing() throws Exception { + void testRatisLogParsing() throws Exception { OzoneConfiguration ozoneConfiguration = cluster.getOMLeader().getConfiguration(); @@ -114,20 +109,20 @@ public void testRatisLogParsing() throws Exception { File omMetaDir = new File(ozoneConfiguration.get(OZONE_METADATA_DIRS), "ratis"); - Assert.assertTrue(omMetaDir.isDirectory()); + Assertions.assertTrue(omMetaDir.isDirectory()); String[] ratisDirs = omMetaDir.list(); - Assert.assertNotNull(ratisDirs); - Assert.assertEquals(1, ratisDirs.length); + Assertions.assertNotNull(ratisDirs); + Assertions.assertEquals(1, ratisDirs.length); File groupDir = new File(omMetaDir, ratisDirs[0]); - Assert.assertNotNull(groupDir); - Assert.assertTrue(groupDir.isDirectory()); + Assertions.assertNotNull(groupDir); + Assertions.assertTrue(groupDir.isDirectory()); File currentDir = new File(groupDir, "current"); File logFile = new File(currentDir, "log_inprogress_0"); GenericTestUtils.waitFor(logFile::exists, 100, 15000); - Assert.assertTrue(logFile.isFile()); + Assertions.assertTrue(logFile.isFile()); 
OMRatisLogParser omRatisLogParser = new OMRatisLogParser(); omRatisLogParser.setSegmentFile(logFile); @@ -136,27 +131,27 @@ public void testRatisLogParsing() throws Exception { // Not checking total entry count, because of not sure of exact count of // metadata entry changes. - Assert.assertTrue(out.toString(UTF_8.name()) + Assertions.assertTrue(out.toString(UTF_8.name()) .contains("Num Total Entries:")); out.reset(); // Now check for SCM. File scmMetadataDir = new File(RatisUtil.getRatisStorageDir(leaderSCMConfig)); - Assert.assertTrue(scmMetadataDir.isDirectory()); + Assertions.assertTrue(scmMetadataDir.isDirectory()); ratisDirs = scmMetadataDir.list(); - Assert.assertNotNull(ratisDirs); - Assert.assertEquals(1, ratisDirs.length); + Assertions.assertNotNull(ratisDirs); + Assertions.assertEquals(1, ratisDirs.length); groupDir = new File(scmMetadataDir, ratisDirs[0]); - Assert.assertNotNull(groupDir); - Assert.assertTrue(groupDir.isDirectory()); + Assertions.assertNotNull(groupDir); + Assertions.assertTrue(groupDir.isDirectory()); currentDir = new File(groupDir, "current"); logFile = new File(currentDir, "log_inprogress_1"); GenericTestUtils.waitFor(logFile::exists, 100, 15000); - Assert.assertTrue(logFile.isFile()); + Assertions.assertTrue(logFile.isFile()); SCMRatisLogParser scmRatisLogParser = new SCMRatisLogParser(); scmRatisLogParser.setSegmentFile(logFile); @@ -164,7 +159,7 @@ public void testRatisLogParsing() throws Exception { // Not checking total entry count, because of not sure of exact count of // metadata entry changes. 
- Assert.assertTrue(out.toString(UTF_8.name()) + Assertions.assertTrue(out.toString(UTF_8.name()) .contains("Num Total Entries:")); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java index abeb1f189c7f..18373d1789e7 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.XceiverClientGrpc; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; @@ -89,6 +90,41 @@ public void shutdown() { } } + @Test + public void testSyncSCMContainerInfo() throws Exception { + ReconStorageContainerManagerFacade reconScm = + (ReconStorageContainerManagerFacade) + cluster.getReconServer().getReconStorageContainerManager(); + StorageContainerManager scm = cluster.getStorageContainerManager(); + ContainerManager scmContainerManager = scm.getContainerManager(); + ContainerManager reconContainerManager = reconScm.getContainerManager(); + final ContainerInfo container1 = scmContainerManager.allocateContainer( + RatisReplicationConfig.getInstance( + HddsProtos.ReplicationFactor.ONE), "admin"); + final ContainerInfo container2 = scmContainerManager.allocateContainer( + RatisReplicationConfig.getInstance( + HddsProtos.ReplicationFactor.ONE), "admin"); + reconContainerManager.allocateContainer( + RatisReplicationConfig.getInstance( + HddsProtos.ReplicationFactor.ONE), "admin"); + scmContainerManager.updateContainerState(container1.containerID(), + HddsProtos.LifeCycleEvent.FINALIZE); + 
scmContainerManager.updateContainerState(container2.containerID(), + HddsProtos.LifeCycleEvent.FINALIZE); + scmContainerManager.updateContainerState(container1.containerID(), + HddsProtos.LifeCycleEvent.CLOSE); + scmContainerManager.updateContainerState(container2.containerID(), + HddsProtos.LifeCycleEvent.CLOSE); + int scmContainersCount = scmContainerManager.getContainers().size(); + int reconContainersCount = reconContainerManager + .getContainers().size(); + Assert.assertNotEquals(scmContainersCount, reconContainersCount); + reconScm.syncWithSCMContainerInfo(); + reconContainersCount = reconContainerManager + .getContainers().size(); + Assert.assertEquals(scmContainersCount, reconContainersCount); + } + @Test public void testMissingContainerDownNode() throws Exception { ReconStorageContainerManagerFacade reconScm = diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerFSO.java index a893ee58134e..7af632830bb5 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerFSO.java @@ -128,8 +128,10 @@ public void testNamespaceSummaryAPI() throws Exception { NamespaceSummaryResponse entity = (NamespaceSummaryResponse) basicInfo.getEntity(); Assert.assertSame(entity.getEntityType(), EntityType.DIRECTORY); - Assert.assertEquals(1, entity.getNumTotalKey()); - Assert.assertEquals(0, entity.getNumTotalDir()); + Assert.assertEquals(1, entity.getCountStats().getNumTotalKey()); + Assert.assertEquals(0, entity.getCountStats().getNumTotalDir()); + Assert.assertEquals(-1, entity.getCountStats().getNumVolume()); + Assert.assertEquals(-1, entity.getCountStats().getNumBucket()); for (int i = 0; i < 10; i++) { 
Assert.assertNotNull(impl.getOMMetadataManagerInstance() .getVolumeTable().get("/vol" + i)); @@ -149,10 +151,10 @@ public void testNamespaceSummaryAPI() throws Exception { (NamespaceSummaryResponse) rootBasicRes.getEntity(); Assert.assertSame(EntityType.ROOT, rootBasicEntity.getEntityType()); // one additional dummy volume at creation - Assert.assertEquals(13, rootBasicEntity.getNumVolume()); - Assert.assertEquals(12, rootBasicEntity.getNumBucket()); - Assert.assertEquals(12, rootBasicEntity.getNumTotalDir()); - Assert.assertEquals(12, rootBasicEntity.getNumTotalKey()); + Assert.assertEquals(13, rootBasicEntity.getCountStats().getNumVolume()); + Assert.assertEquals(12, rootBasicEntity.getCountStats().getNumBucket()); + Assert.assertEquals(12, rootBasicEntity.getCountStats().getNumTotalDir()); + Assert.assertEquals(12, rootBasicEntity.getCountStats().getNumTotalKey()); } /** diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java index 88f387afee0f..4996371a3c42 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java @@ -590,14 +590,14 @@ public void testDeleteToTrashOrSkipTrash() throws Exception { final String strKey1 = strDir1 + "/key1"; final Path pathKey1 = new Path(strKey1); final Path trashPathKey1 = Path.mergePaths( - new Path(new OFSPath(strKey1).getTrashRoot(), trashCurrent), - new Path(dir1, "key1")); + new Path(new OFSPath(strKey1, clientConf).getTrashRoot(), + trashCurrent), new Path(dir1, "key1")); final String strKey2 = strDir1 + "/key2"; final Path pathKey2 = new Path(strKey2); final Path trashPathKey2 = Path.mergePaths( - new Path(new OFSPath(strKey2).getTrashRoot(), trashCurrent), - new Path(dir1, "key2")); + new Path(new OFSPath(strKey2, 
clientConf).getTrashRoot(), + trashCurrent), new Path(dir1, "key2")); int res; try { @@ -694,8 +694,8 @@ public void testDeleteTrashNoSkipTrash() throws Exception { final String[] rmTrashArgs = new String[] {"-rm", "-R", testVolBucket + "/.Trash"}; final Path trashPathKey1 = Path.mergePaths(new Path( - new OFSPath(testKey).getTrashRoot(), new Path("Current")), - new Path(keyName)); + new OFSPath(testKey, clientConf).getTrashRoot(), + new Path("Current")), new Path(keyName)); FileSystem fs = FileSystem.get(clientConf); try { @@ -820,6 +820,11 @@ public void testShQuota() throws Exception { .getQuotaInNamespace()); // Test clrquota option. + args = new String[]{"volume", "clrquota", "vol4"}; + executeWithError(ozoneShell, args, "At least one of the quota clear" + + " flag is required"); + out.reset(); + args = new String[]{"volume", "clrquota", "vol4", "--space-quota", "--namespace-quota"}; execute(ozoneShell, args); @@ -828,6 +833,11 @@ public void testShQuota() throws Exception { objectStore.getVolume("vol4").getQuotaInNamespace()); out.reset(); + args = new String[]{"bucket", "clrquota", "vol4/buck4"}; + executeWithError(ozoneShell, args, "At least one of the quota clear" + + " flag is required"); + out.reset(); + args = new String[]{"bucket", "clrquota", "vol4/buck4", "--space-quota", "--namespace-quota"}; execute(ozoneShell, args); @@ -853,23 +863,6 @@ public void testShQuota() throws Exception { () -> execute(ozoneShell, volumeArgs2)); out.reset(); - // Test set volume spaceQuota or nameSpaceQuota to normal value. 
- String[] volumeArgs3 = new String[]{"volume", "setquota", "vol4", - "--space-quota", "1000B"}; - execute(ozoneShell, volumeArgs3); - out.reset(); - assertEquals(1000, objectStore.getVolume("vol4").getQuotaInBytes()); - assertEquals(-1, - objectStore.getVolume("vol4").getQuotaInNamespace()); - - String[] volumeArgs4 = new String[]{"volume", "setquota", "vol4", - "--namespace-quota", "100"}; - execute(ozoneShell, volumeArgs4); - out.reset(); - assertEquals(1000, objectStore.getVolume("vol4").getQuotaInBytes()); - assertEquals(100, - objectStore.getVolume("vol4").getQuotaInNamespace()); - // Test set bucket quota to 0. String[] bucketArgs1 = new String[]{"bucket", "setquota", "vol4/buck4", "--space-quota", "0GB"}; @@ -914,6 +907,35 @@ public void testShQuota() throws Exception { assertEquals(100, objectStore.getVolume("vol4") .getBucket("buck4").getQuotaInNamespace()); + // Test set volume quota without quota flag + String[] bucketArgs6 = new String[]{"bucket", "setquota", "vol4/buck4"}; + executeWithError(ozoneShell, bucketArgs6, + "At least one of the quota set flag is required"); + out.reset(); + + // Test set volume spaceQuota or nameSpaceQuota to normal value. 
+ String[] volumeArgs3 = new String[]{"volume", "setquota", "vol4", + "--space-quota", "1000B"}; + execute(ozoneShell, volumeArgs3); + out.reset(); + assertEquals(1000, objectStore.getVolume("vol4").getQuotaInBytes()); + assertEquals(-1, + objectStore.getVolume("vol4").getQuotaInNamespace()); + + String[] volumeArgs4 = new String[]{"volume", "setquota", "vol4", + "--namespace-quota", "100"}; + execute(ozoneShell, volumeArgs4); + out.reset(); + assertEquals(1000, objectStore.getVolume("vol4").getQuotaInBytes()); + assertEquals(100, + objectStore.getVolume("vol4").getQuotaInNamespace()); + + // Test set volume quota without quota flag + String[] volumeArgs5 = new String[]{"volume", "setquota", "vol4"}; + executeWithError(ozoneShell, volumeArgs5, + "At least one of the quota set flag is required"); + out.reset(); + objectStore.getVolume("vol").deleteBucket("buck"); objectStore.deleteVolume("vol"); objectStore.getVolume("vol1").deleteBucket("buck1"); diff --git a/hadoop-ozone/interface-client/pom.xml b/hadoop-ozone/interface-client/pom.xml index 276ffdc60a06..feb831219863 100644 --- a/hadoop-ozone/interface-client/pom.xml +++ b/hadoop-ozone/interface-client/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-interface-client - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Client interface Apache Ozone Client Interface jar diff --git a/hadoop-ozone/interface-client/src/main/resources/proto.lock b/hadoop-ozone/interface-client/src/main/resources/proto.lock index e6d24a895026..ffe53f04cbfd 100644 --- a/hadoop-ozone/interface-client/src/main/resources/proto.lock +++ b/hadoop-ozone/interface-client/src/main/resources/proto.lock @@ -1,5 +1,172 @@ { "definitions": [ + { + "protopath": "OMAdminProtocol.proto", + "def": { + "enums": [ + { + "name": "NodeState", + "enum_fields": [ + { + "name": "ACTIVE", + "integer": 1 + }, + { + "name": "DECOMMISSIONED", + "integer": 2 + } + ] + } + ], + 
"messages": [ + { + "name": "OMConfigurationRequest" + }, + { + "name": "OMConfigurationResponse", + "fields": [ + { + "id": 1, + "name": "success", + "type": "bool", + "required": true + }, + { + "id": 2, + "name": "errorMsg", + "type": "string", + "optional": true + }, + { + "id": 3, + "name": "nodesInMemory", + "type": "OMNodeInfo", + "is_repeated": true + }, + { + "id": 4, + "name": "nodesInNewConf", + "type": "OMNodeInfo", + "is_repeated": true + } + ] + }, + { + "name": "OMNodeInfo", + "fields": [ + { + "id": 1, + "name": "nodeID", + "type": "string", + "required": true + }, + { + "id": 2, + "name": "hostAddress", + "type": "string", + "required": true + }, + { + "id": 3, + "name": "rpcPort", + "type": "uint32", + "required": true + }, + { + "id": 4, + "name": "ratisPort", + "type": "uint32", + "required": true + }, + { + "id": 5, + "name": "nodeState", + "type": "NodeState", + "optional": true, + "options": [ + { + "name": "default", + "value": "ACTIVE" + } + ] + } + ] + }, + { + "name": "DecommissionOMRequest", + "fields": [ + { + "id": 1, + "name": "nodeId", + "type": "string", + "required": true + }, + { + "id": 2, + "name": "nodeAddress", + "type": "string", + "required": true + } + ] + }, + { + "name": "DecommissionOMResponse", + "fields": [ + { + "id": 1, + "name": "success", + "type": "bool", + "required": true + }, + { + "id": 3, + "name": "errorMsg", + "type": "string", + "optional": true + } + ] + } + ], + "services": [ + { + "name": "OzoneManagerAdminService", + "rpcs": [ + { + "name": "getOMConfiguration", + "in_type": "OMConfigurationRequest", + "out_type": "OMConfigurationResponse" + }, + { + "name": "decommission", + "in_type": "DecommissionOMRequest", + "out_type": "DecommissionOMResponse" + } + ] + } + ], + "package": { + "name": "hadoop.ozone" + }, + "options": [ + { + "name": "java_package", + "value": "org.apache.hadoop.ozone.protocol.proto" + }, + { + "name": "java_outer_classname", + "value": "OzoneManagerAdminProtocolProtos" + }, + { + 
"name": "java_generic_services", + "value": "true" + }, + { + "name": "java_generate_equals_and_hash", + "value": "true" + } + ] + } + }, { "protopath": "OmClientProtocol.proto", "def": { @@ -214,6 +381,70 @@ { "name": "PurgePaths", "integer": 94 + }, + { + "name": "PurgeDirectories", + "integer": 95 + }, + { + "name": "CreateTenant", + "integer": 96 + }, + { + "name": "DeleteTenant", + "integer": 97 + }, + { + "name": "ListTenant", + "integer": 98 + }, + { + "name": "TenantGetUserInfo", + "integer": 99 + }, + { + "name": "TenantAssignUserAccessId", + "integer": 100 + }, + { + "name": "TenantRevokeUserAccessId", + "integer": 101 + }, + { + "name": "TenantAssignAdmin", + "integer": 102 + }, + { + "name": "TenantRevokeAdmin", + "integer": 103 + }, + { + "name": "GetS3VolumeContext", + "integer": 104 + }, + { + "name": "TenantListUser", + "integer": 105 + }, + { + "name": "SetS3Secret", + "integer": 106 + }, + { + "name": "SetRangerServiceVersion", + "integer": 107 + }, + { + "name": "RangerBGSync", + "integer": 109 + }, + { + "name": "EchoRPC", + "integer": 110 + }, + { + "name": "GetKeyInfo", + "integer": 111 } ] }, @@ -503,6 +734,54 @@ { "name": "NOT_SUPPORTED_OPERATION_WHEN_PREPARED", "integer": 74 + }, + { + "name": "NOT_SUPPORTED_OPERATION_PRIOR_FINALIZATION", + "integer": 75 + }, + { + "name": "TENANT_NOT_FOUND", + "integer": 76 + }, + { + "name": "TENANT_ALREADY_EXISTS", + "integer": 77 + }, + { + "name": "INVALID_TENANT_ID", + "integer": 78 + }, + { + "name": "ACCESS_ID_NOT_FOUND", + "integer": 79 + }, + { + "name": "TENANT_USER_ACCESS_ID_ALREADY_EXISTS", + "integer": 80 + }, + { + "name": "INVALID_TENANT_USERNAME", + "integer": 81 + }, + { + "name": "INVALID_ACCESS_ID", + "integer": 82 + }, + { + "name": "TENANT_AUTHORIZER_ERROR", + "integer": 83 + }, + { + "name": "VOLUME_IS_REFERENCED", + "integer": 84 + }, + { + "name": "TENANT_NOT_EMPTY", + "integer": 85 + }, + { + "name": "FEATURE_NOT_ENABLED", + "integer": 86 } ] }, @@ -658,6 +937,35 @@ } ] }, + { + 
"name": "ChecksumTypeProto", + "enum_fields": [ + { + "name": "CHECKSUM_NULL" + }, + { + "name": "CHECKSUM_CRC32", + "integer": 1 + }, + { + "name": "CHECKSUM_CRC32C", + "integer": 2 + } + ] + }, + { + "name": "FileChecksumTypeProto", + "enum_fields": [ + { + "name": "MD5CRC", + "integer": 1 + }, + { + "name": "COMPOSITE_CRC", + "integer": 2 + } + ] + }, { "name": "OMTokenProto.Type", "enum_fields": [ @@ -717,307 +1025,476 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "clientId", - "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "userInfo", - "type": "UserInfo" + "type": "UserInfo", + "optional": true }, { "id": 5, "name": "version", - "type": "uint32" + "type": "uint32", + "optional": true }, { "id": 6, "name": "layoutVersion", - "type": "LayoutVersion" + "type": "LayoutVersion", + "optional": true }, { "id": 11, "name": "createVolumeRequest", - "type": "CreateVolumeRequest" + "type": "CreateVolumeRequest", + "optional": true }, { "id": 12, "name": "setVolumePropertyRequest", - "type": "SetVolumePropertyRequest" + "type": "SetVolumePropertyRequest", + "optional": true }, { "id": 13, "name": "checkVolumeAccessRequest", - "type": "CheckVolumeAccessRequest" + "type": "CheckVolumeAccessRequest", + "optional": true }, { "id": 14, "name": "infoVolumeRequest", - "type": "InfoVolumeRequest" + "type": "InfoVolumeRequest", + "optional": true }, { "id": 15, "name": "deleteVolumeRequest", - "type": "DeleteVolumeRequest" + "type": "DeleteVolumeRequest", + "optional": true }, { "id": 16, "name": "listVolumeRequest", - "type": "ListVolumeRequest" + "type": "ListVolumeRequest", + "optional": true }, { "id": 21, "name": "createBucketRequest", - "type": "CreateBucketRequest" + "type": "CreateBucketRequest", + "optional": true }, { "id": 22, "name": "infoBucketRequest", - "type": "InfoBucketRequest" + 
"type": "InfoBucketRequest", + "optional": true }, { "id": 23, "name": "setBucketPropertyRequest", - "type": "SetBucketPropertyRequest" + "type": "SetBucketPropertyRequest", + "optional": true }, { "id": 24, "name": "deleteBucketRequest", - "type": "DeleteBucketRequest" + "type": "DeleteBucketRequest", + "optional": true }, { "id": 25, "name": "listBucketsRequest", - "type": "ListBucketsRequest" + "type": "ListBucketsRequest", + "optional": true }, { "id": 31, "name": "createKeyRequest", - "type": "CreateKeyRequest" + "type": "CreateKeyRequest", + "optional": true }, { "id": 32, "name": "lookupKeyRequest", - "type": "LookupKeyRequest" + "type": "LookupKeyRequest", + "optional": true }, { "id": 33, "name": "renameKeyRequest", - "type": "RenameKeyRequest" + "type": "RenameKeyRequest", + "optional": true }, { "id": 34, "name": "deleteKeyRequest", - "type": "DeleteKeyRequest" + "type": "DeleteKeyRequest", + "optional": true }, { "id": 35, "name": "listKeysRequest", - "type": "ListKeysRequest" + "type": "ListKeysRequest", + "optional": true }, { "id": 36, "name": "commitKeyRequest", - "type": "CommitKeyRequest" + "type": "CommitKeyRequest", + "optional": true }, { "id": 37, "name": "allocateBlockRequest", - "type": "AllocateBlockRequest" + "type": "AllocateBlockRequest", + "optional": true }, { "id": 38, "name": "deleteKeysRequest", - "type": "DeleteKeysRequest" + "type": "DeleteKeysRequest", + "optional": true }, { "id": 39, "name": "renameKeysRequest", - "type": "RenameKeysRequest" + "type": "RenameKeysRequest", + "optional": true }, { "id": 40, "name": "deleteOpenKeysRequest", - "type": "DeleteOpenKeysRequest" + "type": "DeleteOpenKeysRequest", + "optional": true }, { "id": 45, "name": "initiateMultiPartUploadRequest", - "type": "MultipartInfoInitiateRequest" + "type": "MultipartInfoInitiateRequest", + "optional": true }, { "id": 46, "name": "commitMultiPartUploadRequest", - "type": "MultipartCommitUploadPartRequest" + "type": "MultipartCommitUploadPartRequest", + 
"optional": true }, { "id": 47, "name": "completeMultiPartUploadRequest", - "type": "MultipartUploadCompleteRequest" + "type": "MultipartUploadCompleteRequest", + "optional": true }, { "id": 48, "name": "abortMultiPartUploadRequest", - "type": "MultipartUploadAbortRequest" + "type": "MultipartUploadAbortRequest", + "optional": true }, { "id": 49, "name": "getS3SecretRequest", - "type": "GetS3SecretRequest" + "type": "GetS3SecretRequest", + "optional": true }, { "id": 50, "name": "listMultipartUploadPartsRequest", - "type": "MultipartUploadListPartsRequest" + "type": "MultipartUploadListPartsRequest", + "optional": true }, { "id": 51, "name": "serviceListRequest", - "type": "ServiceListRequest" + "type": "ServiceListRequest", + "optional": true }, { "id": 53, "name": "dbUpdatesRequest", - "type": "DBUpdatesRequest" + "type": "DBUpdatesRequest", + "optional": true }, { "id": 54, "name": "finalizeUpgradeRequest", - "type": "FinalizeUpgradeRequest" + "type": "FinalizeUpgradeRequest", + "optional": true }, { "id": 55, "name": "finalizeUpgradeProgressRequest", - "type": "FinalizeUpgradeProgressRequest" + "type": "FinalizeUpgradeProgressRequest", + "optional": true }, { "id": 56, "name": "prepareRequest", - "type": "PrepareRequest" + "type": "PrepareRequest", + "optional": true }, { "id": 57, "name": "prepareStatusRequest", - "type": "PrepareStatusRequest" + "type": "PrepareStatusRequest", + "optional": true }, { "id": 58, "name": "cancelPrepareRequest", - "type": "CancelPrepareRequest" + "type": "CancelPrepareRequest", + "optional": true }, { "id": 61, "name": "getDelegationTokenRequest", - "type": "hadoop.common.GetDelegationTokenRequestProto" + "type": "hadoop.common.GetDelegationTokenRequestProto", + "optional": true }, { "id": 62, "name": "renewDelegationTokenRequest", - "type": "hadoop.common.RenewDelegationTokenRequestProto" + "type": "hadoop.common.RenewDelegationTokenRequestProto", + "optional": true }, { "id": 63, "name": "cancelDelegationTokenRequest", - 
"type": "hadoop.common.CancelDelegationTokenRequestProto" + "type": "hadoop.common.CancelDelegationTokenRequestProto", + "optional": true }, { "id": 64, "name": "updateGetDelegationTokenRequest", - "type": "UpdateGetDelegationTokenRequest" + "type": "UpdateGetDelegationTokenRequest", + "optional": true }, { "id": 65, "name": "updatedRenewDelegationTokenRequest", - "type": "UpdateRenewDelegationTokenRequest" + "type": "UpdateRenewDelegationTokenRequest", + "optional": true }, { "id": 70, "name": "getFileStatusRequest", - "type": "GetFileStatusRequest" + "type": "GetFileStatusRequest", + "optional": true }, { "id": 71, "name": "createDirectoryRequest", - "type": "CreateDirectoryRequest" + "type": "CreateDirectoryRequest", + "optional": true }, { "id": 72, "name": "createFileRequest", - "type": "CreateFileRequest" + "type": "CreateFileRequest", + "optional": true }, { "id": 73, "name": "lookupFileRequest", - "type": "LookupFileRequest" + "type": "LookupFileRequest", + "optional": true }, { "id": 74, "name": "listStatusRequest", - "type": "ListStatusRequest" + "type": "ListStatusRequest", + "optional": true }, { "id": 75, "name": "addAclRequest", - "type": "AddAclRequest" + "type": "AddAclRequest", + "optional": true }, { "id": 76, "name": "removeAclRequest", - "type": "RemoveAclRequest" + "type": "RemoveAclRequest", + "optional": true }, { "id": 77, "name": "setAclRequest", - "type": "SetAclRequest" + "type": "SetAclRequest", + "optional": true }, { "id": 78, "name": "getAclRequest", - "type": "GetAclRequest" + "type": "GetAclRequest", + "optional": true }, { "id": 81, "name": "purgeKeysRequest", - "type": "PurgeKeysRequest" + "type": "PurgeKeysRequest", + "optional": true }, { "id": 82, "name": "updateGetS3SecretRequest", - "type": "UpdateGetS3SecretRequest" + "type": "UpdateGetS3SecretRequest", + "optional": true }, { "id": 83, "name": "listMultipartUploadsRequest", - "type": "ListMultipartUploadsRequest" + "type": "ListMultipartUploadsRequest", + "optional": true 
}, { "id": 91, "name": "listTrashRequest", - "type": "ListTrashRequest" + "type": "ListTrashRequest", + "optional": true }, { "id": 92, "name": "RecoverTrashRequest", - "type": "RecoverTrashRequest" + "type": "RecoverTrashRequest", + "optional": true }, { "id": 93, "name": "RevokeS3SecretRequest", - "type": "RevokeS3SecretRequest" + "type": "RevokeS3SecretRequest", + "optional": true }, { "id": 94, "name": "purgePathsRequest", - "type": "PurgePathsRequest" + "type": "PurgePathsRequest", + "optional": true, + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + }, + { + "id": 108, + "name": "purgeDirectoriesRequest", + "type": "PurgeDirectoriesRequest", + "optional": true + }, + { + "id": 95, + "name": "s3Authentication", + "type": "S3Authentication", + "optional": true + }, + { + "id": 96, + "name": "CreateTenantRequest", + "type": "CreateTenantRequest", + "optional": true + }, + { + "id": 97, + "name": "DeleteTenantRequest", + "type": "DeleteTenantRequest", + "optional": true + }, + { + "id": 98, + "name": "ListTenantRequest", + "type": "ListTenantRequest", + "optional": true + }, + { + "id": 99, + "name": "TenantGetUserInfoRequest", + "type": "TenantGetUserInfoRequest", + "optional": true + }, + { + "id": 100, + "name": "TenantAssignUserAccessIdRequest", + "type": "TenantAssignUserAccessIdRequest", + "optional": true + }, + { + "id": 101, + "name": "TenantRevokeUserAccessIdRequest", + "type": "TenantRevokeUserAccessIdRequest", + "optional": true + }, + { + "id": 102, + "name": "TenantAssignAdminRequest", + "type": "TenantAssignAdminRequest", + "optional": true + }, + { + "id": 103, + "name": "TenantRevokeAdminRequest", + "type": "TenantRevokeAdminRequest", + "optional": true + }, + { + "id": 104, + "name": "getS3VolumeContextRequest", + "type": "GetS3VolumeContextRequest", + "optional": true + }, + { + "id": 105, + "name": "tenantListUserRequest", + "type": "TenantListUserRequest", + "optional": true + }, + { + "id": 106, + "name": 
"SetS3SecretRequest", + "type": "SetS3SecretRequest", + "optional": true + }, + { + "id": 107, + "name": "SetRangerServiceVersionRequest", + "type": "SetRangerServiceVersionRequest", + "optional": true + }, + { + "id": 109, + "name": "RangerBGSyncRequest", + "type": "RangerBGSyncRequest", + "optional": true + }, + { + "id": 110, + "name": "EchoRPCRequest", + "type": "EchoRPCRequest", + "optional": true + }, + { + "id": 111, + "name": "GetKeyInfoRequest", + "type": "GetKeyInfoRequest", + "optional": true } ] }, @@ -1027,17 +1504,20 @@ { "id": 1, "name": "cmdType", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "traceID", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "success", "type": "bool", + "optional": true, "options": [ { "name": "default", @@ -1048,268 +1528,423 @@ { "id": 4, "name": "message", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "status", - "type": "Status" + "type": "Status", + "required": true }, { "id": 6, "name": "leaderOMNodeId", - "type": "string" + "type": "string", + "optional": true }, { "id": 11, "name": "createVolumeResponse", - "type": "CreateVolumeResponse" + "type": "CreateVolumeResponse", + "optional": true }, { "id": 12, "name": "setVolumePropertyResponse", - "type": "SetVolumePropertyResponse" + "type": "SetVolumePropertyResponse", + "optional": true }, { "id": 13, "name": "checkVolumeAccessResponse", - "type": "CheckVolumeAccessResponse" + "type": "CheckVolumeAccessResponse", + "optional": true }, { "id": 14, "name": "infoVolumeResponse", - "type": "InfoVolumeResponse" + "type": "InfoVolumeResponse", + "optional": true }, { "id": 15, "name": "deleteVolumeResponse", - "type": "DeleteVolumeResponse" + "type": "DeleteVolumeResponse", + "optional": true }, { "id": 16, "name": "listVolumeResponse", - "type": "ListVolumeResponse" + "type": "ListVolumeResponse", + "optional": true }, { "id": 21, "name": "createBucketResponse", - "type": 
"CreateBucketResponse" + "type": "CreateBucketResponse", + "optional": true }, { "id": 22, "name": "infoBucketResponse", - "type": "InfoBucketResponse" + "type": "InfoBucketResponse", + "optional": true }, { "id": 23, "name": "setBucketPropertyResponse", - "type": "SetBucketPropertyResponse" + "type": "SetBucketPropertyResponse", + "optional": true }, { "id": 24, "name": "deleteBucketResponse", - "type": "DeleteBucketResponse" + "type": "DeleteBucketResponse", + "optional": true }, { "id": 25, "name": "listBucketsResponse", - "type": "ListBucketsResponse" + "type": "ListBucketsResponse", + "optional": true }, { "id": 31, "name": "createKeyResponse", - "type": "CreateKeyResponse" + "type": "CreateKeyResponse", + "optional": true }, { "id": 32, "name": "lookupKeyResponse", - "type": "LookupKeyResponse" + "type": "LookupKeyResponse", + "optional": true }, { "id": 33, "name": "renameKeyResponse", - "type": "RenameKeyResponse" + "type": "RenameKeyResponse", + "optional": true }, { "id": 34, "name": "deleteKeyResponse", - "type": "DeleteKeyResponse" + "type": "DeleteKeyResponse", + "optional": true }, { "id": 35, "name": "listKeysResponse", - "type": "ListKeysResponse" + "type": "ListKeysResponse", + "optional": true }, { "id": 36, "name": "commitKeyResponse", - "type": "CommitKeyResponse" + "type": "CommitKeyResponse", + "optional": true }, { "id": 37, "name": "allocateBlockResponse", - "type": "AllocateBlockResponse" + "type": "AllocateBlockResponse", + "optional": true }, { "id": 38, "name": "deleteKeysResponse", - "type": "DeleteKeysResponse" + "type": "DeleteKeysResponse", + "optional": true }, { "id": 39, "name": "renameKeysResponse", - "type": "RenameKeysResponse" + "type": "RenameKeysResponse", + "optional": true }, { "id": 45, "name": "initiateMultiPartUploadResponse", - "type": "MultipartInfoInitiateResponse" + "type": "MultipartInfoInitiateResponse", + "optional": true }, { "id": 46, "name": "commitMultiPartUploadResponse", - "type": 
"MultipartCommitUploadPartResponse" + "type": "MultipartCommitUploadPartResponse", + "optional": true }, { "id": 47, "name": "completeMultiPartUploadResponse", - "type": "MultipartUploadCompleteResponse" + "type": "MultipartUploadCompleteResponse", + "optional": true }, { "id": 48, "name": "abortMultiPartUploadResponse", - "type": "MultipartUploadAbortResponse" + "type": "MultipartUploadAbortResponse", + "optional": true }, { "id": 49, "name": "getS3SecretResponse", - "type": "GetS3SecretResponse" + "type": "GetS3SecretResponse", + "optional": true }, { "id": 50, "name": "listMultipartUploadPartsResponse", - "type": "MultipartUploadListPartsResponse" + "type": "MultipartUploadListPartsResponse", + "optional": true }, { "id": 51, "name": "ServiceListResponse", - "type": "ServiceListResponse" + "type": "ServiceListResponse", + "optional": true }, { "id": 52, "name": "dbUpdatesResponse", - "type": "DBUpdatesResponse" + "type": "DBUpdatesResponse", + "optional": true }, { "id": 54, "name": "finalizeUpgradeResponse", - "type": "FinalizeUpgradeResponse" + "type": "FinalizeUpgradeResponse", + "optional": true }, { "id": 55, "name": "finalizeUpgradeProgressResponse", - "type": "FinalizeUpgradeProgressResponse" + "type": "FinalizeUpgradeProgressResponse", + "optional": true }, { "id": 56, "name": "prepareResponse", - "type": "PrepareResponse" + "type": "PrepareResponse", + "optional": true }, { "id": 57, "name": "prepareStatusResponse", - "type": "PrepareStatusResponse" + "type": "PrepareStatusResponse", + "optional": true }, { "id": 58, "name": "cancelPrepareResponse", - "type": "CancelPrepareResponse" + "type": "CancelPrepareResponse", + "optional": true }, { "id": 61, "name": "getDelegationTokenResponse", - "type": "GetDelegationTokenResponseProto" + "type": "GetDelegationTokenResponseProto", + "optional": true }, { "id": 62, "name": "renewDelegationTokenResponse", - "type": "RenewDelegationTokenResponseProto" + "type": "RenewDelegationTokenResponseProto", + "optional": 
true }, { "id": 63, "name": "cancelDelegationTokenResponse", - "type": "CancelDelegationTokenResponseProto" + "type": "CancelDelegationTokenResponseProto", + "optional": true }, { "id": 70, "name": "getFileStatusResponse", - "type": "GetFileStatusResponse" + "type": "GetFileStatusResponse", + "optional": true }, { "id": 71, "name": "createDirectoryResponse", - "type": "CreateDirectoryResponse" + "type": "CreateDirectoryResponse", + "optional": true }, { "id": 72, "name": "createFileResponse", - "type": "CreateFileResponse" + "type": "CreateFileResponse", + "optional": true }, { "id": 73, "name": "lookupFileResponse", - "type": "LookupFileResponse" + "type": "LookupFileResponse", + "optional": true }, { "id": 74, "name": "listStatusResponse", - "type": "ListStatusResponse" + "type": "ListStatusResponse", + "optional": true }, { "id": 75, "name": "addAclResponse", - "type": "AddAclResponse" + "type": "AddAclResponse", + "optional": true }, { "id": 76, "name": "removeAclResponse", - "type": "RemoveAclResponse" + "type": "RemoveAclResponse", + "optional": true }, { "id": 77, "name": "setAclResponse", - "type": "SetAclResponse" + "type": "SetAclResponse", + "optional": true }, { "id": 78, "name": "getAclResponse", - "type": "GetAclResponse" + "type": "GetAclResponse", + "optional": true }, { "id": 81, "name": "purgeKeysResponse", - "type": "PurgeKeysResponse" + "type": "PurgeKeysResponse", + "optional": true }, { "id": 82, "name": "listMultipartUploadsResponse", - "type": "ListMultipartUploadsResponse" + "type": "ListMultipartUploadsResponse", + "optional": true }, { "id": 91, "name": "listTrashResponse", - "type": "ListTrashResponse" + "type": "ListTrashResponse", + "optional": true }, { "id": 92, "name": "RecoverTrashResponse", - "type": "RecoverTrashResponse" + "type": "RecoverTrashResponse", + "optional": true }, { "id": 93, "name": "purgePathsResponse", - "type": "PurgePathsResponse" - } + "type": "PurgePathsResponse", + "optional": true, + "options": [ + { + 
"name": "deprecated", + "value": "true" + } + ] + }, + { + "id": 108, + "name": "purgeDirectoriesResponse", + "type": "PurgeDirectoriesResponse", + "optional": true + }, + { + "id": 96, + "name": "CreateTenantResponse", + "type": "CreateTenantResponse", + "optional": true + }, + { + "id": 97, + "name": "DeleteTenantResponse", + "type": "DeleteTenantResponse", + "optional": true + }, + { + "id": 98, + "name": "ListTenantResponse", + "type": "ListTenantResponse", + "optional": true + }, + { + "id": 99, + "name": "TenantGetUserInfoResponse", + "type": "TenantGetUserInfoResponse", + "optional": true + }, + { + "id": 100, + "name": "TenantAssignUserAccessIdResponse", + "type": "TenantAssignUserAccessIdResponse", + "optional": true + }, + { + "id": 101, + "name": "TenantRevokeUserAccessIdResponse", + "type": "TenantRevokeUserAccessIdResponse", + "optional": true + }, + { + "id": 102, + "name": "TenantAssignAdminResponse", + "type": "TenantAssignAdminResponse", + "optional": true + }, + { + "id": 103, + "name": "TenantRevokeAdminResponse", + "type": "TenantRevokeAdminResponse", + "optional": true + }, + { + "id": 104, + "name": "getS3VolumeContextResponse", + "type": "GetS3VolumeContextResponse", + "optional": true + }, + { + "id": 105, + "name": "tenantListUserResponse", + "type": "TenantListUserResponse", + "optional": true + }, + { + "id": 106, + "name": "SetS3SecretResponse", + "type": "SetS3SecretResponse", + "optional": true + }, + { + "id": 107, + "name": "SetRangerServiceVersionResponse", + "type": "SetRangerServiceVersionResponse", + "optional": true + }, + { + "id": 109, + "name": "RangerBGSyncResponse", + "type": "RangerBGSyncResponse", + "optional": true + }, + { + "id": 110, + "name": "EchoRPCResponse", + "type": "EchoRPCResponse", + "optional": true + }, + { + "id": 111, + "name": "GetKeyInfoResponse", + "type": "GetKeyInfoResponse", + "optional": true + } ] }, { @@ -1318,27 +1953,32 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", 
+ "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "startKeyName", - "type": "string" + "type": "string", + "optional": true }, { "id": 4, "name": "keyPrefix", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "maxKeys", - "type": "int32" + "type": "int32", + "optional": true } ] }, @@ -1359,22 +1999,26 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "keyName", - "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "destinationBucket", - "type": "string" + "type": "string", + "required": true } ] }, @@ -1384,7 +2028,8 @@ { "id": 1, "name": "response", - "type": "bool" + "type": "bool", + "required": true } ] }, @@ -1394,22 +2039,26 @@ { "id": 1, "name": "adminName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "ownerName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "volume", - "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "quotaInBytes", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 5, @@ -1426,27 +2075,32 @@ { "id": 7, "name": "creationTime", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 8, "name": "objectID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 9, "name": "updateID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 10, "name": "modificationTime", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 11, "name": "quotaInNamespace", "type": "int64", + "optional": true, "options": [ { "name": "default", @@ -1457,7 +2111,14 @@ { "id": 12, "name": "usedNamespace", - "type": "uint64" + "type": "uint64", + "optional": true + }, + { + "id": 13, + "name": 
"refCount", + "type": "int64", + "optional": true } ] }, @@ -1467,17 +2128,20 @@ { "id": 1, "name": "userName", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "remoteAddress", - "type": "string" + "type": "string", + "optional": true }, { "id": 4, "name": "hostName", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1487,12 +2151,14 @@ { "id": 1, "name": "getDelegationTokenResponse", - "type": "GetDelegationTokenResponseProto" + "type": "GetDelegationTokenResponseProto", + "required": true }, { "id": 2, "name": "tokenRenewInterval", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -1502,12 +2168,14 @@ { "id": 1, "name": "renewDelegationTokenRequest", - "type": "hadoop.common.RenewDelegationTokenRequestProto" + "type": "hadoop.common.RenewDelegationTokenRequestProto", + "required": true }, { "id": 2, "name": "renewDelegationTokenResponse", - "type": "RenewDelegationTokenResponseProto" + "type": "RenewDelegationTokenResponseProto", + "required": true } ] }, @@ -1517,7 +2185,8 @@ { "id": 1, "name": "volumeInfo", - "type": "VolumeInfo" + "type": "VolumeInfo", + "required": true } ] }, @@ -1536,12 +2205,14 @@ { "id": 2, "name": "objectID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 3, "name": "updateID", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -1551,27 +2222,32 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "ownerName", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "quotaInBytes", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 4, "name": "modificationTime", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 5, "name": "quotaInNamespace", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -1581,7 +2257,8 @@ { "id": 1, "name": "response", - "type": "bool" + "type": "bool", + "optional": true 
} ] }, @@ -1591,12 +2268,14 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "userAcl", - "type": "OzoneAclInfo" + "type": "OzoneAclInfo", + "required": true } ] }, @@ -1609,7 +2288,8 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true } ] }, @@ -1619,7 +2299,8 @@ { "id": 2, "name": "volumeInfo", - "type": "VolumeInfo" + "type": "VolumeInfo", + "optional": true } ] }, @@ -1629,7 +2310,8 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true } ] }, @@ -1642,27 +2324,32 @@ { "id": 1, "name": "scope", - "type": "Scope" + "type": "Scope", + "required": true }, { "id": 2, "name": "userName", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "prefix", - "type": "string" + "type": "string", + "optional": true }, { "id": 4, "name": "prevKey", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "maxKeys", - "type": "uint32" + "type": "uint32", + "optional": true } ] }, @@ -1683,12 +2370,14 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, @@ -1700,6 +2389,7 @@ "id": 4, "name": "isVersionEnabled", "type": "bool", + "required": true, "options": [ { "name": "default", @@ -1711,6 +2401,7 @@ "id": 5, "name": "storageType", "type": "StorageTypeProto", + "required": true, "options": [ { "name": "default", @@ -1721,7 +2412,8 @@ { "id": 6, "name": "creationTime", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 7, @@ -1732,42 +2424,50 @@ { "id": 8, "name": "beinfo", - "type": "BucketEncryptionInfoProto" + "type": "BucketEncryptionInfoProto", + "optional": true }, { "id": 9, "name": "objectID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 10, "name": "updateID", - "type": "uint64" + 
"type": "uint64", + "optional": true }, { "id": 11, "name": "modificationTime", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 12, "name": "sourceVolume", - "type": "string" + "type": "string", + "optional": true }, { "id": 13, "name": "sourceBucket", - "type": "string" + "type": "string", + "optional": true }, { "id": 14, "name": "usedBytes", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 15, "name": "quotaInBytes", "type": "int64", + "optional": true, "options": [ { "name": "default", @@ -1779,6 +2479,7 @@ "id": 16, "name": "quotaInNamespace", "type": "int64", + "optional": true, "options": [ { "name": "default", @@ -1789,12 +2490,26 @@ { "id": 17, "name": "usedNamespace", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 18, "name": "bucketLayout", - "type": "BucketLayoutProto" + "type": "BucketLayoutProto", + "optional": true + }, + { + "id": 19, + "name": "owner", + "type": "string", + "optional": true + }, + { + "id": 20, + "name": "defaultReplicationConfig", + "type": "hadoop.hdds.DefaultReplicationConfig", + "optional": true } ] }, @@ -1804,17 +2519,20 @@ { "id": 1, "name": "keyName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "suite", - "type": "CipherSuiteProto" + "type": "CipherSuiteProto", + "optional": true }, { "id": 3, "name": "cryptoProtocolVersion", - "type": "CryptoProtocolVersionProto" + "type": "CryptoProtocolVersionProto", + "optional": true } ] }, @@ -1824,32 +2542,38 @@ { "id": 1, "name": "suite", - "type": "CipherSuiteProto" + "type": "CipherSuiteProto", + "required": true }, { "id": 2, "name": "cryptoProtocolVersion", - "type": "CryptoProtocolVersionProto" + "type": "CryptoProtocolVersionProto", + "required": true }, { "id": 3, "name": "key", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 4, "name": "iv", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 5, "name": "keyName", - "type": "string" + "type": 
"string", + "required": true }, { "id": 6, "name": "ezKeyVersionName", - "type": "string" + "type": "string", + "required": true } ] }, @@ -1859,17 +2583,20 @@ { "id": 1, "name": "key", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 2, "name": "iv", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 3, "name": "ezKeyVersionName", - "type": "string" + "type": "string", + "required": true } ] }, @@ -1879,27 +2606,32 @@ { "id": 1, "name": "keyId", - "type": "uint32" + "type": "uint32", + "required": true }, { "id": 3, "name": "nonce", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 4, "name": "encryptionKey", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 5, "name": "expiryDate", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 6, "name": "encryptionAlgorithm", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -1909,22 +2641,26 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 5, "name": "isVersionEnabled", - "type": "bool" + "type": "bool", + "optional": true }, { "id": 6, "name": "storageType", - "type": "StorageTypeProto" + "type": "StorageTypeProto", + "optional": true }, { "id": 7, @@ -1935,12 +2671,26 @@ { "id": 8, "name": "quotaInBytes", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 9, "name": "quotaInNamespace", - "type": "uint64" + "type": "uint64", + "optional": true + }, + { + "id": 10, + "name": "ownerName", + "type": "string", + "optional": true + }, + { + "id": 11, + "name": "defaultReplicationConfig", + "type": "hadoop.hdds.DefaultReplicationConfig", + "optional": true } ] }, @@ -1950,7 +2700,8 @@ { "id": 1, "name": "name", - "type": "string" + "type": "string", + "required": true }, { "id": 2, @@ -1967,12 +2718,14 @@ { "id": 4, "name": "objectID", - "type": "uint64" + 
"type": "uint64", + "optional": true }, { "id": 5, "name": "updateID", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -1982,12 +2735,14 @@ { "id": 1, "name": "resType", - "type": "ObjectType" + "type": "ObjectType", + "required": true }, { "id": 2, "name": "storeType", "type": "StoreType", + "required": true, "options": [ { "name": "default", @@ -1998,7 +2753,8 @@ { "id": 3, "name": "path", - "type": "string" + "type": "string", + "required": true } ] }, @@ -2008,22 +2764,26 @@ { "id": 1, "name": "type", - "type": "OzoneAclType" + "type": "OzoneAclType", + "required": true }, { "id": 2, "name": "name", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "rights", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 4, "name": "aclScope", "type": "OzoneAclScope", + "required": true, "options": [ { "name": "default", @@ -2039,7 +2799,8 @@ { "id": 1, "name": "obj", - "type": "OzoneObj" + "type": "OzoneObj", + "required": true } ] }, @@ -2060,17 +2821,20 @@ { "id": 1, "name": "obj", - "type": "OzoneObj" + "type": "OzoneObj", + "required": true }, { "id": 2, "name": "acl", - "type": "OzoneAclInfo" + "type": "OzoneAclInfo", + "required": true }, { "id": 3, "name": "modificationTime", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -2080,7 +2844,8 @@ { "id": 1, "name": "response", - "type": "bool" + "type": "bool", + "required": true } ] }, @@ -2090,17 +2855,20 @@ { "id": 1, "name": "obj", - "type": "OzoneObj" + "type": "OzoneObj", + "required": true }, { "id": 2, "name": "acl", - "type": "OzoneAclInfo" + "type": "OzoneAclInfo", + "required": true }, { "id": 3, "name": "modificationTime", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -2110,7 +2878,8 @@ { "id": 1, "name": "response", - "type": "bool" + "type": "bool", + "required": true } ] }, @@ -2120,7 +2889,8 @@ { "id": 1, "name": "obj", - "type": "OzoneObj" + "type": "OzoneObj", + "required": true }, { "id": 2, @@ 
-2131,7 +2901,8 @@ { "id": 3, "name": "modificationTime", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -2141,7 +2912,8 @@ { "id": 1, "name": "response", - "type": "bool" + "type": "bool", + "required": true } ] }, @@ -2151,7 +2923,8 @@ { "id": 1, "name": "bucketInfo", - "type": "BucketInfo" + "type": "BucketInfo", + "required": true } ] }, @@ -2164,12 +2937,14 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true } ] }, @@ -2179,7 +2954,8 @@ { "id": 2, "name": "bucketInfo", - "type": "BucketInfo" + "type": "BucketInfo", + "optional": true } ] }, @@ -2189,17 +2965,27 @@ { "id": 1, "name": "bucketArgs", - "type": "BucketArgs" + "type": "BucketArgs", + "optional": true }, { "id": 2, "name": "modificationTime", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, { - "name": "SetBucketPropertyResponse" + "name": "SetBucketPropertyResponse", + "fields": [ + { + "id": 1, + "name": "response", + "type": "bool", + "optional": true + } + ] }, { "name": "DeleteBucketRequest", @@ -2207,12 +2993,14 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true } ] }, @@ -2225,22 +3013,26 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "startKey", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "prefix", - "type": "string" + "type": "string", + "optional": true }, { "id": 4, "name": "count", - "type": "int32" + "type": "int32", + "optional": true } ] }, @@ -2261,32 +3053,50 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "keyName", 
- "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "dataSize", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 5, "name": "type", - "type": "hadoop.hdds.ReplicationType" + "type": "hadoop.hdds.ReplicationType", + "optional": true, + "options": [ + { + "name": "default", + "value": "NONE" + } + ] }, { "id": 6, "name": "factor", - "type": "hadoop.hdds.ReplicationFactor" + "type": "hadoop.hdds.ReplicationFactor", + "optional": true, + "options": [ + { + "name": "default", + "value": "ZERO" + } + ] }, { "id": 7, @@ -2297,17 +3107,20 @@ { "id": 8, "name": "isMultipartKey", - "type": "bool" + "type": "bool", + "optional": true }, { "id": 9, "name": "multipartUploadID", - "type": "string" + "type": "string", + "optional": true }, { "id": 10, "name": "multipartNumber", - "type": "uint32" + "type": "uint32", + "optional": true }, { "id": 11, @@ -2324,32 +3137,50 @@ { "id": 13, "name": "modificationTime", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 14, "name": "sortDatanodes", - "type": "bool" + "type": "bool", + "optional": true }, { "id": 15, "name": "fileEncryptionInfo", - "type": "FileEncryptionInfoProto" + "type": "FileEncryptionInfoProto", + "optional": true }, { "id": 16, "name": "latestVersionLocation", - "type": "bool" + "type": "bool", + "optional": true }, { "id": 17, "name": "recursive", - "type": "bool" + "type": "bool", + "optional": true }, { "id": 18, "name": "headOp", - "type": "bool" + "type": "bool", + "optional": true + }, + { + "id": 19, + "name": "ecReplicationConfig", + "type": "hadoop.hdds.ECReplicationConfig", + "optional": true + }, + { + "id": 20, + "name": "forceUpdateContainerCacheFromSCM", + "type": "bool", + "optional": true } ] }, @@ -2359,37 +3190,44 @@ { "id": 1, "name": "blockID", - "type": "hadoop.hdds.BlockID" + "type": "hadoop.hdds.BlockID", + "required": true }, { "id": 3, "name": "offset", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 4, 
"name": "length", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 5, "name": "createVersion", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 6, "name": "token", - "type": "hadoop.common.TokenProto" + "type": "hadoop.common.TokenProto", + "optional": true }, { "id": 7, "name": "pipeline", - "type": "hadoop.hdds.Pipeline" + "type": "hadoop.hdds.Pipeline", + "optional": true }, { "id": 9, "name": "partNumber", "type": "int32", + "optional": true, "options": [ { "name": "default", @@ -2405,7 +3243,8 @@ { "id": 1, "name": "version", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 2, @@ -2416,12 +3255,14 @@ { "id": 3, "name": "fileEncryptionInfo", - "type": "FileEncryptionInfoProto" + "type": "FileEncryptionInfoProto", + "optional": true }, { "id": 4, "name": "isMultipartKey", "type": "bool", + "optional": true, "options": [ { "name": "default", @@ -2431,38 +3272,125 @@ } ] }, + { + "name": "CompositeCrcFileChecksumProto", + "fields": [ + { + "id": 1, + "name": "checksumType", + "type": "ChecksumTypeProto", + "required": true + }, + { + "id": 2, + "name": "bytesPerCrc", + "type": "uint32", + "required": true + }, + { + "id": 3, + "name": "crc", + "type": "uint32", + "required": true + } + ] + }, + { + "name": "MD5MD5Crc32FileChecksumProto", + "fields": [ + { + "id": 1, + "name": "checksumType", + "type": "ChecksumTypeProto", + "required": true + }, + { + "id": 2, + "name": "bytesPerCRC", + "type": "uint32", + "required": true + }, + { + "id": 3, + "name": "crcPerBlock", + "type": "uint64", + "required": true + }, + { + "id": 4, + "name": "md5", + "type": "bytes", + "required": true + } + ] + }, + { + "name": "FileChecksumProto", + "fields": [ + { + "id": 1, + "name": "checksumType", + "type": "FileChecksumTypeProto", + "required": true, + "options": [ + { + "name": "default", + "value": "COMPOSITE_CRC" + } + ] + }, + { + "id": 2, + "name": "compositeCrc", + "type": "CompositeCrcFileChecksumProto", + 
"optional": true + }, + { + "id": 3, + "name": "md5Crc", + "type": "MD5MD5Crc32FileChecksumProto", + "optional": true + } + ] + }, { "name": "KeyInfo", "fields": [ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "keyName", - "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "dataSize", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 5, "name": "type", - "type": "hadoop.hdds.ReplicationType" + "type": "hadoop.hdds.ReplicationType", + "required": true }, { "id": 6, "name": "factor", - "type": "hadoop.hdds.ReplicationFactor" + "type": "hadoop.hdds.ReplicationFactor", + "optional": true }, { "id": 7, @@ -2473,17 +3401,20 @@ { "id": 8, "name": "creationTime", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 9, "name": "modificationTime", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 10, "name": "latestVersion", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 11, @@ -2494,7 +3425,8 @@ { "id": 12, "name": "fileEncryptionInfo", - "type": "FileEncryptionInfoProto" + "type": "FileEncryptionInfoProto", + "optional": true }, { "id": 13, @@ -2505,17 +3437,32 @@ { "id": 14, "name": "objectID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 15, "name": "updateID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 16, "name": "parentID", - "type": "uint64" + "type": "uint64", + "optional": true + }, + { + "id": 17, + "name": "ecReplicationConfig", + "type": "hadoop.hdds.ECReplicationConfig", + "optional": true + }, + { + "id": 18, + "name": "fileChecksum", + "type": "FileChecksumProto", + "optional": true } ] }, @@ -2525,17 +3472,20 @@ { "id": 1, "name": "name", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "creationTime", - "type": 
"uint64" + "type": "uint64", + "required": true }, { "id": 3, "name": "modificationTime", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 4, @@ -2552,17 +3502,20 @@ { "id": 6, "name": "objectID", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 7, "name": "updateID", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 8, "name": "parentID", - "type": "uint64" + "type": "uint64", + "required": true } ] }, @@ -2583,17 +3536,20 @@ { "id": 2, "name": "keyInfo", - "type": "KeyInfo" + "type": "KeyInfo", + "optional": true }, { "id": 3, "name": "blockSize", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 4, "name": "isDirectory", - "type": "bool" + "type": "bool", + "optional": true } ] }, @@ -2603,7 +3559,8 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true } ] }, @@ -2613,7 +3570,8 @@ { "id": 1, "name": "status", - "type": "OzoneFileStatusProto" + "type": "OzoneFileStatusProto", + "required": true } ] }, @@ -2623,7 +3581,8 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true } ] }, @@ -2636,22 +3595,26 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true }, { "id": 2, "name": "isRecursive", - "type": "bool" + "type": "bool", + "required": true }, { "id": 3, "name": "isOverwrite", - "type": "bool" + "type": "bool", + "required": true }, { "id": 4, "name": "clientID", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -2661,17 +3624,20 @@ { "id": 1, "name": "keyInfo", - "type": "KeyInfo" + "type": "KeyInfo", + "optional": true }, { "id": 2, "name": "ID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 3, "name": "openVersion", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -2681,7 +3647,8 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true } ] }, @@ -2691,7 +3658,8 @@ { "id": 
1, "name": "keyInfo", - "type": "KeyInfo" + "type": "KeyInfo", + "optional": true } ] }, @@ -2701,22 +3669,32 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true }, { "id": 2, "name": "recursive", - "type": "bool" + "type": "bool", + "required": true }, { "id": 3, "name": "startKey", - "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "numEntries", - "type": "uint64" + "type": "uint64", + "required": true + }, + { + "id": 5, + "name": "allowPartialPrefix", + "type": "bool", + "optional": true } ] }, @@ -2737,12 +3715,14 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true }, { "id": 2, "name": "clientID", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -2752,17 +3732,20 @@ { "id": 2, "name": "keyInfo", - "type": "KeyInfo" + "type": "KeyInfo", + "optional": true }, { "id": 3, "name": "ID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 4, "name": "openVersion", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -2772,7 +3755,8 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true } ] }, @@ -2782,17 +3766,60 @@ { "id": 2, "name": "keyInfo", - "type": "KeyInfo" + "type": "KeyInfo", + "optional": true }, { "id": 3, "name": "ID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 4, "name": "openVersion", - "type": "uint64" + "type": "uint64", + "optional": true + } + ] + }, + { + "name": "GetKeyInfoRequest", + "fields": [ + { + "id": 1, + "name": "keyArgs", + "type": "KeyArgs", + "required": true + }, + { + "id": 2, + "name": "assumeS3Context", + "type": "bool", + "optional": true + } + ] + }, + { + "name": "GetKeyInfoResponse", + "fields": [ + { + "id": 1, + "name": "keyInfo", + "type": "KeyInfo", + "optional": true + }, + { + "id": 2, + "name": "volumeInfo", + "type": "VolumeInfo", + "optional": true + }, + { + "id": 3, + "name": "UserPrincipal", 
+ "type": "string", + "optional": true } ] }, @@ -2802,7 +3829,8 @@ { "id": 1, "name": "renameKeysArgs", - "type": "RenameKeysArgs" + "type": "RenameKeysArgs", + "required": true } ] }, @@ -2812,12 +3840,14 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, @@ -2833,12 +3863,14 @@ { "id": 1, "name": "fromKeyName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "toKeyName", - "type": "string" + "type": "string", + "required": true } ] }, @@ -2854,7 +3886,8 @@ { "id": 2, "name": "status", - "type": "bool" + "type": "bool", + "optional": true } ] }, @@ -2864,12 +3897,14 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true }, { "id": 2, "name": "toKeyName", - "type": "string" + "type": "string", + "required": true } ] }, @@ -2882,7 +3917,8 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true } ] }, @@ -2892,7 +3928,8 @@ { "id": 1, "name": "deleteKeys", - "type": "DeleteKeyArgs" + "type": "DeleteKeyArgs", + "optional": true } ] }, @@ -2902,12 +3939,14 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, @@ -2923,12 +3962,14 @@ { "id": 1, "name": "unDeletedKeys", - "type": "DeleteKeyArgs" + "type": "DeleteKeyArgs", + "optional": true }, { "id": 2, "name": "status", - "type": "bool" + "type": "bool", + "optional": true } ] }, @@ -2938,17 +3979,20 @@ { "id": 2, "name": "keyInfo", - "type": "KeyInfo" + "type": "KeyInfo", + "optional": true }, { "id": 3, "name": "ID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 4, "name": "openVersion", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -2958,12 +4002,14 @@ { "id": 1, "name": 
"volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, @@ -3013,6 +4059,55 @@ { "name": "PurgePathsResponse" }, + { + "name": "PurgeDirectoriesRequest", + "fields": [ + { + "id": 1, + "name": "deletedPath", + "type": "PurgePathRequest", + "is_repeated": true + } + ] + }, + { + "name": "PurgeDirectoriesResponse" + }, + { + "name": "PurgePathRequest", + "fields": [ + { + "id": 1, + "name": "volumeId", + "type": "uint64", + "required": true + }, + { + "id": 2, + "name": "bucketId", + "type": "uint64", + "required": true + }, + { + "id": 3, + "name": "deletedDir", + "type": "string", + "optional": true + }, + { + "id": 4, + "name": "deletedSubFiles", + "type": "KeyInfo", + "is_repeated": true + }, + { + "id": 5, + "name": "markDeletedSubDirs", + "type": "KeyInfo", + "is_repeated": true + } + ] + }, { "name": "DeleteOpenKeysRequest", "fields": [ @@ -3021,6 +4116,12 @@ "name": "openKeysPerBucket", "type": "OpenKeyBucket", "is_repeated": true + }, + { + "id": 2, + "name": "bucketLayout", + "type": "BucketLayoutProto", + "optional": true } ] }, @@ -3030,12 +4131,14 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, @@ -3051,12 +4154,20 @@ { "id": 1, "name": "name", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "clientID", - "type": "uint64" + "type": "uint64", + "optional": true, + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] } ] }, @@ -3066,77 +4177,92 @@ { "id": 1, "name": "type", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "version", - "type": "uint32" + "type": "uint32", + "optional": true }, { "id": 3, "name": "owner", - "type": "string" + "type": "string", + "optional": true }, { "id": 4, "name": "renewer", - 
"type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "realUser", - "type": "string" + "type": "string", + "optional": true }, { "id": 6, "name": "issueDate", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 7, "name": "maxDate", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 8, "name": "sequenceNumber", - "type": "uint32" + "type": "uint32", + "optional": true }, { "id": 9, "name": "masterKeyId", - "type": "uint32" + "type": "uint32", + "optional": true }, { "id": 10, "name": "expiryDate", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 11, "name": "omCertSerialId", - "type": "string" + "type": "string", + "optional": true }, { "id": 12, "name": "accessKeyId", - "type": "string" + "type": "string", + "optional": true }, { "id": 13, "name": "signature", - "type": "string" + "type": "string", + "optional": true }, { "id": 14, "name": "strToSign", - "type": "string" + "type": "string", + "optional": true }, { "id": 15, "name": "omServiceId", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -3146,22 +4272,26 @@ { "id": 1, "name": "keyId", - "type": "uint32" + "type": "uint32", + "required": true }, { "id": 2, "name": "expiryDate", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 3, "name": "privateKeyBytes", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 4, "name": "publicKeyBytes", - "type": "bytes" + "type": "bytes", + "required": true } ] }, @@ -3171,27 +4301,32 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "startKey", - "type": "string" + "type": "string", + "optional": true }, { "id": 4, "name": "prefix", - "type": "string" + "type": "string", + "optional": true }, { "id": 5, "name": "count", - "type": "int32" + "type": "int32", + "optional": true } ] }, @@ 
-3212,12 +4347,14 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true }, { "id": 2, "name": "clientID", - "type": "uint64" + "type": "uint64", + "required": true } ] }, @@ -3230,22 +4367,26 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true }, { "id": 2, "name": "clientID", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 3, "name": "excludeList", - "type": "hadoop.hdds.ExcludeListProto" + "type": "hadoop.hdds.ExcludeListProto", + "optional": true }, { "id": 4, "name": "keyLocation", - "type": "KeyLocation" + "type": "KeyLocation", + "optional": true } ] }, @@ -3255,7 +4396,8 @@ { "id": 2, "name": "keyLocation", - "type": "KeyLocation" + "type": "KeyLocation", + "optional": true } ] }, @@ -3268,7 +4410,14 @@ { "id": 1, "name": "sequenceNumber", - "type": "uint64" + "type": "uint64", + "required": true + }, + { + "id": 2, + "name": "limitCount", + "type": "uint64", + "optional": true } ] }, @@ -3284,7 +4433,8 @@ { "id": 3, "name": "caCertificate", - "type": "string" + "type": "string", + "optional": true }, { "id": 4, @@ -3300,13 +4450,42 @@ { "id": 1, "name": "sequenceNumber", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 2, "name": "data", "type": "bytes", "is_repeated": true + }, + { + "id": 3, + "name": "latestSequenceNumber", + "type": "uint64", + "optional": true + } + ] + }, + { + "name": "RangerBGSyncRequest", + "fields": [ + { + "id": 1, + "name": "noWait", + "type": "bool", + "optional": true + } + ] + }, + { + "name": "RangerBGSyncResponse", + "fields": [ + { + "id": 1, + "name": "runSuccess", + "type": "bool", + "optional": true } ] }, @@ -3316,7 +4495,8 @@ { "id": 1, "name": "upgradeClientId", - "type": "string" + "type": "string", + "required": true } ] }, @@ -3326,7 +4506,8 @@ { "id": 1, "name": "status", - "type": "hadoop.hdds.UpgradeFinalizationStatus" + "type": "hadoop.hdds.UpgradeFinalizationStatus", + "required": true } ] 
}, @@ -3336,17 +4517,20 @@ { "id": 1, "name": "upgradeClientId", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "takeover", - "type": "bool" + "type": "bool", + "optional": true }, { "id": 3, "name": "readonly", - "type": "bool" + "type": "bool", + "optional": true } ] }, @@ -3356,7 +4540,8 @@ { "id": 1, "name": "status", - "type": "hadoop.hdds.UpgradeFinalizationStatus" + "type": "hadoop.hdds.UpgradeFinalizationStatus", + "required": true } ] }, @@ -3366,7 +4551,8 @@ { "id": 1, "name": "args", - "type": "PrepareRequestArgs" + "type": "PrepareRequestArgs", + "required": true } ] }, @@ -3377,6 +4563,7 @@ "id": 1, "name": "txnApplyWaitTimeoutSeconds", "type": "uint64", + "optional": true, "options": [ { "name": "default", @@ -3388,6 +4575,7 @@ "id": 2, "name": "txnApplyCheckIntervalSeconds", "type": "uint64", + "optional": true, "options": [ { "name": "default", @@ -3403,7 +4591,8 @@ { "id": 1, "name": "txnID", - "type": "uint64" + "type": "uint64", + "required": true } ] }, @@ -3413,7 +4602,8 @@ { "id": 1, "name": "txnID", - "type": "uint64" + "type": "uint64", + "required": true } ] }, @@ -3423,12 +4613,14 @@ { "id": 1, "name": "status", - "type": "PrepareStatus" + "type": "PrepareStatus", + "required": true }, { "id": 2, "name": "currentTxnIndex", - "type": "uint64" + "type": "uint64", + "optional": true } ] }, @@ -3444,12 +4636,14 @@ { "id": 1, "name": "type", - "type": "Type" + "type": "Type", + "required": true }, { "id": 2, "name": "value", - "type": "uint32" + "type": "uint32", + "required": true } ] }, @@ -3459,12 +4653,14 @@ { "id": 1, "name": "nodeId", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "serverRole", - "type": "string" + "type": "string", + "required": true } ] }, @@ -3474,12 +4670,14 @@ { "id": 1, "name": "nodeType", - "type": "hadoop.hdds.NodeType" + "type": "hadoop.hdds.NodeType", + "required": true }, { "id": 2, "name": "hostname", - "type": "string" + "type": "string", + 
"required": true }, { "id": 3, @@ -3490,7 +4688,20 @@ { "id": 4, "name": "omRole", - "type": "OMRoleInfo" + "type": "OMRoleInfo", + "optional": true + }, + { + "id": 5, + "name": "OMVersion", + "type": "int32", + "optional": true, + "options": [ + { + "name": "default", + "value": "0" + } + ] } ] }, @@ -3500,7 +4711,8 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true } ] }, @@ -3510,22 +4722,26 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "keyName", - "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "multipartUploadID", - "type": "string" + "type": "string", + "required": true } ] }, @@ -3535,22 +4751,26 @@ { "id": 1, "name": "uploadID", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "creationTime", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 3, "name": "type", - "type": "hadoop.hdds.ReplicationType" + "type": "hadoop.hdds.ReplicationType", + "required": true }, { "id": 4, "name": "factor", - "type": "hadoop.hdds.ReplicationFactor" + "type": "hadoop.hdds.ReplicationFactor", + "optional": true }, { "id": 5, @@ -3561,17 +4781,26 @@ { "id": 6, "name": "objectID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 7, "name": "updateID", - "type": "uint64" + "type": "uint64", + "optional": true }, { "id": 8, "name": "parentID", - "type": "uint64" + "type": "uint64", + "optional": true + }, + { + "id": 9, + "name": "ecReplicationConfig", + "type": "hadoop.hdds.ECReplicationConfig", + "optional": true } ] }, @@ -3581,17 +4810,20 @@ { "id": 1, "name": "partName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "partNumber", - "type": "uint32" + "type": "uint32", + "required": true }, { "id": 3, "name": "partKeyInfo", - "type": 
"KeyInfo" + "type": "KeyInfo", + "required": true } ] }, @@ -3601,12 +4833,14 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true }, { "id": 2, "name": "clientID", - "type": "uint64" + "type": "uint64", + "required": true } ] }, @@ -3616,7 +4850,8 @@ { "id": 1, "name": "partName", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -3626,7 +4861,8 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true }, { "id": 2, @@ -3642,22 +4878,26 @@ { "id": 1, "name": "volume", - "type": "string" + "type": "string", + "optional": true }, { "id": 2, "name": "bucket", - "type": "string" + "type": "string", + "optional": true }, { "id": 3, "name": "key", - "type": "string" + "type": "string", + "optional": true }, { "id": 4, "name": "hash", - "type": "string" + "type": "string", + "optional": true } ] }, @@ -3667,12 +4907,14 @@ { "id": 1, "name": "partNumber", - "type": "uint32" + "type": "uint32", + "required": true }, { "id": 2, "name": "partName", - "type": "string" + "type": "string", + "required": true } ] }, @@ -3682,7 +4924,8 @@ { "id": 1, "name": "keyArgs", - "type": "KeyArgs" + "type": "KeyArgs", + "required": true } ] }, @@ -3695,32 +4938,38 @@ { "id": 1, "name": "volume", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucket", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "key", - "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "uploadID", - "type": "string" + "type": "string", + "required": true }, { "id": 5, "name": "partNumbermarker", - "type": "uint32" + "type": "uint32", + "optional": true }, { "id": 6, "name": "maxParts", - "type": "uint32" + "type": "uint32", + "optional": true } ] }, @@ -3730,28 +4979,38 @@ { "id": 2, "name": "type", - "type": "hadoop.hdds.ReplicationType" + "type": "hadoop.hdds.ReplicationType", + "optional": true }, { "id": 3, "name": "factor", - 
"type": "hadoop.hdds.ReplicationFactor" + "type": "hadoop.hdds.ReplicationFactor", + "optional": true }, { "id": 4, "name": "nextPartNumberMarker", - "type": "uint32" + "type": "uint32", + "optional": true }, { "id": 5, "name": "isTruncated", - "type": "bool" + "type": "bool", + "optional": true }, { "id": 6, "name": "partsList", "type": "PartInfo", "is_repeated": true + }, + { + "id": 7, + "name": "ecReplicationConfig", + "type": "hadoop.hdds.ECReplicationConfig", + "optional": true } ] }, @@ -3761,17 +5020,20 @@ { "id": 1, "name": "volume", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucket", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "prefix", - "type": "string" + "type": "string", + "required": true } ] }, @@ -3781,7 +5043,8 @@ { "id": 1, "name": "isTruncated", - "type": "bool" + "type": "bool", + "optional": true }, { "id": 2, @@ -3797,37 +5060,50 @@ { "id": 1, "name": "volumeName", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "bucketName", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "keyName", - "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "uploadId", - "type": "string" + "type": "string", + "required": true }, { "id": 5, "name": "creationTime", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 6, "name": "type", - "type": "hadoop.hdds.ReplicationType" + "type": "hadoop.hdds.ReplicationType", + "required": true }, { "id": 7, "name": "factor", - "type": "hadoop.hdds.ReplicationFactor" + "type": "hadoop.hdds.ReplicationFactor", + "optional": true + }, + { + "id": 8, + "name": "ecReplicationConfig", + "type": "hadoop.hdds.ECReplicationConfig", + "optional": true } ] }, @@ -3837,22 +5113,54 @@ { "id": 1, "name": "partNumber", - "type": "uint32" + "type": "uint32", + "required": true }, { "id": 2, "name": "partName", - "type": "string" + "type": "string", + "required": 
true }, { "id": 3, "name": "modificationTime", - "type": "uint64" + "type": "uint64", + "required": true }, { "id": 4, "name": "size", - "type": "uint64" + "type": "uint64", + "required": true + } + ] + }, + { + "name": "EchoRPCRequest", + "fields": [ + { + "id": 1, + "name": "payloadReq", + "type": "bytes", + "optional": true + }, + { + "id": 2, + "name": "payloadSizeResp", + "type": "int32", + "optional": true + } + ] + }, + { + "name": "EchoRPCResponse", + "fields": [ + { + "id": 1, + "name": "payload", + "type": "bytes", + "optional": true } ] }, @@ -3862,7 +5170,8 @@ { "id": 2, "name": "response", - "type": "hadoop.common.GetDelegationTokenResponseProto" + "type": "hadoop.common.GetDelegationTokenResponseProto", + "optional": true } ] }, @@ -3872,7 +5181,8 @@ { "id": 2, "name": "response", - "type": "hadoop.common.RenewDelegationTokenResponseProto" + "type": "hadoop.common.RenewDelegationTokenResponseProto", + "optional": true } ] }, @@ -3882,7 +5192,8 @@ { "id": 2, "name": "response", - "type": "hadoop.common.CancelDelegationTokenResponseProto" + "type": "hadoop.common.CancelDelegationTokenResponseProto", + "optional": true } ] }, @@ -3892,12 +5203,14 @@ { "id": 1, "name": "kerberosID", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "awsSecret", - "type": "string" + "type": "string", + "required": true } ] }, @@ -3907,7 +5220,14 @@ { "id": 1, "name": "kerberosID", - "type": "string" + "type": "string", + "required": true + }, + { + "id": 2, + "name": "createIfNotExist", + "type": "bool", + "optional": true } ] }, @@ -3917,7 +5237,210 @@ { "id": 2, "name": "s3Secret", - "type": "S3Secret" + "type": "S3Secret", + "required": true + } + ] + }, + { + "name": "SetS3SecretRequest", + "fields": [ + { + "id": 1, + "name": "accessId", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "secretKey", + "type": "string", + "optional": true + } + ] + }, + { + "name": "SetS3SecretResponse", + "fields": [ + { + "id": 1, + 
"name": "accessId", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "secretKey", + "type": "string", + "optional": true + } + ] + }, + { + "name": "TenantState", + "fields": [ + { + "id": 1, + "name": "tenantId", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "bucketNamespaceName", + "type": "string", + "optional": true + }, + { + "id": 3, + "name": "userRoleName", + "type": "string", + "optional": true + }, + { + "id": 4, + "name": "adminRoleName", + "type": "string", + "optional": true + }, + { + "id": 5, + "name": "bucketNamespacePolicyName", + "type": "string", + "optional": true + }, + { + "id": 6, + "name": "bucketPolicyName", + "type": "string", + "optional": true + } + ] + }, + { + "name": "TenantUserPrincipalInfo", + "fields": [ + { + "id": 1, + "name": "accessIds", + "type": "string", + "is_repeated": true + } + ] + }, + { + "name": "UserAccessIdInfo", + "fields": [ + { + "id": 1, + "name": "userPrincipal", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "accessId", + "type": "string", + "optional": true + } + ] + }, + { + "name": "ExtendedUserAccessIdInfo", + "fields": [ + { + "id": 1, + "name": "userPrincipal", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "accessId", + "type": "string", + "optional": true + }, + { + "id": 3, + "name": "tenantId", + "type": "string", + "optional": true + }, + { + "id": 4, + "name": "isAdmin", + "type": "bool", + "optional": true + }, + { + "id": 5, + "name": "isDelegatedAdmin", + "type": "bool", + "optional": true + } + ] + }, + { + "name": "ListTenantRequest" + }, + { + "name": "ListTenantResponse", + "fields": [ + { + "id": 1, + "name": "tenantState", + "type": "TenantState", + "is_repeated": true + } + ] + }, + { + "name": "TenantListUserRequest", + "fields": [ + { + "id": 1, + "name": "tenantId", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "prefix", + "type": "string", + "optional": true + } + ] + }, + { + 
"name": "TenantListUserResponse", + "fields": [ + { + "id": 1, + "name": "userAccessIdInfo", + "type": "UserAccessIdInfo", + "is_repeated": true + } + ] + }, + { + "name": "TenantGetUserInfoRequest", + "fields": [ + { + "id": 1, + "name": "userPrincipal", + "type": "string", + "optional": true + } + ] + }, + { + "name": "TenantGetUserInfoResponse", + "fields": [ + { + "id": 1, + "name": "accessIdInfo", + "type": "ExtendedUserAccessIdInfo", + "is_repeated": true } ] }, @@ -3927,7 +5450,8 @@ { "id": 1, "name": "version", - "type": "uint64" + "type": "uint64", + "required": true } ] }, @@ -3937,7 +5461,202 @@ { "id": 1, "name": "kerberosID", - "type": "string" + "type": "string", + "required": true + } + ] + }, + { + "name": "CreateTenantRequest", + "fields": [ + { + "id": 1, + "name": "tenantId", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "volumeName", + "type": "string", + "optional": true + }, + { + "id": 3, + "name": "userRoleName", + "type": "string", + "optional": true + }, + { + "id": 4, + "name": "adminRoleName", + "type": "string", + "optional": true + } + ] + }, + { + "name": "SetRangerServiceVersionRequest", + "fields": [ + { + "id": 1, + "name": "rangerServiceVersion", + "type": "uint64", + "required": true + } + ] + }, + { + "name": "DeleteTenantRequest", + "fields": [ + { + "id": 1, + "name": "tenantId", + "type": "string", + "optional": true + } + ] + }, + { + "name": "TenantAssignUserAccessIdRequest", + "fields": [ + { + "id": 1, + "name": "userPrincipal", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "tenantId", + "type": "string", + "optional": true + }, + { + "id": 3, + "name": "accessId", + "type": "string", + "optional": true + } + ] + }, + { + "name": "TenantRevokeUserAccessIdRequest", + "fields": [ + { + "id": 1, + "name": "accessId", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "tenantId", + "type": "string", + "optional": true + } + ] + }, + { + "name": 
"TenantAssignAdminRequest", + "fields": [ + { + "id": 1, + "name": "accessId", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "tenantId", + "type": "string", + "optional": true + }, + { + "id": 3, + "name": "delegated", + "type": "bool", + "optional": true + } + ] + }, + { + "name": "TenantRevokeAdminRequest", + "fields": [ + { + "id": 1, + "name": "accessId", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "tenantId", + "type": "string", + "optional": true + } + ] + }, + { + "name": "GetS3VolumeContextRequest" + }, + { + "name": "CreateTenantResponse" + }, + { + "name": "SetRangerServiceVersionResponse" + }, + { + "name": "DeleteTenantResponse", + "fields": [ + { + "id": 1, + "name": "volumeName", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "volRefCount", + "type": "int64", + "optional": true + } + ] + }, + { + "name": "TenantAssignUserAccessIdResponse", + "fields": [ + { + "id": 1, + "name": "s3Secret", + "type": "S3Secret", + "optional": true + } + ] + }, + { + "name": "TenantRevokeUserAccessIdResponse" + }, + { + "name": "TenantAssignAdminResponse" + }, + { + "name": "TenantRevokeAdminResponse" + }, + { + "name": "GetS3VolumeContextResponse", + "fields": [ + { + "id": 1, + "name": "volumeInfo", + "type": "VolumeInfo", + "optional": true + }, + { + "id": 2, + "name": "userPrincipal", + "type": "string", + "optional": true } ] }, @@ -3947,12 +5666,37 @@ { "id": 1, "name": "kerberosID", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "awsSecret", - "type": "string" + "type": "string", + "required": true + } + ] + }, + { + "name": "S3Authentication", + "fields": [ + { + "id": 1, + "name": "stringToSign", + "type": "string", + "optional": true + }, + { + "id": 2, + "name": "signature", + "type": "string", + "optional": true + }, + { + "id": 3, + "name": "accessId", + "type": "string", + "optional": true } ] } @@ -4037,17 +5781,20 @@ { "id": 1, "name": "nodeId", - 
"type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "hostAddress", - "type": "string" + "type": "string", + "required": true }, { "id": 3, "name": "ratisPort", - "type": "uint32" + "type": "uint32", + "required": true } ] }, @@ -4057,17 +5804,20 @@ { "id": 1, "name": "success", - "type": "bool" + "type": "bool", + "required": true }, { "id": 2, "name": "errorCode", - "type": "ErrorCode" + "type": "ErrorCode", + "optional": true }, { "id": 3, "name": "errorMsg", - "type": "string" + "type": "string", + "optional": true } ] } @@ -4117,22 +5867,26 @@ { "id": 1, "name": "identifier", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 2, "name": "password", - "type": "bytes" + "type": "bytes", + "required": true }, { "id": 3, "name": "kind", - "type": "string" + "type": "string", + "required": true }, { "id": 4, "name": "service", - "type": "string" + "type": "string", + "required": true } ] }, @@ -4142,17 +5896,20 @@ { "id": 1, "name": "alias", - "type": "string" + "type": "string", + "required": true }, { "id": 2, "name": "token", - "type": "hadoop.common.TokenProto" + "type": "hadoop.common.TokenProto", + "optional": true }, { "id": 3, "name": "secret", - "type": "bytes" + "type": "bytes", + "optional": true } ] }, @@ -4179,7 +5936,8 @@ { "id": 1, "name": "renewer", - "type": "string" + "type": "string", + "required": true } ] }, @@ -4189,7 +5947,8 @@ { "id": 1, "name": "token", - "type": "hadoop.common.TokenProto" + "type": "hadoop.common.TokenProto", + "optional": true } ] }, @@ -4199,7 +5958,8 @@ { "id": 1, "name": "token", - "type": "hadoop.common.TokenProto" + "type": "hadoop.common.TokenProto", + "required": true } ] }, @@ -4209,7 +5969,8 @@ { "id": 1, "name": "newExpiryTime", - "type": "uint64" + "type": "uint64", + "required": true } ] }, @@ -4219,7 +5980,8 @@ { "id": 1, "name": "token", - "type": "hadoop.common.TokenProto" + "type": "hadoop.common.TokenProto", + "required": true } ] }, diff --git 
a/hadoop-ozone/interface-storage/pom.xml b/hadoop-ozone/interface-storage/pom.xml index 365eaedd826b..72d8933dbe9a 100644 --- a/hadoop-ozone/interface-storage/pom.xml +++ b/hadoop-ozone/interface-storage/pom.xml @@ -20,10 +20,10 @@ org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-interface-storage - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Storage Interface Apache Ozone Storage Interface jar diff --git a/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java b/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java index 469170475db3..53080aa4a92f 100644 --- a/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java +++ b/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java @@ -448,6 +448,16 @@ Set getMultipartUploadKeys(String volumeName, String getOzonePathKey(long volumeId, long bucketId, long parentObjectId, String pathComponentName); + /** + * Given ozone path key, component id, return the corresponding + * DB path key for delete table. + * + * @param objectId - object Id + * @param pathKey - path key of component + * @return DB Delete directory key as String. + */ + String getOzoneDeletePathKey(long objectId, String pathKey); + /** * Returns DB key name of an open file in OM metadata store. Should be * #open# prefix followed by actual leaf node name. 
diff --git a/hadoop-ozone/ozone-manager/pom.xml b/hadoop-ozone/ozone-manager/pom.xml index 6d72aa6a7eeb..987fb0b487cd 100644 --- a/hadoop-ozone/ozone-manager/pom.xml +++ b/hadoop-ozone/ozone-manager/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-manager - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Manager Server Apache Ozone Manager Server jar @@ -146,7 +146,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.reflections reflections - 0.9.11 diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java index 2aec21c6ce27..f500c9edd92e 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java @@ -1228,6 +1228,8 @@ private OzoneFileStatus getOzoneFileStatus(OmKeyArgs args, final String keyName = args.getKeyName(); OmKeyInfo fileKeyInfo = null; + OmKeyInfo dirKeyInfo = null; + OmKeyInfo fakeDirKeyInfo = null; metadataManager.getLock().acquireReadLock(BUCKET_LOCK, volumeName, bucketName); try { @@ -1240,28 +1242,27 @@ private OzoneFileStatus getOzoneFileStatus(OmKeyArgs args, // Check if the key is a file. String fileKeyBytes = metadataManager.getOzoneKey( volumeName, bucketName, keyName); - fileKeyInfo = metadataManager - .getKeyTable(getBucketLayout(metadataManager, volumeName, bucketName)) - .get(fileKeyBytes); + BucketLayout layout = + getBucketLayout(metadataManager, volumeName, bucketName); + fileKeyInfo = metadataManager.getKeyTable(layout).get(fileKeyBytes); + String dirKey = OzoneFSUtils.addTrailingSlashIfNeeded(keyName); // Check if the key is a directory. 
if (fileKeyInfo == null) { - String dirKey = OzoneFSUtils.addTrailingSlashIfNeeded(keyName); String dirKeyBytes = metadataManager.getOzoneKey( volumeName, bucketName, dirKey); - OmKeyInfo dirKeyInfo = metadataManager.getKeyTable( - getBucketLayout(metadataManager, volumeName, bucketName)) - .get(dirKeyBytes); - if (dirKeyInfo != null) { - return new OzoneFileStatus(dirKeyInfo, scmBlockSize, true); + dirKeyInfo = metadataManager.getKeyTable(layout).get(dirKeyBytes); + if (dirKeyInfo == null) { + fakeDirKeyInfo = + createFakeDirIfShould(volumeName, bucketName, keyName, layout); } } } finally { metadataManager.getLock().releaseReadLock(BUCKET_LOCK, volumeName, bucketName); - - // if the key is a file then do refresh pipeline info in OM by asking SCM if (fileKeyInfo != null) { + // if the key is a file + // then do refresh pipeline info in OM by asking SCM if (args.getLatestVersionLocation()) { slimLocationVersion(fileKeyInfo); } @@ -1276,10 +1277,21 @@ private OzoneFileStatus getOzoneFileStatus(OmKeyArgs args, sortDatanodes(clientAddress, fileKeyInfo); } } - return new OzoneFileStatus(fileKeyInfo, scmBlockSize, false); } } + if (fileKeyInfo != null) { + return new OzoneFileStatus(fileKeyInfo, scmBlockSize, false); + } + + if (dirKeyInfo != null) { + return new OzoneFileStatus(dirKeyInfo, scmBlockSize, true); + } + + if (fakeDirKeyInfo != null) { + return new OzoneFileStatus(fakeDirKeyInfo, scmBlockSize, true); + } + // Key is not found, throws exception if (LOG.isDebugEnabled()) { LOG.debug("Unable to get file status for the key: volume: {}, bucket:" + @@ -1291,6 +1303,41 @@ private OzoneFileStatus getOzoneFileStatus(OmKeyArgs args, FILE_NOT_FOUND); } + /** + * Create a fake directory if the key is a path prefix, + * otherwise returns null. + * Some keys may contain '/' Ozone will treat '/' as directory separator + * such as : key name is 'a/b/c', 'a' and 'b' may not really exist, + * but Ozone treats 'a' and 'b' as a directory. 
+ * we need create a fake directory 'a' or 'a/b' + * + * @return OmKeyInfo if the key is a path prefix, otherwise returns null. + */ + private OmKeyInfo createFakeDirIfShould(String volume, String bucket, + String keyName, BucketLayout layout) throws IOException { + OmKeyInfo fakeDirKeyInfo = null; + String dirKey = OzoneFSUtils.addTrailingSlashIfNeeded(keyName); + String fileKeyBytes = metadataManager.getOzoneKey(volume, bucket, keyName); + try (TableIterator> + keyTblItr = metadataManager.getKeyTable(layout).iterator()) { + Table.KeyValue keyValue = + keyTblItr + .seek(OzoneFSUtils.addTrailingSlashIfNeeded(fileKeyBytes)); + + if (keyValue != null) { + Path fullPath = Paths.get(keyValue.getValue().getKeyName()); + Path subPath = Paths.get(dirKey); + OmKeyInfo omKeyInfo = keyValue.getValue(); + if (fullPath.startsWith(subPath)) { + // create fake directory + fakeDirKeyInfo = createDirectoryKey(omKeyInfo, dirKey); + } + } + } + + return fakeDirKeyInfo; + } + private OzoneFileStatus getOzoneFileStatusFSO(OmKeyArgs args, String clientAddress, boolean skipFileNotFoundError) throws IOException { @@ -1368,6 +1415,7 @@ private OmKeyInfo createDirectoryKey(OmKeyInfo keyInfo, String keyName) .setVolumeName(keyInfo.getVolumeName()) .setBucketName(keyInfo.getBucketName()) .setKeyName(dir) + .setFileName(OzoneFSUtils.getFileName(keyName)) .setOmKeyLocationInfos(Collections.singletonList( new OmKeyLocationInfoGroup(0, new ArrayList<>()))) .setCreationTime(Time.now()) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMStorage.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMStorage.java index 0cde1047d5b1..aee1e0035010 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMStorage.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMStorage.java @@ -32,18 +32,50 @@ import static 
org.apache.hadoop.ozone.om.OmUpgradeConfig.ConfigStrings.OZONE_OM_INIT_DEFAULT_LAYOUT_VERSION; /** - * OMStorage is responsible for management of the StorageDirectories used by - * the Ozone Manager. + * Ozone Manager VERSION file representation. + * On top of what is defined in the base Storage class, this class adds + * functionality to hold Ozone Manager related data in its VERSION file. + * The additional values stored: + * - Ozone Manager ID - a UUID that identifies this Ozone Manager. + * The value can not be changed once initialized, and + * it is initialized automatically in this class. + * The value itself is not used anymore, it is part of the + * {@link org.apache.hadoop.hdds.protocol.proto.HddsProtos + * .OzoneManagerDetailsProto} hence not removed yet. + * - Ozone Manager Node Id - the node id defined for this Ozone manager in the + * configuration. The value can not be changed after + * it was set. + * - Ozone Manager Certificate Serial Id - the serial id of the Ozone Manager's + * SSL certificate if one exists. */ public class OMStorage extends Storage { - public static final String STORAGE_DIR = "om"; - public static final String OM_ID = "omUuid"; - public static final String OM_CERT_SERIAL_ID = "omCertSerialId"; + static final String ERROR_OM_IS_ALREADY_INITIALIZED = + "OM is already initialized."; + static final String ERROR_UNEXPECTED_OM_NODE_ID_TEMPLATE = + "OM NodeId: %s does not match existing nodeId from VERSION file: %s"; + static final String ERROR_STORAGE_NOT_INITIALIZED = + "OM Storage is not initialized yet."; + + static final String STORAGE_DIR = "om"; + static final String OM_ID = "omUuid"; + static final String OM_CERT_SERIAL_ID = "omCertSerialId"; + static final String OM_NODE_ID = "nodeId"; /** - * Construct OMStorage. + * Construct the OMStorage instance based on the configuration. 
+ * The parent directory used by the storage is defined by the + * {@link OMConfigKeys#OZONE_OM_DB_DIRS} property, if that is not set the + * {@link org.apache.hadoop.hdds.HddsConfigKeys#OZONE_METADATA_DIRS} property + * value is used as a fallback. If none of these are defined in the + * configuration an IllegalArgumentException is being thrown. + * + * @param conf an OzoneConfiguration instance containing the properties that + * can define the path where Ozone Manager stores its metadata + * * @throws IOException if any directories are inaccessible. + * @throws IllegalArgumentException if the configuration does not specify the + * path where the metadata should be stored */ public OMStorage(OzoneConfiguration conf) throws IOException { super(NodeType.OM, getOmDbDir(conf), STORAGE_DIR, @@ -51,33 +83,144 @@ public OMStorage(OzoneConfiguration conf) throws IOException { OMLayoutVersionManager::maxLayoutVersion)); } + /** + * Sets the certificate serial id to be stored in the VERSION file + * representation. + * Note that, to change the VERSION file itself, + * {@link #persistCurrentState()} has to be called after this method. + * + * @param certSerialId the new certificate serial id to set + * + * @throws IOException if the current VERSION file is not readable + */ public void setOmCertSerialId(String certSerialId) throws IOException { getStorageInfo().setProperty(OM_CERT_SERIAL_ID, certSerialId); } - public void unsetOmCertSerialId() throws IOException { + /** + * Removes the certificate serial id from the VERSION file representation. + * Note that, to change the VERSION file itself, + * {@link #persistCurrentState()} has to be called after this method. + */ + public void unsetOmCertSerialId() { getStorageInfo().unsetProperty(OM_CERT_SERIAL_ID); } + /** + * Set's the Ozone Manager ID to be stored in the VERSION file representation. + * Note that, to change the VERSION file itself, + * {@link #persistCurrentState()} has to be called after this method. 
+ * + * @param omId the UUID that identifies this Ozone Manager as a String + * + * @throws IOException if the Storage representation is already initialized, + * as this property can not be changed once it has been + * set and stored + */ public void setOmId(String omId) throws IOException { if (getState() == StorageState.INITIALIZED) { - throw new IOException("OM is already initialized."); + throw new IOException(ERROR_OM_IS_ALREADY_INITIALIZED); } else { getStorageInfo().setProperty(OM_ID, omId); } } + /** + * Sets the Ozone Manager Node ID. + * This value should be set based on the configuration and should not be + * changed later on, either in the configuration or in the VERSION file, + * to ensure consistency within the Ozone Manager HA peers. + * + * Note that, to change the VERSION file itself, + * {@link #persistCurrentState()} has to be called after this method. + * + * @param nodeId the node id defined for this Ozone Manager in the + * configuration, as a String + * + * @throws IOException if the Storage representation is already initialized, + * as this property can not be changed once it has been + * set and stored. + */ + public void setOmNodeId(String nodeId) + throws IOException { + if (getState() == StorageState.INITIALIZED) { + throw new IOException(ERROR_OM_IS_ALREADY_INITIALIZED); + } else { + getStorageInfo().setProperty(OM_NODE_ID, nodeId); + } + } + + /** + * Validates whether the provided value is the one saved in the VERSION file. + * This method provides a convenience to check if the configured value and + * the one that was stored in the VERSION file are matching. + * + * As a VERSION file that was created by an older version of OM might not + * contain the value, if the VERSION file does not have this property, the + * method persists the provided expectedNodeId into the VERSION file + * and skips the validation.
+ * + * @param expectedNodeId the nodeId read from configuration, that has to be + * matched against what we have saved in the VERSION + * file + * + * @throws IOException - if the VERSION file is not present at the time of the + * call + * - if the VERSION file contains a different value than + * the expectedNodeId provided + * - if reading/writing the VERSION file fails + */ + /* Note that we have other options as well to handle this case, but at this + * time this seemed to be a good tradeoff. + * Other options: + * 1. Use the Upgrade framework and bump Layout version. + * Excessive work, and the addition, with that the verification happens + * too late, after a bunch of things has been initialized, and there + * might be possible problems from the late validation. + * 2. Write the value during omInit only as with anything added earlier + * Seems to be error-prone, as without re-initializing the OM, the value + * will not get to the VERSION file, and validation will fail. + * This approach has the potential to scatter OzoneManager constructor call, + * so if anything like this one is to be implemented, try to figure out + * a better way, or switch to another approach if feasible. + */ + public void validateOrPersistOmNodeId(String expectedNodeId) + throws IOException { + if (getState() != StorageState.INITIALIZED) { + throw new IOException(ERROR_STORAGE_NOT_INITIALIZED); + } + String ourValue = getOmNodeId(); + if (ourValue != null && !ourValue.equals(expectedNodeId)) { + String msg = String.format( + ERROR_UNEXPECTED_OM_NODE_ID_TEMPLATE, expectedNodeId, ourValue); + throw new IOException(msg); + } else { + getStorageInfo().setProperty(OM_NODE_ID, expectedNodeId); + persistCurrentState(); + } + } + /** * Retrieves the OM ID from the version file. - * @return OM_ID + * + * @return the stored OM ID */ public String getOmId() { return getStorageInfo().getProperty(OM_ID); } + /** + * Retrieves the OM NodeId from the version file. 
+ * + * @return the stored OM Node ID + */ + public String getOmNodeId() { + return getStorageInfo().getProperty(OM_NODE_ID); + } + /** * Retrieves the serial id of certificate issued by SCM. - * @return OM_ID + * + * @return the stored Certificate Serial ID */ public String getOmCertSerialId() { return getStorageInfo().getProperty(OM_CERT_SERIAL_ID); @@ -91,6 +234,10 @@ protected Properties getNodeProperties() { } Properties omProperties = new Properties(); omProperties.setProperty(OM_ID, omId); + String nodeId = getOmNodeId(); + if (nodeId != null) { + omProperties.setProperty(OM_NODE_ID, nodeId); + } if (getOmCertSerialId() != null) { omProperties.setProperty(OM_CERT_SERIAL_ID, getOmCertSerialId()); @@ -99,11 +246,20 @@ protected Properties getNodeProperties() { } /** - * Get the location where OM should store its metadata directories. - * Fall back to OZONE_METADATA_DIRS if not defined. + * From the provided configuration gets the directory that Ozone Manager + * should use to store its metadata. + * The value of {@link OMConfigKeys#OZONE_OM_DB_DIRS} property is returned + * as the primary value, if that is not set, the method falls back to the + * {@link org.apache.hadoop.hdds.HddsConfigKeys#OZONE_METADATA_DIRS} property. + * If none of these are defined, an IllegalArgumentException is being thrown. + * + * @param conf - the configuration to get the properties from + * + * @return The metadata directory path, that should be used by OM, after + * creating all the necessary directories * - * @param conf - Config - * @return File path, after creating all the required Directories. 
+ * @throws IllegalArgumentException if the metadata directory can not be + * determined from the configuration */ public static File getOmDbDir(ConfigurationSource conf) { return ServerUtils.getDBPath(conf, OMConfigKeys.OZONE_OM_DB_DIRS); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java index c499d1e38659..247e657170a8 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java @@ -1452,6 +1452,11 @@ public String getOzonePathKey(final long volumeId, final long bucketId, return builder.toString(); } + @Override + public String getOzoneDeletePathKey(long objectId, String pathKey) { + return pathKey + OM_KEY_PREFIX + objectId; + } + @Override public String getOpenFileName(long volumeId, long bucketId, long parentID, String fileName, diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index d6755e03f08e..215f0e5daec0 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -30,9 +30,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; -import java.security.KeyPair; import java.security.PrivilegedExceptionAction; -import java.security.cert.CertificateException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -65,8 +63,6 @@ import org.apache.hadoop.hdds.conf.ConfigurationException; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import 
org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCertResponseProto; -import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.client.HddsClientUtils; import org.apache.hadoop.hdds.server.OzoneAdmins; @@ -83,7 +79,7 @@ import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; import org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; -import org.apache.hadoop.hdds.security.x509.certificate.client.OMCertificateClient; +import org.apache.hadoop.ozone.security.OMCertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest; import org.apache.hadoop.hdds.server.ServiceRuntimeInfoImpl; @@ -215,7 +211,6 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED_DEFAULT; import static org.apache.hadoop.hdds.HddsUtils.getScmAddressForClients; -import static org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest.getEncodedString; import static org.apache.hadoop.hdds.server.ServerUtils.getRemoteUserName; import static org.apache.hadoop.hdds.server.ServerUtils.updateRPCListenAddress; import static org.apache.hadoop.hdds.utils.HAUtils.getScmInfo; @@ -274,6 +269,7 @@ import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.INVALID_REQUEST; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.PERMISSION_DENIED; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.TOKEN_ERROR_OTHER; +import static org.apache.hadoop.util.ExitUtil.terminate; import static org.apache.hadoop.util.MetricUtil.captureLatencyNs; import static 
org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.BUCKET_LOCK; import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.VOLUME_LOCK; @@ -282,6 +278,7 @@ import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerInterServiceProtocolProtos.OzoneManagerInterService; import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OzoneManagerService; import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PrepareStatusResponse.PrepareStatus; + import org.apache.ratis.proto.RaftProtos.RaftPeerRole; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftPeer; @@ -290,7 +287,6 @@ import org.apache.ratis.util.ExitUtils; import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.LifeCycle; -import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -379,6 +375,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl private final RatisSnapshotInfo omRatisSnapshotInfo; private final Map ratisMetricsMap = new ConcurrentHashMap<>(); + private List ratisReporterList = null; private KeyProviderCryptoExtension kmsProvider = null; private static String keyProviderUriKeyName = @@ -457,6 +454,7 @@ private OzoneManager(OzoneConfiguration conf, StartupOption startupOption) this.omNodeDetails = omhaNodeDetails.getLocalNodeDetails(); omStorage = new OMStorage(conf); + omStorage.validateOrPersistOmNodeId(omNodeDetails.getNodeId()); omId = omStorage.getOmId(); versionManager = new OMLayoutVersionManager(omStorage.getLayoutVersion()); @@ -542,13 +540,16 @@ private OzoneManager(OzoneConfiguration conf, StartupOption startupOption) // For testing purpose only, not hit scm from om as Hadoop UGI can't login // two principals in the same JVM. 
+ ScmInfo scmInfo; if (!testSecureOmFlag) { - ScmInfo scmInfo = getScmInfo(configuration); + scmInfo = getScmInfo(configuration); if (!scmInfo.getClusterId().equals(omStorage.getClusterID())) { logVersionMismatch(conf, scmInfo); throw new OMException("SCM version info mismatch.", ResultCodes.SCM_VERSION_MISMATCH_ERROR); } + } else { + scmInfo = new ScmInfo.Builder().setScmId("testSecureOm").build(); } RPC.setProtocolEngine(configuration, OzoneManagerProtocolPB.class, @@ -568,8 +569,9 @@ private OzoneManager(OzoneConfiguration conf, StartupOption startupOption) throw new RuntimeException("OzoneManager started in secure mode but " + "doesn't have SCM signed certificate."); } - certClient = new OMCertificateClient(new SecurityConfig(conf), - omStorage.getOmCertSerialId()); + certClient = new OMCertificateClient(secConfig, omStorage, + scmInfo == null ? null : scmInfo.getScmId(), this::saveNewCertId, + this::terminateOM); } if (secConfig.isBlockTokenEnabled()) { blockTokenMgr = createBlockTokenSecretManager(configuration); @@ -1035,6 +1037,7 @@ public void startSecretManager() { /** * For testing purpose only. */ + @VisibleForTesting public void setCertClient(CertificateClient certClient) { // TODO: Initialize it in constructor with implementation for certClient. 
this.certClient = certClient; @@ -1227,7 +1230,8 @@ private static void loginOMUserIfSecurityEnabled(OzoneConfiguration conf) @VisibleForTesting public static boolean omInit(OzoneConfiguration conf) throws IOException, AuthenticationException { - OMHANodeDetails.loadOMHAConfig(conf); + OMHANodeDetails omhaNodeDetails = OMHANodeDetails.loadOMHAConfig(conf); + String nodeId = omhaNodeDetails.getLocalNodeDetails().getNodeId(); loginOMUserIfSecurityEnabled(conf); OMStorage omStorage = new OMStorage(conf); StorageState state = omStorage.getState(); @@ -1244,6 +1248,7 @@ public static boolean omInit(OzoneConfiguration conf) throws IOException, } if (state != StorageState.INITIALIZED) { + omStorage.setOmNodeId(nodeId); omStorage.setClusterId(clusterId); omStorage.initialize(); System.out.println( @@ -1276,19 +1281,18 @@ public static boolean omInit(OzoneConfiguration conf) throws IOException, */ @VisibleForTesting public static void initializeSecurity(OzoneConfiguration conf, - OMStorage omStore, String scmId) - throws IOException { + OMStorage omStore, String scmId) throws IOException { LOG.info("Initializing secure OzoneManager."); CertificateClient certClient = - new OMCertificateClient(new SecurityConfig(conf), - omStore.getOmCertSerialId()); + new OMCertificateClient(new SecurityConfig(conf), omStore, scmId); CertificateClient.InitResponse response = certClient.init(); if (response.equals(CertificateClient.InitResponse.REINIT)) { LOG.info("Re-initialize certificate client."); omStore.unsetOmCertSerialId(); omStore.persistCurrentState(); - certClient = new OMCertificateClient(new SecurityConfig(conf)); + certClient = new OMCertificateClient( + new SecurityConfig(conf), omStore, scmId); response = certClient.init(); } LOG.info("Init response: {}", response); @@ -1297,7 +1301,10 @@ public static void initializeSecurity(OzoneConfiguration conf, LOG.info("Initialization successful."); break; case GETCERT: - getSCMSignedCert(certClient, conf, omStore, scmId); + // Sign and 
persist OM cert. + CertificateSignRequest.Builder builder = certClient.getCSRBuilder(); + omStore.setOmCertSerialId( + certClient.signAndStoreCertificate(builder.build())); LOG.info("Successfully stored SCM signed certificate."); break; case FAILURE: @@ -1774,10 +1781,10 @@ public void updatePeerList(List newPeers) { for (String omNodeId : decommissionedOMs) { if (isCurrentNode(omNodeId)) { // Decommissioning Node should not receive the configuration change - // request. Shut it down. - String errorMsg = "Shutting down as OM has been decommissioned."; - LOG.error("Fatal Error: {}", errorMsg); - exitManager.forceExit(1, errorMsg, LOG); + // request. It may receive the request if the newly added node id or + // the decommissioned node id is same. + LOG.warn("New OM node Id: {} is same as decommissioned earlier", + omNodeId); } else { // Remove decommissioned node from peer list (which internally // removed from Ratis peer list too) @@ -1968,7 +1975,7 @@ private void initializeRatisServer(boolean shouldBootstrap) if (isRatisEnabled) { if (omRatisServer == null) { // This needs to be done before initializing Ratis. - RatisDropwizardExports. + ratisReporterList = RatisDropwizardExports. registerRatisMetricReporters(ratisMetricsMap, () -> isStopped()); omRatisServer = OzoneManagerRatisServer.newOMRatisServer( configuration, this, omNodeDetails, peerNodesMap, @@ -2082,7 +2089,7 @@ public void stop() { omSnapshotProvider.stop(); } OMPerformanceMetrics.unregister(); - RatisDropwizardExports.clear(ratisMetricsMap); + RatisDropwizardExports.clear(ratisMetricsMap, ratisReporterList); scmClient.close(); } catch (Exception e) { LOG.error("OzoneManager stop failed.", e); @@ -2094,6 +2101,11 @@ public void shutDown(String message) { ExitUtils.terminate(0, message, LOG); } + public void terminateOM() { + stop(); + terminate(1); + } + /** * Wait until service has completed shutdown. 
*/ @@ -2117,120 +2129,6 @@ private void startSecretManagerIfNecessary() { } } - /** - * Get SCM signed certificate and store it using certificate client. - */ - private static void getSCMSignedCert(CertificateClient client, - OzoneConfiguration config, OMStorage omStore, String scmId) - throws IOException { - CertificateSignRequest.Builder builder = client.getCSRBuilder(); - KeyPair keyPair = new KeyPair(client.getPublicKey(), - client.getPrivateKey()); - boolean flexibleFqdnResolutionEnabled = config.getBoolean( - OZONE_FLEXIBLE_FQDN_RESOLUTION_ENABLED, - OZONE_FLEXIBLE_FQDN_RESOLUTION_ENABLED_DEFAULT); - InetSocketAddress omRpcAdd = OmUtils.getOmAddress(config); - String ip = null; - - boolean addressResolved = omRpcAdd != null && omRpcAdd.getAddress() != null; - if (flexibleFqdnResolutionEnabled && !addressResolved && omRpcAdd != null) { - InetSocketAddress omRpcAddWithHostName = - OzoneNetUtils.getAddressWithHostNameLocal(omRpcAdd); - if (omRpcAddWithHostName != null - && omRpcAddWithHostName.getAddress() != null) { - addressResolved = true; - ip = omRpcAddWithHostName.getAddress().getHostAddress(); - } - } - - if (!addressResolved) { - LOG.error("Incorrect om rpc address. omRpcAdd:{}", omRpcAdd); - throw new RuntimeException("Can't get SCM signed certificate. " + - "omRpcAdd: " + omRpcAdd); - } - - if (ip == null) { - ip = omRpcAdd.getAddress().getHostAddress(); - } - - String hostname = omRpcAdd.getHostName(); - int port = omRpcAdd.getPort(); - String subject; - if (builder.hasDnsName()) { - subject = UserGroupInformation.getCurrentUser().getShortUserName() - + "@" + hostname; - } else { - // With only IP in alt.name, certificate validation would fail if subject - // isn't a hostname either, so omit username. 
- subject = hostname; - } - - builder.setCA(false) - .setKey(keyPair) - .setConfiguration(config) - .setScmID(scmId) - .setClusterID(omStore.getClusterID()) - .setSubject(subject); - - OMHANodeDetails haOMHANodeDetails = OMHANodeDetails.loadOMHAConfig(config); - String serviceName = - haOMHANodeDetails.getLocalNodeDetails().getServiceId(); - if (!StringUtils.isEmpty(serviceName)) { - builder.addServiceName(serviceName); - } - - LOG.info("Creating csr for OM->dns:{},ip:{},scmId:{},clusterId:{}," + - "subject:{}", hostname, ip, scmId, omStore.getClusterID(), subject); - - HddsProtos.OzoneManagerDetailsProto.Builder omDetailsProtoBuilder = - HddsProtos.OzoneManagerDetailsProto.newBuilder() - .setHostName(hostname) - .setIpAddress(ip) - .setUuid(omStore.getOmId()) - .addPorts(HddsProtos.Port.newBuilder() - .setName(RPC_PORT) - .setValue(port) - .build()); - - PKCS10CertificationRequest csr = builder.build(); - HddsProtos.OzoneManagerDetailsProto omDetailsProto = - omDetailsProtoBuilder.build(); - LOG.info("OzoneManager ports added:{}", omDetailsProto.getPortsList()); - SCMSecurityProtocolClientSideTranslatorPB secureScmClient = - HddsServerUtil.getScmSecurityClientWithFixedDuration(config); - - SCMGetCertResponseProto response = secureScmClient. - getOMCertChain(omDetailsProto, getEncodedString(csr)); - String pemEncodedCert = response.getX509Certificate(); - - try { - - // Store SCM CA certificate. - if (response.hasX509CACertificate()) { - String pemEncodedRootCert = response.getX509CACertificate(); - client.storeCertificate(pemEncodedRootCert, true, true); - client.storeCertificate(pemEncodedCert, true); - - // Store Root CA certificate if available. - if (response.hasX509RootCACertificate()) { - client.storeRootCACertificate(response.getX509RootCACertificate(), - true); - } - - // Persist om cert serial id. - omStore.setOmCertSerialId(CertificateCodec. 
- getX509Certificate(pemEncodedCert).getSerialNumber().toString()); - } else { - throw new RuntimeException("Unable to retrieve OM certificate " + - "chain"); - } - } catch (IOException | CertificateException e) { - LOG.error("Error while storing SCM signed certificate.", e); - throw new RuntimeException(e); - } - - } - /** * @return true if delegation token operation is allowed */ @@ -2609,14 +2507,13 @@ public boolean isSpnegoEnabled() { */ @Override public OmVolumeArgs getVolumeInfo(String volume) throws IOException { - if (isAclEnabled) { - checkAcls(ResourceType.VOLUME, StoreType.OZONE, ACLType.READ, volume, - null, null); - } - boolean auditSuccess = true; Map auditMap = buildAuditMap(volume); try { + if (isAclEnabled) { + checkAcls(ResourceType.VOLUME, StoreType.OZONE, ACLType.READ, volume, + null, null); + } metrics.incNumVolumeInfos(); return volumeManager.getVolumeInfo(volume); } catch (Exception ex) { @@ -2748,10 +2645,6 @@ public List listAllVolumes(String prefix, String prevKey, int public List listBuckets(String volumeName, String startKey, String prefix, int maxNumOfBuckets) throws IOException { - if (isAclEnabled) { - checkAcls(ResourceType.VOLUME, StoreType.OZONE, ACLType.LIST, volumeName, - null, null); - } boolean auditSuccess = true; Map auditMap = buildAuditMap(volumeName); auditMap.put(OzoneConsts.START_KEY, startKey); @@ -2759,6 +2652,10 @@ public List listBuckets(String volumeName, auditMap.put(OzoneConsts.MAX_NUM_OF_BUCKETS, String.valueOf(maxNumOfBuckets)); try { + if (isAclEnabled) { + checkAcls(ResourceType.VOLUME, StoreType.OZONE, ACLType.LIST, + volumeName, null, null); + } metrics.incNumBucketLists(); return bucketManager.listBuckets(volumeName, startKey, prefix, maxNumOfBuckets); @@ -2787,14 +2684,14 @@ public List listBuckets(String volumeName, @Override public OmBucketInfo getBucketInfo(String volume, String bucket) throws IOException { - if (isAclEnabled) { - checkAcls(ResourceType.BUCKET, StoreType.OZONE, ACLType.READ, volume, - 
bucket, null); - } boolean auditSuccess = true; Map auditMap = buildAuditMap(volume); auditMap.put(OzoneConsts.BUCKET, bucket); try { + if (isAclEnabled) { + checkAcls(ResourceType.BUCKET, StoreType.OZONE, ACLType.READ, volume, + bucket, null); + } metrics.incNumBucketInfos(); final OmBucketInfo bucketInfo = bucketManager.getBucketInfo(volume, bucket); @@ -2826,20 +2723,19 @@ public OmKeyInfo lookupKey(OmKeyArgs args) throws IOException { ResolvedBucket bucket = captureLatencyNs( perfMetrics.getLookupResolveBucketLatencyNs(), () -> resolveBucketLink(args)); - - if (isAclEnabled) { - captureLatencyNs(perfMetrics.getLookupAclCheckLatencyNs(), - () -> checkAcls(ResourceType.KEY, StoreType.OZONE, ACLType.READ, - bucket.realVolume(), bucket.realBucket(), args.getKeyName()) - ); - } - boolean auditSuccess = true; Map auditMap = bucket.audit(args.toAuditMap()); OmKeyArgs resolvedArgs = bucket.update(args); try { + if (isAclEnabled) { + captureLatencyNs(perfMetrics.getLookupAclCheckLatencyNs(), + () -> checkAcls(ResourceType.KEY, StoreType.OZONE, + ACLType.READ, bucket.realVolume(), bucket.realBucket(), + args.getKeyName()) + ); + } metrics.incNumKeyLookups(); return keyManager.lookupKey(resolvedArgs, getClientAddress()); } catch (Exception ex) { @@ -2882,17 +2778,18 @@ public KeyInfoWithVolumeContext getKeyInfo(final OmKeyArgs args, perfMetrics.getGetKeyInfoResolveBucketLatencyNs(), () -> resolveBucketLink(resolvedVolumeArgs)); - if (isAclEnabled) { - captureLatencyNs(perfMetrics.getGetKeyInfoAclCheckLatencyNs(), () -> - checkAcls(ResourceType.KEY, StoreType.OZONE, ACLType.READ, - bucket.realVolume(), bucket.realBucket(), args.getKeyName()) - ); - } - boolean auditSuccess = true; OmKeyArgs resolvedArgs = bucket.update(args); try { + if (isAclEnabled) { + captureLatencyNs(perfMetrics.getGetKeyInfoAclCheckLatencyNs(), () -> + checkAcls(ResourceType.KEY, StoreType.OZONE, + ACLType.READ, bucket.realVolume(), + bucket.realBucket(), args.getKeyName()) + ); + } + 
metrics.incNumGetKeyInfo(); OmKeyInfo keyInfo = keyManager.getKeyInfo(resolvedArgs, getClientAddress()); @@ -2925,11 +2822,6 @@ public List listKeys(String volumeName, String bucketName, ResolvedBucket bucket = resolveBucketLink(Pair.of(volumeName, bucketName)); - if (isAclEnabled) { - checkAcls(ResourceType.BUCKET, StoreType.OZONE, ACLType.LIST, - bucket.realVolume(), bucket.realBucket(), keyPrefix); - } - boolean auditSuccess = true; Map auditMap = bucket.audit(); auditMap.put(OzoneConsts.START_KEY, startKey); @@ -2937,6 +2829,10 @@ public List listKeys(String volumeName, String bucketName, auditMap.put(OzoneConsts.KEY_PREFIX, keyPrefix); try { + if (isAclEnabled) { + checkAcls(ResourceType.BUCKET, StoreType.OZONE, ACLType.LIST, + bucket.realVolume(), bucket.realBucket(), keyPrefix); + } metrics.incNumKeyLists(); return keyManager.listKeys(bucket.realVolume(), bucket.realBucket(), startKey, keyPrefix, maxKeys); @@ -2958,22 +2854,17 @@ public List listKeys(String volumeName, String bucketName, public List listTrash(String volumeName, String bucketName, String startKeyName, String keyPrefix, int maxKeys) throws IOException { - - // bucket links not supported - - if (isAclEnabled) { - checkAcls(ResourceType.BUCKET, StoreType.OZONE, ACLType.LIST, - volumeName, bucketName, keyPrefix); - } - boolean auditSuccess = true; Map auditMap = buildAuditMap(volumeName); auditMap.put(OzoneConsts.BUCKET, bucketName); auditMap.put(OzoneConsts.START_KEY, startKeyName); auditMap.put(OzoneConsts.KEY_PREFIX, keyPrefix); auditMap.put(OzoneConsts.MAX_KEYS, String.valueOf(maxKeys)); - try { + if (isAclEnabled) { + checkAcls(ResourceType.BUCKET, StoreType.OZONE, ACLType.LIST, + volumeName, bucketName, keyPrefix); + } metrics.incNumTrashKeyLists(); return keyManager.listTrash(volumeName, bucketName, startKeyName, keyPrefix, maxKeys); @@ -3599,17 +3490,16 @@ private ResourceType getResourceType(OmKeyArgs args) { public OmKeyInfo lookupFile(OmKeyArgs args) throws IOException { ResolvedBucket 
bucket = resolveBucketLink(args); - if (isAclEnabled) { - checkAcls(ResourceType.KEY, StoreType.OZONE, ACLType.READ, - bucket.realVolume(), bucket.realBucket(), args.getKeyName()); - } - boolean auditSuccess = true; Map auditMap = bucket.audit(args.toAuditMap()); args = bucket.update(args); try { + if (isAclEnabled) { + checkAcls(ResourceType.KEY, StoreType.OZONE, ACLType.READ, + bucket.realVolume(), bucket.realBucket(), args.getKeyName()); + } metrics.incNumLookupFile(); return keyManager.lookupFile(args, getClientAddress()); } catch (Exception ex) { @@ -3645,17 +3535,16 @@ public List listStatus(OmKeyArgs args, boolean recursive, ResolvedBucket bucket = resolveBucketLink(args); - if (isAclEnabled) { - checkAcls(getResourceType(args), StoreType.OZONE, ACLType.READ, - bucket.realVolume(), bucket.realBucket(), args.getKeyName()); - } - boolean auditSuccess = true; Map auditMap = bucket.audit(args.toAuditMap()); args = bucket.update(args); try { + if (isAclEnabled) { + checkAcls(getResourceType(args), StoreType.OZONE, ACLType.READ, + bucket.realVolume(), bucket.realBucket(), args.getKeyName()); + } metrics.incNumListStatus(); return keyManager.listStatus(args, recursive, startKey, maxListingPageSize, getClientAddress(), allowPartialPrefixes); @@ -4579,4 +4468,64 @@ private void updateLayoutVersionInDB(OMLayoutVersionManager lvm, private BucketLayout getBucketLayout() { return BucketLayout.DEFAULT; } + + void saveNewCertId(String certId) { + try { + omStorage.setOmCertSerialId(certId); + omStorage.persistCurrentState(); + } catch (IOException ex) { + // New cert ID cannot be persisted into VERSION file. + LOG.error("Failed to persist new cert ID {} to VERSION file." 
+ + "Terminating OzoneManager...", certId, ex); + shutDown("OzoneManage shutdown because VERSION file persist failure."); + } + } + + public static HddsProtos.OzoneManagerDetailsProto getOmDetailsProto( + OzoneConfiguration config, String omID) { + boolean flexibleFqdnResolutionEnabled = config.getBoolean( + OZONE_FLEXIBLE_FQDN_RESOLUTION_ENABLED, + OZONE_FLEXIBLE_FQDN_RESOLUTION_ENABLED_DEFAULT); + InetSocketAddress omRpcAdd = OmUtils.getOmAddress(config); + String ip = null; + + boolean addressResolved = omRpcAdd != null && omRpcAdd.getAddress() != null; + if (flexibleFqdnResolutionEnabled && !addressResolved && omRpcAdd != null) { + InetSocketAddress omRpcAddWithHostName = + OzoneNetUtils.getAddressWithHostNameLocal(omRpcAdd); + if (omRpcAddWithHostName != null + && omRpcAddWithHostName.getAddress() != null) { + addressResolved = true; + ip = omRpcAddWithHostName.getAddress().getHostAddress(); + } + } + + if (!addressResolved) { + LOG.error("Incorrect om rpc address. omRpcAdd:{}", omRpcAdd); + throw new RuntimeException("Can't get SCM signed certificate. 
" + + "omRpcAdd: " + omRpcAdd); + } + + if (ip == null) { + ip = omRpcAdd.getAddress().getHostAddress(); + } + + String hostname = omRpcAdd.getHostName(); + int port = omRpcAdd.getPort(); + + HddsProtos.OzoneManagerDetailsProto.Builder omDetailsProtoBuilder = + HddsProtos.OzoneManagerDetailsProto.newBuilder() + .setHostName(hostname) + .setIpAddress(ip) + .setUuid(omID) + .addPorts(HddsProtos.Port.newBuilder() + .setName(RPC_PORT) + .setValue(port) + .build()); + + HddsProtos.OzoneManagerDetailsProto omDetailsProto = + omDetailsProtoBuilder.build(); + LOG.info("OzoneManager ports added:{}", omDetailsProto.getPortsList()); + return omDetailsProto; + } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java index 46bc57d0efb5..6779c3378bcd 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java @@ -18,6 +18,7 @@ import com.google.common.base.Preconditions; import com.google.protobuf.RpcController; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.ClientVersion; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.fs.FSDataInputStream; @@ -90,6 +91,7 @@ public TrashOzoneFileSystem(OzoneManager ozoneManager) throws IOException { this.userName = UserGroupInformation.getCurrentUser().getShortUserName(); this.runCount = new AtomicLong(0); + setConf(ozoneManager.getConfiguration()); } private RaftClientRequest getRatisRequest( @@ -154,8 +156,8 @@ public boolean rename(Path src, Path dst) throws IOException { ozoneManager.getMetrics().incNumTrashRenames(); LOG.trace("Src:" + src + "Dst:" + dst); // check whether the src and dst belong to the same bucket & trashroot. 
- OFSPath srcPath = new OFSPath(src); - OFSPath dstPath = new OFSPath(dst); + OFSPath srcPath = new OFSPath(src, OzoneConfiguration.of(getConf())); + OFSPath dstPath = new OFSPath(dst, OzoneConfiguration.of(getConf())); OmBucketInfo bucket = ozoneManager.getBucketInfo(srcPath.getVolumeName(), srcPath.getBucketName()); if (bucket.getBucketLayout().isFileSystemOptimized()) { @@ -190,7 +192,7 @@ private boolean renameFSO(OFSPath srcPath, OFSPath dstPath) { @Override public boolean delete(Path path, boolean b) throws IOException { ozoneManager.getMetrics().incNumTrashDeletes(); - OFSPath srcPath = new OFSPath(path); + OFSPath srcPath = new OFSPath(path, OzoneConfiguration.of(getConf())); OmBucketInfo bucket = ozoneManager.getBucketInfo(srcPath.getVolumeName(), srcPath.getBucketName()); if (bucket.getBucketLayout().isFileSystemOptimized()) { @@ -283,7 +285,7 @@ public FileStatus getFileStatus(Path path) throws IOException { } private OmKeyArgs constructOmKeyArgs(Path path) { - OFSPath ofsPath = new OFSPath(path); + OFSPath ofsPath = new OFSPath(path, OzoneConfiguration.of(getConf())); String volume = ofsPath.getVolumeName(); String bucket = ofsPath.getBucketName(); String key = ofsPath.getKeyName(); @@ -388,7 +390,8 @@ boolean iterate() throws IOException { List keyPathList = new ArrayList<>(); if (status.isDirectory()) { LOG.trace("Iterating directory: {}", pathKey); - OFSPath ofsPath = new OFSPath(pathKey); + OFSPath ofsPath = new OFSPath(pathKey, + OzoneConfiguration.of(getConf())); String ofsPathprefix = ofsPath.getNonKeyPathNoPrefixDelim() + OZONE_URI_DELIMITER; while (keyIterator.hasNext()) { @@ -482,8 +485,10 @@ private class RenameIterator extends OzoneListingIterator { boolean processKeyPath(List keyPathList) { for (String keyPath : keyPathList) { String newPath = dstPath.concat(keyPath.substring(srcPath.length())); - OFSPath src = new OFSPath(keyPath); - OFSPath dst = new OFSPath(newPath); + OFSPath src = new OFSPath(keyPath, + 
OzoneConfiguration.of(getConf())); + OFSPath dst = new OFSPath(newPath, + OzoneConfiguration.of(getConf())); OzoneManagerProtocolProtos.OMRequest omRequest = getRenameKeyRequest(src, dst); @@ -552,7 +557,8 @@ && listStatus(f).length != 0) { boolean processKeyPath(List keyPathList) { LOG.trace("Deleting keys: {}", keyPathList); for (String keyPath : keyPathList) { - OFSPath path = new OFSPath(keyPath); + OFSPath path = new OFSPath(keyPath, + OzoneConfiguration.of(getConf())); OzoneManagerProtocolProtos.OMRequest omRequest = getDeleteKeysRequest(path); try { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashPolicyOzone.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashPolicyOzone.java index da422654386f..83a1ee5c16d5 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashPolicyOzone.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashPolicyOzone.java @@ -151,7 +151,8 @@ public boolean moveToTrash(Path path) throws IOException { Path trashPath; Path baseTrashPath; if (fs.getUri().getScheme().equals(OzoneConsts.OZONE_OFS_URI_SCHEME)) { - OFSPath ofsPath = new OFSPath(path); + OFSPath ofsPath = new OFSPath(path, + OzoneConfiguration.of(configuration)); // trimming volume and bucket in order to be compatible with o3fs // Also including volume and bucket name in the path is redundant as // the key is already in a particular volume and bucket. @@ -222,7 +223,8 @@ private boolean validatePath(Path path) throws IOException { // Check to see if bucket is path item to be deleted. 
// Cannot moveToTrash if bucket is deleted, // return error for this condition - OFSPath ofsPath = new OFSPath(key.substring(1)); + OFSPath ofsPath = new OFSPath(key.substring(1), + OzoneConfiguration.of(configuration)); if (path.isRoot() || ofsPath.isBucket()) { throw new IOException("Recursive rm of bucket " + path.toString() + " not permitted"); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java index 905f0c768da6..4a255f263f3c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java @@ -275,8 +275,9 @@ private void flushTransactions() { AtomicReference lastTraceId = new AtomicReference<>(); readyBuffer.iterator().forEachRemaining((entry) -> { + OMResponse omResponse = null; try { - OMResponse omResponse = entry.getResponse().getOMResponse(); + omResponse = entry.getResponse().getOMResponse(); lastTraceId.set(omResponse.getTraceID()); addToBatchWithTrace(omResponse, (SupplierWithIOException) () -> { @@ -290,7 +291,9 @@ private void flushTransactions() { } catch (IOException ex) { // During Adding to RocksDB batch entry got an exception. // We should terminate the OM. 
- terminate(ex); + terminate(ex, 1, omResponse); + } catch (Throwable t) { + terminate(t, 2, omResponse); } }); @@ -386,11 +389,9 @@ private void flushTransactions() { + "exit.", Thread.currentThread().getName()); } } catch (IOException ex) { - terminate(ex); + terminate(ex, 1); } catch (Throwable t) { - final String s = "OMDoubleBuffer flush thread " + - Thread.currentThread().getName() + " encountered Throwable error"; - ExitUtils.terminate(2, s, t, LOG); + terminate(t, 2); } } } @@ -480,10 +481,18 @@ public void stop() { } - private void terminate(IOException ex) { - String message = "During flush to DB encountered error in " + - "OMDoubleBuffer flush thread " + Thread.currentThread().getName(); - ExitUtils.terminate(1, message, ex, LOG); + private void terminate(Throwable t, int status) { + terminate(t, status, null); + } + + private void terminate(Throwable t, int status, OMResponse omResponse) { + StringBuilder message = new StringBuilder( + "During flush to DB encountered error in " + + "OMDoubleBuffer flush thread " + Thread.currentThread().getName()); + if (omResponse != null) { + message.append(" when handling OMRequest: ").append(omResponse); + } + ExitUtils.terminate(status, message.toString(), t, LOG); } /** diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/utils/OzoneManagerRatisUtils.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/utils/OzoneManagerRatisUtils.java index b46779e2fef9..cd52bd7143e2 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/utils/OzoneManagerRatisUtils.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/utils/OzoneManagerRatisUtils.java @@ -80,7 +80,6 @@ import org.apache.hadoop.ozone.om.request.volume.acl.OMVolumeAddAclRequest; import org.apache.hadoop.ozone.om.request.volume.acl.OMVolumeRemoveAclRequest; import org.apache.hadoop.ozone.om.request.volume.acl.OMVolumeSetAclRequest; -import 
org.apache.hadoop.ozone.om.request.OMEchoRPCRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OzoneObj.ObjectType; @@ -290,8 +289,6 @@ public static OMClientRequest createClientRequest(OMRequest omRequest, volumeName = keyArgs.getVolumeName(); bucketName = keyArgs.getBucketName(); break; - case EchoRPC: - return new OMEchoRPCRequest(omRequest); default: throw new IllegalStateException("Unrecognized write command " + "type request" + cmdType); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketCreateRequest.java index 2d913bf31938..2bb988069edc 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketCreateRequest.java @@ -207,8 +207,10 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, } //Check quotaInBytes to update - checkQuotaBytesValid(metadataManager, omVolumeArgs, omBucketInfo, - volumeKey); + if (!bucketInfo.hasSourceBucket()) { + checkQuotaBytesValid(metadataManager, omVolumeArgs, omBucketInfo, + volumeKey); + } // Add objectID and updateID omBucketInfo.setObjectID( @@ -388,6 +390,15 @@ public boolean checkQuotaBytesValid(OMMetadataManager metadataManager, long quotaInBytes = omBucketInfo.getQuotaInBytes(); long volumeQuotaInBytes = omVolumeArgs.getQuotaInBytes(); + // When volume quota is set, then its mandatory to have bucket quota + if (volumeQuotaInBytes > 0) { + if (quotaInBytes <= 0) { + throw new OMException("Bucket space quota in this volume " + + "should be set as volume space quota is already set.", + 
OMException.ResultCodes.QUOTA_ERROR); + } + } + long totalBucketQuota = 0; if (quotaInBytes > 0) { totalBucketQuota = quotaInBytes; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketSetPropertyRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketSetPropertyRequest.java index f3fd3fc71e02..ff38668e5f53 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketSetPropertyRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/bucket/OMBucketSetPropertyRequest.java @@ -202,6 +202,10 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, if (defaultReplicationConfig != null) { // Resetting the default replication config. bucketInfoBuilder.setDefaultReplicationConfig(defaultReplicationConfig); + } else if (dbBucketInfo.getDefaultReplicationConfig() != null) { + // Retaining existing default replication config + bucketInfoBuilder.setDefaultReplicationConfig( + dbBucketInfo.getDefaultReplicationConfig()); } bucketInfoBuilder.setCreationTime(dbBucketInfo.getCreationTime()); @@ -298,12 +302,20 @@ public boolean checkQuotaBytesValid(OMMetadataManager metadataManager, OMException.ResultCodes.QUOTA_ERROR); } } + + // avoid iteration of other bucket if quota set is less than previous set + if (quotaInBytes < dbBucketInfo.getQuotaInBytes()) { + return true; + } + List bucketList = metadataManager.listBuckets( omVolumeArgs.getVolume(), null, null, Integer.MAX_VALUE); for (OmBucketInfo bucketInfo : bucketList) { + if (omBucketArgs.getBucketName().equals(bucketInfo.getBucketName())) { + continue; + } long nextQuotaInBytes = bucketInfo.getQuotaInBytes(); - if (nextQuotaInBytes > OzoneConsts.QUOTA_RESET && - !omBucketArgs.getBucketName().equals(bucketInfo.getBucketName())) { + if (nextQuotaInBytes > OzoneConsts.QUOTA_RESET) { totalBucketQuota += nextQuotaInBytes; } } diff 
--git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMDirectoryCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMDirectoryCreateRequest.java index 64af4947d5aa..780ee9ca725f 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMDirectoryCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMDirectoryCreateRequest.java @@ -36,6 +36,7 @@ import org.apache.hadoop.ozone.om.helpers.OzoneAclUtil; import org.apache.hadoop.ozone.om.helpers.OzoneFSUtils; import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerDoubleBufferHelper; import org.apache.hadoop.ozone.om.request.util.OmResponseUtil; @@ -212,12 +213,19 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, missingParents, inheritAcls, trxnLogIndex); numMissingParents = missingParentInfos.size(); + OmBucketInfo omBucketInfo = + getBucketInfo(omMetadataManager, volumeName, bucketName); + checkBucketQuotaInNamespace(omBucketInfo, numMissingParents + 1L); + omBucketInfo.incrUsedNamespace(numMissingParents + 1L); + OMFileRequest.addKeyTableCacheEntries(omMetadataManager, volumeName, bucketName, Optional.of(dirKeyInfo), Optional.of(missingParentInfos), trxnLogIndex); + result = Result.SUCCESS; omClientResponse = new OMDirectoryCreateResponse(omResponse.build(), - dirKeyInfo, missingParentInfos, result, getBucketLayout()); + dirKeyInfo, missingParentInfos, result, getBucketLayout(), + omBucketInfo.copyObject()); } else { // omDirectoryResult == DIRECTORY_EXITS result = Result.DIRECTORY_ALREADY_EXISTS; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMDirectoryCreateRequestWithFSO.java 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMDirectoryCreateRequestWithFSO.java index ed4cab660d30..9140a89ecda7 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMDirectoryCreateRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMDirectoryCreateRequestWithFSO.java @@ -27,6 +27,7 @@ import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OzoneAclUtil; import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerDoubleBufferHelper; @@ -160,24 +161,28 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, final long bucketId = omMetadataManager .getBucketId(volumeName, bucketName); + // total number of keys created. + numKeysCreated = missingParentInfos.size() + 1; + OmBucketInfo omBucketInfo = + getBucketInfo(omMetadataManager, volumeName, bucketName); + checkBucketQuotaInNamespace(omBucketInfo, numKeysCreated); + omBucketInfo.incrUsedNamespace(numKeysCreated); + // prepare leafNode dir OmDirectoryInfo dirInfo = createDirectoryInfoWithACL( - omPathInfo.getLeafNodeName(), - keyArgs, omPathInfo.getLeafNodeObjectId(), - omPathInfo.getLastKnownParentId(), trxnLogIndex, - OzoneAclUtil.fromProtobuf(keyArgs.getAclsList())); + omPathInfo.getLeafNodeName(), + keyArgs, omPathInfo.getLeafNodeObjectId(), + omPathInfo.getLastKnownParentId(), trxnLogIndex, + OzoneAclUtil.fromProtobuf(keyArgs.getAclsList())); OMFileRequest.addDirectoryTableCacheEntries(omMetadataManager, - volumeId, bucketId, trxnLogIndex, - Optional.of(missingParentInfos), Optional.of(dirInfo)); - - // total number of keys created. 
- numKeysCreated = missingParentInfos.size() + 1; + volumeId, bucketId, trxnLogIndex, + Optional.of(missingParentInfos), Optional.of(dirInfo)); result = OMDirectoryCreateRequest.Result.SUCCESS; omClientResponse = new OMDirectoryCreateResponseWithFSO(omResponse.build(), volumeId, bucketId, dirInfo, missingParentInfos, result, - getBucketLayout()); + getBucketLayout(), omBucketInfo.copyObject()); } else { result = Result.DIRECTORY_ALREADY_EXISTS; omResponse.setStatus(Status.DIRECTORY_ALREADY_EXISTS); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequest.java index 634ba81f2af0..d92400af3e78 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequest.java @@ -141,7 +141,8 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { new ExcludeList(), requestedSize, scmBlockSize, ozoneManager.getPreallocateBlocksMax(), ozoneManager.isGrpcBlockTokenEnabled(), - ozoneManager.getOMNodeId()); + ozoneManager.getOMNodeId(), + ozoneManager.getMetrics()); KeyArgs.Builder newKeyArgs = keyArgs.toBuilder() .setModificationTime(Time.now()).setType(type).setFactor(factor) @@ -276,8 +277,11 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, long preAllocatedSpace = newLocationList.size() * ozoneManager.getScmBlockSize() * omKeyInfo.getReplicationConfig().getRequiredNodes(); - checkBucketQuotaInBytes(omBucketInfo, preAllocatedSpace); - checkBucketQuotaInNamespace(omBucketInfo, 1L); + checkBucketQuotaInBytes(omMetadataManager, omBucketInfo, + preAllocatedSpace); + numMissingParents = missingParentInfos.size(); + checkBucketQuotaInNamespace(omBucketInfo, numMissingParents + 1L); + 
omBucketInfo.incrUsedNamespace(numMissingParents); // Add to cache entry can be done outside of lock for this openKey. // Even if bucket gets deleted, when commitKey we shall identify if @@ -292,7 +296,6 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, bucketName, Optional.absent(), Optional.of(missingParentInfos), trxnLogIndex); - numMissingParents = missingParentInfos.size(); // Prepare response omResponse.setCreateFileResponse(CreateFileResponse.newBuilder() .setKeyInfo(omKeyInfo.getNetworkProtobuf(getOmRequest().getVersion(), diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequestWithFSO.java index 8fdecbc50565..caeb2b0a9223 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileCreateRequestWithFSO.java @@ -199,8 +199,10 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, long preAllocatedSpace = newLocationList.size() * ozoneManager.getScmBlockSize() * repConfig .getRequiredNodes(); - checkBucketQuotaInBytes(omBucketInfo, preAllocatedSpace); - checkBucketQuotaInNamespace(omBucketInfo, 1L); + checkBucketQuotaInBytes(omMetadataManager, omBucketInfo, + preAllocatedSpace); + checkBucketQuotaInNamespace(omBucketInfo, numKeysCreated + 1L); + omBucketInfo.incrUsedNamespace(numKeysCreated); // Add to cache entry can be done outside of lock for this openKey. 
// Even if bucket gets deleted, when commitKey we shall identify if diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequest.java index ecc2bf06c547..46a3c51ce1b6 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequest.java @@ -117,7 +117,8 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { ozoneManager.getBlockTokenSecretManager(), repConfig, excludeList, ozoneManager.getScmBlockSize(), ozoneManager.getScmBlockSize(), ozoneManager.getPreallocateBlocksMax(), - ozoneManager.isGrpcBlockTokenEnabled(), ozoneManager.getOMNodeId()); + ozoneManager.isGrpcBlockTokenEnabled(), ozoneManager.getOMNodeId(), + ozoneManager.getMetrics()); // Set modification time and normalize key if required. 
KeyArgs.Builder newKeyArgs = @@ -221,7 +222,8 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, long totalAllocatedSpace = QuotaUtil.getReplicatedSize( preAllocatedKeySize, repConfig) + QuotaUtil.getReplicatedSize( hadAllocatedKeySize, repConfig); - checkBucketQuotaInBytes(omBucketInfo, totalAllocatedSpace); + checkBucketQuotaInBytes(omMetadataManager, omBucketInfo, + totalAllocatedSpace); // Append new block openKeyInfo.appendNewBlocks(newLocationList, false); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequestWithFSO.java index 649593778bc5..f28cade18cb9 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMAllocateBlockRequestWithFSO.java @@ -152,7 +152,8 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, long totalAllocatedSpace = QuotaUtil.getReplicatedSize( preAllocatedKeySize, repConfig) + QuotaUtil.getReplicatedSize( hadAllocatedKeySize, repConfig); - checkBucketQuotaInBytes(omBucketInfo, totalAllocatedSpace); + checkBucketQuotaInBytes(omMetadataManager, omBucketInfo, + totalAllocatedSpace); // Append new block openKeyInfo.appendNewBlocks(newLocationList, false); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java index d783064fc2d2..f319a5c9c8ef 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java +++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java @@ -75,10 +75,13 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, volumeName, bucketName); lockSet.add(volBucketPair); } + OmBucketInfo omBucketInfo = getBucketInfo(omMetadataManager, volumeName, bucketName); // bucketInfo can be null in case of delete volume or bucket - if (null != omBucketInfo) { + // or key does not belong to bucket as bucket is recreated + if (null != omBucketInfo + && omBucketInfo.getObjectID() == path.getBucketId()) { omBucketInfo.incrUsedNamespace(-1L); volBucketInfoMap.putIfAbsent(volBucketPair, omBucketInfo); } @@ -98,7 +101,9 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, OmBucketInfo omBucketInfo = getBucketInfo(omMetadataManager, volumeName, bucketName); // bucketInfo can be null in case of delete volume or bucket - if (null != omBucketInfo) { + // or key does not belong to bucket as bucket is recreated + if (null != omBucketInfo + && omBucketInfo.getObjectID() == path.getBucketId()) { omBucketInfo.incrUsedBytes(-sumBlockLengths(keyInfo)); omBucketInfo.incrUsedNamespace(-1L); volBucketInfoMap.putIfAbsent(volBucketPair, omBucketInfo); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequest.java index 02cbf83f78b0..cfd04fd36614 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequest.java @@ -235,10 +235,12 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, oldKeyVersionsToDelete = getOldVersionsToCleanUp(dbOzoneKey, keyToDelete, omMetadataManager, trxnLogIndex, ozoneManager.isRatisEnabled()); - 
checkBucketQuotaInBytes(omBucketInfo, correctedSpace); + checkBucketQuotaInBytes(omMetadataManager, omBucketInfo, + correctedSpace); } else { checkBucketQuotaInNamespace(omBucketInfo, 1L); - checkBucketQuotaInBytes(omBucketInfo, correctedSpace); + checkBucketQuotaInBytes(omMetadataManager, omBucketInfo, + correctedSpace); omBucketInfo.incrUsedNamespace(1L); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequestWithFSO.java index 5c7ac450b8f3..ee6e99c56dee 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequestWithFSO.java @@ -173,10 +173,12 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, oldKeyVersionsToDelete = getOldVersionsToCleanUp(dbFileKey, keyToDelete, omMetadataManager, trxnLogIndex, ozoneManager.isRatisEnabled()); - checkBucketQuotaInBytes(omBucketInfo, correctedSpace); + checkBucketQuotaInBytes(omMetadataManager, omBucketInfo, + correctedSpace); } else { checkBucketQuotaInNamespace(omBucketInfo, 1L); - checkBucketQuotaInBytes(omBucketInfo, correctedSpace); + checkBucketQuotaInBytes(omMetadataManager, omBucketInfo, + correctedSpace); omBucketInfo.incrUsedNamespace(1L); } @@ -193,7 +195,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, } // Add to cache of open key table and key table. 
- OMFileRequest.addOpenFileTableCacheEntry(omMetadataManager, dbFileKey, + OMFileRequest.addOpenFileTableCacheEntry(omMetadataManager, dbOpenFileKey, null, fileName, trxnLogIndex); OMFileRequest.addFileTableCacheEntry(omMetadataManager, dbFileKey, diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequest.java index ced8b7c48998..eeb25fb5053b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequest.java @@ -150,7 +150,8 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { new ExcludeList(), requestedSize, scmBlockSize, ozoneManager.getPreallocateBlocksMax(), ozoneManager.isGrpcBlockTokenEnabled(), - ozoneManager.getOMNodeId()); + ozoneManager.getOMNodeId(), + ozoneManager.getMetrics()); newKeyArgs = keyArgs.toBuilder().setModificationTime(Time.now()) .setType(type).setFactor(factor) @@ -266,11 +267,6 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, .getAllParentInfo(ozoneManager, keyArgs, pathInfo.getMissingParents(), inheritAcls, trxnLogIndex); - // Add cache entries for the prefix directories. - // Skip adding for the file key itself, until Key Commit. 
- OMFileRequest.addKeyTableCacheEntries(omMetadataManager, volumeName, - bucketName, Optional.absent(), Optional.of(missingParentInfos), - trxnLogIndex); numMissingParents = missingParentInfos.size(); } @@ -309,8 +305,18 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, * ozoneManager.getScmBlockSize() * replicationConfig.getRequiredNodes(); // check bucket and volume quota - checkBucketQuotaInBytes(omBucketInfo, preAllocatedSpace); - checkBucketQuotaInNamespace(omBucketInfo, 1L); + checkBucketQuotaInBytes(omMetadataManager, omBucketInfo, + preAllocatedSpace); + checkBucketQuotaInNamespace(omBucketInfo, numMissingParents + 1L); + omBucketInfo.incrUsedNamespace(numMissingParents); + + if (numMissingParents > 0) { + // Add cache entries for the prefix directories. + // Skip adding for the file key itself, until Key Commit. + OMFileRequest.addKeyTableCacheEntries(omMetadataManager, volumeName, + bucketName, Optional.absent(), Optional.of(missingParentInfos), + trxnLogIndex); + } // Add to cache entry can be done outside of lock for this openKey. 
// Even if bucket gets deleted, when commitKey we shall identify if diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequestWithFSO.java index 09bc5e640bb2..e7bf882b7d1b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCreateRequestWithFSO.java @@ -185,8 +185,10 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, long preAllocatedSpace = newLocationList.size() * ozoneManager.getScmBlockSize() * repConfig .getRequiredNodes(); - checkBucketQuotaInBytes(omBucketInfo, preAllocatedSpace); - checkBucketQuotaInNamespace(omBucketInfo, 1L); + checkBucketQuotaInBytes(omMetadataManager, omBucketInfo, + preAllocatedSpace); + checkBucketQuotaInNamespace(omBucketInfo, numKeysCreated + 1L); + omBucketInfo.incrUsedNamespace(numKeysCreated); // Add to cache entry can be done outside of lock for this openKey. 
// Even if bucket gets deleted, when commitKey we shall identify if diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java index 8c79e16dcd94..6f9b602bb748 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java @@ -39,6 +39,7 @@ import org.apache.hadoop.ozone.OmUtils; import org.apache.hadoop.ozone.OzoneAcl; import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.om.OMMetrics; import org.apache.hadoop.ozone.om.PrefixManager; import org.apache.hadoop.ozone.om.ResolvedBucket; import org.apache.hadoop.ozone.om.helpers.BucketEncryptionKeyInfo; @@ -134,7 +135,8 @@ protected List< OmKeyLocationInfo > allocateBlock(ScmClient scmClient, OzoneBlockTokenSecretManager secretManager, ReplicationConfig replicationConfig, ExcludeList excludeList, long requestedSize, long scmBlockSize, int preallocateBlocksMax, - boolean grpcBlockTokenEnabled, String omID) throws IOException { + boolean grpcBlockTokenEnabled, String omID, OMMetrics omMetrics) + throws IOException { int dataGroupSize = replicationConfig instanceof ECReplicationConfig ? ((ECReplicationConfig) replicationConfig).getData() : 1; int numBlocks = (int) Math.min(preallocateBlocksMax, @@ -148,6 +150,7 @@ protected List< OmKeyLocationInfo > allocateBlock(ScmClient scmClient, .allocateBlock(scmBlockSize, numBlocks, replicationConfig, omID, excludeList); } catch (SCMException ex) { + omMetrics.incNumBlockAllocateCallFails(); if (ex.getResult() .equals(SCMException.ResultCodes.SAFE_MODE_EXCEPTION)) { throw new OMException(ex.getMessage(), @@ -517,18 +520,20 @@ protected FileEncryptionInfo getFileEncryptionInfo(KeyArgs keyArgs) { /** * Check bucket quota in bytes. 
+ * @param metadataManager * @param omBucketInfo * @param allocateSize * @throws IOException */ - protected void checkBucketQuotaInBytes(OmBucketInfo omBucketInfo, + protected void checkBucketQuotaInBytes( + OMMetadataManager metadataManager, OmBucketInfo omBucketInfo, long allocateSize) throws IOException { if (omBucketInfo.getQuotaInBytes() > OzoneConsts.QUOTA_RESET) { long usedBytes = omBucketInfo.getUsedBytes(); long quotaInBytes = omBucketInfo.getQuotaInBytes(); if (quotaInBytes - usedBytes < allocateSize) { throw new OMException("The DiskSpace quota of bucket:" - + omBucketInfo.getBucketName() + "exceeded: quotaInBytes: " + + omBucketInfo.getBucketName() + " exceeded quotaInBytes: " + quotaInBytes + " Bytes but diskspace consumed: " + (usedBytes + allocateSize) + " Bytes.", OMException.ResultCodes.QUOTA_EXCEEDED); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3InitiateMultipartUploadRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3InitiateMultipartUploadRequestWithFSO.java index 8a4e5c99e95b..a0b8b45da54e 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3InitiateMultipartUploadRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3InitiateMultipartUploadRequestWithFSO.java @@ -159,8 +159,8 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, // care of in the final complete multipart upload. AWS S3 behavior is // also like this, even when key exists in a bucket, user can still // initiate MPU.
- final OmBucketInfo bucketInfo = omMetadataManager.getBucketTable() - .get(omMetadataManager.getBucketKey(volumeName, bucketName)); + final OmBucketInfo bucketInfo = getBucketInfo(omMetadataManager, + volumeName, bucketName); final ReplicationConfig replicationConfig = OzoneConfigUtil .resolveReplicationConfigPreference(keyArgs.getType(), keyArgs.getFactor(), keyArgs.getEcReplicationConfig(), @@ -193,6 +193,12 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, OMPBHelper.convert(keyArgs.getFileEncryptionInfo()) : null) .setParentObjectID(pathInfoFSO.getLastKnownParentId()) .build(); + + // validate and update namespace for missing parent directory + if (null != missingParentInfos) { + checkBucketQuotaInNamespace(bucketInfo, missingParentInfos.size()); + bucketInfo.incrUsedNamespace(missingParentInfos.size()); + } // Add cache entries for the prefix directories. // Skip adding for the file key itself, until Key Commit. @@ -218,7 +224,8 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, .setKeyName(keyName) .setMultipartUploadID(keyArgs.getMultipartUploadID())) .build(), multipartKeyInfo, omKeyInfo, multipartKey, - missingParentInfos, getBucketLayout(), volumeId, bucketId); + missingParentInfos, getBucketLayout(), volumeId, bucketId, + bucketInfo.copyObject()); result = Result.SUCCESS; } catch (IOException ex) { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCommitPartRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCommitPartRequest.java index 0644e1f27447..fdf4269e9ca2 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCommitPartRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCommitPartRequest.java @@ -229,8 +229,13 @@ public 
OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, omBucketInfo = getBucketInfo(omMetadataManager, volumeName, bucketName); long correctedSpace = omKeyInfo.getReplicatedSize(); - // TODO: S3MultipartUpload did not check quota and did not add nameSpace, - // we need to fix these issues in HDDS-6650. + if (null != oldPartKeyInfo) { + OmKeyInfo partKeyToBeDeleted = + OmKeyInfo.getFromProtobuf(oldPartKeyInfo.getPartKeyInfo()); + correctedSpace -= partKeyToBeDeleted.getReplicatedSize(); + } + checkBucketQuotaInBytes(omMetadataManager, omBucketInfo, + correctedSpace); omBucketInfo.incrUsedBytes(correctedSpace); omResponse.setCommitMultiPartUploadResponse( diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCompleteRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCompleteRequest.java index 4a198531b28e..1c748930e8b4 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCompleteRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/multipart/S3MultipartUploadCompleteRequest.java @@ -229,8 +229,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, oldKeyVersionsToDelete = getOldVersionsToCleanUp(dbOzoneKey, keyToDelete, omMetadataManager, trxnLogIndex, ozoneManager.isRatisEnabled()); - long numCopy = keyToDelete.getReplicationConfig().getRequiredNodes(); - usedBytesDiff -= keyToDelete.getDataSize() * numCopy; + usedBytesDiff -= keyToDelete.getReplicatedSize(); } else { checkBucketQuotaInNamespace(omBucketInfo, 1L); omBucketInfo.incrUsedNamespace(1L); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeSetQuotaRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeSetQuotaRequest.java index 
d8848fc116dd..c1bf5a1c3305 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeSetQuotaRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeSetQuotaRequest.java @@ -194,15 +194,31 @@ public boolean checkQuotaBytesValid(OMMetadataManager metadataManager, || volumeQuotaInBytes == 0) { return false; } + + // if volume quota is for reset, no need further check + if (volumeQuotaInBytes == OzoneConsts.QUOTA_RESET) { + return true; + } + boolean isBucketQuotaSet = true; List bucketList = metadataManager.listBuckets( volumeName, null, null, Integer.MAX_VALUE); for (OmBucketInfo bucketInfo : bucketList) { long nextQuotaInBytes = bucketInfo.getQuotaInBytes(); if (nextQuotaInBytes > OzoneConsts.QUOTA_RESET) { totalBucketQuota += nextQuotaInBytes; + } else { + isBucketQuotaSet = false; + break; } } + + if (!isBucketQuotaSet) { + throw new OMException("Can not set volume space quota on volume " + + "as some of buckets in this volume have no quota set.", + OMException.ResultCodes.QUOTA_ERROR); + } + if (volumeQuotaInBytes < totalBucketQuota && volumeQuotaInBytes != OzoneConsts.QUOTA_RESET) { throw new OMException("Total buckets quota in this volume " + diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMDirectoryCreateResponse.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMDirectoryCreateResponse.java index da770ede82c2..d484ce116e03 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMDirectoryCreateResponse.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMDirectoryCreateResponse.java @@ -20,6 +20,7 @@ import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import 
org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.response.CleanupTableInfo; import org.apache.hadoop.ozone.om.request.file.OMDirectoryCreateRequest.Result; @@ -49,15 +50,17 @@ public class OMDirectoryCreateResponse extends OmKeyResponse { private OmKeyInfo dirKeyInfo; private List parentKeyInfos; private Result result; + private OmBucketInfo bucketInfo; public OMDirectoryCreateResponse(@Nonnull OMResponse omResponse, @Nonnull OmKeyInfo dirKeyInfo, @Nonnull List parentKeyInfos, @Nonnull Result result, - @Nonnull BucketLayout bucketLayout) { + @Nonnull BucketLayout bucketLayout, @Nonnull OmBucketInfo bucketInfo) { super(omResponse, bucketLayout); this.dirKeyInfo = dirKeyInfo; this.parentKeyInfos = parentKeyInfos; this.result = result; + this.bucketInfo = bucketInfo; } /** @@ -89,6 +92,10 @@ protected void addToDBBatch(OMMetadataManager omMetadataManager, dirKeyInfo.getBucketName(), dirKeyInfo.getKeyName()); omMetadataManager.getKeyTable(getBucketLayout()) .putWithBatch(batchOperation, dirKey, dirKeyInfo); + String bucketKey = omMetadataManager.getBucketKey( + bucketInfo.getVolumeName(), bucketInfo.getBucketName()); + omMetadataManager.getBucketTable().putWithBatch(batchOperation, + bucketKey, bucketInfo); } else if (Result.DIRECTORY_ALREADY_EXISTS == result) { // When directory already exists, we don't add it to cache. And it is // not an error, in this case dirKeyInfo will be null. 
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMDirectoryCreateResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMDirectoryCreateResponseWithFSO.java index da095ab1b66a..1f39e382084c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMDirectoryCreateResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMDirectoryCreateResponseWithFSO.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.request.file.OMDirectoryCreateRequest.Result; import org.apache.hadoop.ozone.om.response.CleanupTableInfo; @@ -49,18 +50,21 @@ public class OMDirectoryCreateResponseWithFSO extends OmKeyResponse { private Result result; private long volumeId; private long bucketId; + private OmBucketInfo bucketInfo; + @SuppressWarnings("checkstyle:ParameterNumber") public OMDirectoryCreateResponseWithFSO(@Nonnull OMResponse omResponse, @Nonnull long volumeId, @Nonnull long bucketId, @Nonnull OmDirectoryInfo dirInfo, @Nonnull List pDirInfos, @Nonnull Result result, - @Nonnull BucketLayout bucketLayout) { + @Nonnull BucketLayout bucketLayout, @Nonnull OmBucketInfo bucketInfo) { super(omResponse, bucketLayout); this.dirInfo = dirInfo; this.parentDirInfos = pDirInfos; this.result = result; this.volumeId = volumeId; this.bucketId = bucketId; + this.bucketInfo = bucketInfo; } /** @@ -100,6 +104,10 @@ private void addToDirectoryTable(OMMetadataManager omMetadataManager, dirInfo.getParentObjectID(), dirInfo.getName()); omMetadataManager.getDirectoryTable().putWithBatch(batchOperation, dirKey, dirInfo); + String bucketKey = 
omMetadataManager.getBucketKey( + bucketInfo.getVolumeName(), bucketInfo.getBucketName()); + omMetadataManager.getBucketTable().putWithBatch(batchOperation, + bucketKey, bucketInfo); } else { // When directory already exists, we don't add it to cache. And it is // not an error, in this case dirKeyInfo will be null. diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMFileCreateResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMFileCreateResponseWithFSO.java index 2c635b7a3d7b..fd0d9957cadc 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMFileCreateResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/file/OMFileCreateResponseWithFSO.java @@ -87,6 +87,12 @@ public void addToDBBatch(OMMetadataManager omMetadataMgr, omMetadataMgr.getDirectoryTable().putWithBatch(batchOp, parentKey, parentDirInfo); } + + String bucketKey = omMetadataMgr.getBucketKey( + getOmBucketInfo().getVolumeName(), + getOmBucketInfo().getBucketName()); + omMetadataMgr.getBucketTable().putWithBatch(batchOp, + bucketKey, getOmBucketInfo()); } OMFileRequest.addToOpenFileTable(omMetadataMgr, batchOp, getOmKeyInfo(), diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java index eed768fbb0f1..b89d56dc04ee 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java @@ -85,8 +85,10 @@ public void addToDBBatch(OMMetadataManager omMetadataManager, OmKeyInfo keyInfo = OmKeyInfo.getFromProtobuf(key); String ozoneDbKey = 
omMetadataManager.getOzonePathKey(volumeId, bucketId, keyInfo.getParentObjectID(), keyInfo.getFileName()); + String ozoneDeleteKey = omMetadataManager.getOzoneDeletePathKey( + key.getObjectID(), ozoneDbKey); omMetadataManager.getDeletedDirTable().putWithBatch(batchOperation, - ozoneDbKey, keyInfo); + ozoneDeleteKey, keyInfo); omMetadataManager.getDirectoryTable().deleteWithBatch(batchOperation, ozoneDbKey); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyCreateResponse.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyCreateResponse.java index 4a31b1661a41..962c5b82b8ff 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyCreateResponse.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyCreateResponse.java @@ -91,6 +91,13 @@ public void addToDBBatch(OMMetadataManager omMetadataManager, omMetadataManager.getKeyTable(getBucketLayout()) .putWithBatch(batchOperation, parentKey, parentKeyInfo); } + + // namespace quota changes for parent directory + String bucketKey = omMetadataManager.getBucketKey( + getOmBucketInfo().getVolumeName(), + getOmBucketInfo().getBucketName()); + omMetadataManager.getBucketTable().putWithBatch(batchOperation, + bucketKey, getOmBucketInfo()); } String openKey = omMetadataManager.getOpenKey(omKeyInfo.getVolumeName(), diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyDeleteResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyDeleteResponseWithFSO.java index 22f1702836a4..e18bcaae1fe8 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyDeleteResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyDeleteResponseWithFSO.java @@ -84,8 +84,10 @@ public void 
addToDBBatch(OMMetadataManager omMetadataManager, // Sets full absolute key name to OmKeyInfo, which is // required for moving the sub-files to KeyDeletionService. omKeyInfo.setKeyName(keyName); + String ozoneDeleteKey = omMetadataManager.getOzoneDeletePathKey( + omKeyInfo.getObjectID(), ozoneDbKey); omMetadataManager.getDeletedDirTable().putWithBatch( - batchOperation, ozoneDbKey, omKeyInfo); + batchOperation, ozoneDeleteKey, omKeyInfo); } else { Table keyTable = omMetadataManager.getKeyTable(getBucketLayout()); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeysDeleteResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeysDeleteResponseWithFSO.java index d0d98e1f549c..612fa3661836 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeysDeleteResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeysDeleteResponseWithFSO.java @@ -70,8 +70,10 @@ public void addToDBBatch(OMMetadataManager omMetadataManager, omKeyInfo.getParentObjectID(), omKeyInfo.getFileName()); omMetadataManager.getDirectoryTable().deleteWithBatch(batchOperation, ozoneDbKey); + String ozoneDeleteKey = omMetadataManager.getOzoneDeletePathKey( + omKeyInfo.getObjectID(), ozoneDbKey); omMetadataManager.getDeletedDirTable().putWithBatch( - batchOperation, ozoneDbKey, omKeyInfo); + batchOperation, ozoneDeleteKey, omKeyInfo); } // remove keys from FileTable and add to DeletedTable diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/s3/multipart/S3InitiateMultipartUploadResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/s3/multipart/S3InitiateMultipartUploadResponseWithFSO.java index 25506e024a82..a380980dfa34 100644 --- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/s3/multipart/S3InitiateMultipartUploadResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/s3/multipart/S3InitiateMultipartUploadResponseWithFSO.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmMultipartKeyInfo; @@ -48,6 +49,7 @@ public class S3InitiateMultipartUploadResponseWithFSO extends private String mpuDBKey; private long volumeId; private long bucketId; + private OmBucketInfo omBucketInfo; @SuppressWarnings("parameternumber") public S3InitiateMultipartUploadResponseWithFSO( @@ -56,12 +58,13 @@ public S3InitiateMultipartUploadResponseWithFSO( @Nonnull OmKeyInfo omKeyInfo, @Nonnull String mpuDBKey, @Nonnull List parentDirInfos, @Nonnull BucketLayout bucketLayout, @Nonnull long volumeId, - @Nonnull long bucketId) { + @Nonnull long bucketId, @Nonnull OmBucketInfo omBucketInfo) { super(omResponse, omMultipartKeyInfo, omKeyInfo, bucketLayout); this.parentDirInfos = parentDirInfos; this.mpuDBKey = mpuDBKey; this.volumeId = volumeId; this.bucketId = bucketId; + this.omBucketInfo = omBucketInfo; } /** @@ -89,6 +92,13 @@ public void addToDBBatch(OMMetadataManager omMetadataManager, omMetadataManager.getDirectoryTable().putWithBatch(batchOperation, parentKey, parentDirInfo); } + + // namespace quota changes for parent directory + String bucketKey = omMetadataManager.getBucketKey( + omBucketInfo.getVolumeName(), + omBucketInfo.getBucketName()); + omMetadataManager.getBucketTable().putWithBatch(batchOperation, + bucketKey, omBucketInfo); } OMFileRequest.addToOpenFileTable(omMetadataManager, 
batchOperation, diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 834361bbfe53..3a633f255119 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -18,6 +18,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ServiceException; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.utils.BackgroundService; import org.apache.hadoop.hdds.utils.BackgroundTask; @@ -27,6 +28,7 @@ import org.apache.hadoop.hdds.utils.db.Table.KeyValue; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.ClientVersion; +import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; @@ -148,61 +150,54 @@ public BackgroundTaskResult call() throws Exception { deleteTableIterator = ozoneManager.getMetadataManager(). 
getDeletedDirTable().iterator()) { + List> allSubDirList + = new ArrayList<>((int) remainNum); long startTime = Time.monotonicNow(); while (remainNum > 0 && deleteTableIterator.hasNext()) { pendingDeletedDirInfo = deleteTableIterator.next(); - // step-0: Get one pending deleted directory - if (LOG.isDebugEnabled()) { - LOG.debug("Pending deleted dir name: {}", - pendingDeletedDirInfo.getValue().getKeyName()); - } - final String[] keys = pendingDeletedDirInfo.getKey() - .split(OM_KEY_PREFIX); - final long volumeId = Long.parseLong(keys[1]); - final long bucketId = Long.parseLong(keys[2]); - - // step-1: get all sub directories under the deletedDir - List subDirs = ozoneManager.getKeyManager() - .getPendingDeletionSubDirs(volumeId, bucketId, - pendingDeletedDirInfo.getValue(), remainNum); - remainNum = remainNum - subDirs.size(); - - if (LOG.isDebugEnabled()) { - for (OmKeyInfo dirInfo : subDirs) { - LOG.debug("Moved sub dir name: {}", dirInfo.getKeyName()); - } - } - - // step-2: get all sub files under the deletedDir - List subFiles = ozoneManager.getKeyManager() - .getPendingDeletionSubFiles(volumeId, bucketId, - pendingDeletedDirInfo.getValue(), remainNum); - remainNum = remainNum - subFiles.size(); - - if (LOG.isDebugEnabled()) { - for (OmKeyInfo fileInfo : subFiles) { - LOG.debug("Moved sub file name: {}", fileInfo.getKeyName()); - } - } - - // step-3: Since there is a boundary condition of 'numEntries' in - // each batch, check whether the sub paths count reached batch size - // limit. If count reached limit then there can be some more child - // paths to be visited and will keep the parent deleted directory - // for one more pass. - String purgeDeletedDir = remainNum > 0 ? 
- pendingDeletedDirInfo.getKey() : null; - PurgePathRequest request = wrapPurgeRequest(volumeId, bucketId, - purgeDeletedDir, subFiles, subDirs); + PurgePathRequest request = prepareDeleteDirRequest( + remainNum, pendingDeletedDirInfo.getValue(), + pendingDeletedDirInfo.getKey(), allSubDirList); purgePathRequestList.add(request); - + remainNum = remainNum - request.getDeletedSubFilesCount(); + remainNum = remainNum - request.getMarkDeletedSubDirsCount(); // Count up the purgeDeletedDir, subDirs and subFiles - if (purgeDeletedDir != null) { + if (request.getDeletedDir() != null + && !request.getDeletedDir().isEmpty()) { dirNum++; } - subDirNum += subDirs.size(); - subFileNum += subFiles.size(); + subDirNum += request.getMarkDeletedSubDirsCount(); + subFileNum += request.getDeletedSubFilesCount(); + } + + // Optimization to handle delete sub-dir and keys to remove quickly + // This case will be useful to handle when depth of directory is high + int subdirDelNum = 0; + int subDirRecursiveCnt = 0; + while (remainNum > 0 && subDirRecursiveCnt < allSubDirList.size()) { + try { + Pair stringOmKeyInfoPair + = allSubDirList.get(subDirRecursiveCnt); + PurgePathRequest request = prepareDeleteDirRequest( + remainNum, stringOmKeyInfoPair.getValue(), + stringOmKeyInfoPair.getKey(), allSubDirList); + purgePathRequestList.add(request); + remainNum = remainNum - request.getDeletedSubFilesCount(); + remainNum = remainNum - request.getMarkDeletedSubDirsCount(); + // Count up the purgeDeletedDir, subDirs and subFiles + if (request.getDeletedDir() != null + && !request.getDeletedDir().isEmpty()) { + subdirDelNum++; + } + subDirNum += request.getMarkDeletedSubDirsCount(); + subFileNum += request.getDeletedSubFilesCount(); + subDirRecursiveCnt++; + } catch (IOException e) { + LOG.error("Error while running delete directories and files " + + "background task. 
Will retry at next run for subset.", e); + break; + } } // TODO: need to handle delete with non-ratis @@ -211,14 +206,15 @@ public BackgroundTaskResult call() throws Exception { } if (dirNum != 0 || subDirNum != 0 || subFileNum != 0) { - deletedDirsCount.addAndGet(dirNum); - movedDirsCount.addAndGet(subDirNum); + deletedDirsCount.addAndGet(dirNum + subdirDelNum); + movedDirsCount.addAndGet(subDirNum - subdirDelNum); movedFilesCount.addAndGet(subFileNum); - LOG.info("Number of dirs deleted: {}, Number of sub-files moved:" + + LOG.info("Number of dirs deleted: {}, Number of sub-dir " + + "deleted: {}, Number of sub-files moved:" + " {} to DeletedTable, Number of sub-dirs moved {} to " + "DeletedDirectoryTable, iteration elapsed: {}ms," + " totalRunCount: {}", - dirNum, subFileNum, subDirNum, + dirNum, subdirDelNum, subFileNum, (subDirNum - subdirDelNum), Time.monotonicNow() - startTime, getRunCount()); } @@ -232,6 +228,57 @@ public BackgroundTaskResult call() throws Exception { return BackgroundTaskResult.EmptyTaskResult.newResult(); } } + + private PurgePathRequest prepareDeleteDirRequest( + long remainNum, OmKeyInfo pendingDeletedDirInfo, String delDirName, + List> subDirList) throws IOException { + // step-0: Get one pending deleted directory + if (LOG.isDebugEnabled()) { + LOG.debug("Pending deleted dir name: {}", + pendingDeletedDirInfo.getKeyName()); + } + + final String[] keys = delDirName.split(OM_KEY_PREFIX); + final long volumeId = Long.parseLong(keys[1]); + final long bucketId = Long.parseLong(keys[2]); + + // step-1: get all sub directories under the deletedDir + List subDirs = ozoneManager.getKeyManager() + .getPendingDeletionSubDirs(volumeId, bucketId, + pendingDeletedDirInfo, remainNum); + remainNum = remainNum - subDirs.size(); + + OMMetadataManager omMetadataManager = ozoneManager.getMetadataManager(); + for (OmKeyInfo dirInfo : subDirs) { + String ozoneDbKey = omMetadataManager.getOzonePathKey(volumeId, + bucketId, dirInfo.getParentObjectID(), 
dirInfo.getFileName()); + String ozoneDeleteKey = omMetadataManager.getOzoneDeletePathKey( + dirInfo.getObjectID(), ozoneDbKey); + subDirList.add(Pair.of(ozoneDeleteKey, dirInfo)); + LOG.debug("Moved sub dir name: {}", dirInfo.getKeyName()); + } + + // step-2: get all sub files under the deletedDir + List subFiles = ozoneManager.getKeyManager() + .getPendingDeletionSubFiles(volumeId, bucketId, + pendingDeletedDirInfo, remainNum); + remainNum = remainNum - subFiles.size(); + + if (LOG.isDebugEnabled()) { + for (OmKeyInfo fileInfo : subFiles) { + LOG.debug("Moved sub file name: {}", fileInfo.getKeyName()); + } + } + + // step-3: Since there is a boundary condition of 'numEntries' in + // each batch, check whether the sub paths count reached batch size + // limit. If count reached limit then there can be some more child + // paths to be visited and will keep the parent deleted directory + // for one more pass. + String purgeDeletedDir = remainNum > 0 ? delDirName : null; + return wrapPurgeRequest(volumeId, bucketId, + purgeDeletedDir, subFiles, subDirs); + } /** * Returns the number of dirs deleted by the background service. 
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java index e4e74b006abd..9f313992304d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java @@ -18,12 +18,12 @@ package org.apache.hadoop.ozone.protocolPB; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.stream.Collectors; +import com.google.protobuf.ByteString; import com.google.protobuf.ServiceException; import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -67,6 +67,8 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.CheckVolumeAccessRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.CheckVolumeAccessResponse; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.EchoRPCRequest; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.EchoRPCResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.FinalizeUpgradeProgressRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.FinalizeUpgradeProgressResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.GetFileStatusRequest; @@ -284,6 +286,10 @@ public OMResponse handleReadRequest(OMRequest request) { responseBuilder.setGetKeyInfoResponse( getKeyInfo(request.getGetKeyInfoRequest(), request.getVersion())); break; + case EchoRPC: + EchoRPCResponse echoRPCResponse = + echoRPC(request.getEchoRPCRequest()); + 
responseBuilder.setEchoRPCResponse(echoRPCResponse); default: responseBuilder.setSuccess(false); responseBuilder.setMessage("Unrecognized Command Type: " + cmdType); @@ -946,30 +952,6 @@ public static OMResponse disallowGetFileStatusWithECReplicationConfig( return resp; } - @RequestFeatureValidator( - conditions = ValidationCondition.OLDER_CLIENT_REQUESTS, - processingPhase = RequestProcessingPhase.POST_PROCESS, - requestType = Type.EchoRPC - ) - public static OMResponse echoRPC( - OMRequest req, OMResponse resp, ValidationContext ctx) - throws ServiceException { - if (!resp.hasEchoRPCResponse()) { - return resp; - } - byte[] payloadBytes = new byte[0]; - int payloadRespSize = Math.min( - req.getEchoRPCRequest().getPayloadSizeResp() - * RPC_PAYLOAD_MULTIPLICATION_FACTOR, MAX_SIZE_KB); - if (payloadRespSize > 0) { - payloadBytes = RandomUtils.nextBytes(payloadRespSize); - } - resp = resp.toBuilder() - .setMessage(new String(payloadBytes, StandardCharsets.UTF_8)) - .clearEchoRPCResponse() - .build(); - return resp; - } @RequestFeatureValidator( conditions = ValidationCondition.OLDER_CLIENT_REQUESTS, @@ -1218,4 +1200,20 @@ private GetS3VolumeContextResponse getS3VolumeContext() public OzoneManager getOzoneManager() { return impl; } + + private EchoRPCResponse echoRPC(EchoRPCRequest req) { + EchoRPCResponse.Builder builder = + EchoRPCResponse.newBuilder(); + + byte[] payloadBytes = new byte[0]; + int payloadRespSize = Math.min( + req.getPayloadSizeResp() + * RPC_PAYLOAD_MULTIPLICATION_FACTOR, MAX_SIZE_KB); + if (payloadRespSize > 0) { + payloadBytes = RandomUtils.nextBytes(payloadRespSize); + } + builder.setPayload(ByteString.copyFrom(payloadBytes)); + return builder.build(); + } + } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java new file mode 100644 index 000000000000..ceb449b8fb18 --- /dev/null +++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.ozone.security; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCertResponseProto; +import org.apache.hadoop.hdds.security.x509.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.client.CommonCertificateClient; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; +import org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest; +import org.apache.hadoop.hdds.security.x509.exceptions.CertificateException; +import org.apache.hadoop.ozone.om.OMStorage; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.ha.OMHANodeDetails; +import org.apache.hadoop.security.UserGroupInformation; +import org.bouncycastle.pkcs.PKCS10CertificationRequest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; 
+import java.nio.file.Path; +import java.security.KeyPair; +import java.util.function.Consumer; + +import static org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest.getEncodedString; + +/** + * Certificate client for OzoneManager. + */ +public class OMCertificateClient extends CommonCertificateClient { + + public static final Logger LOG = + LoggerFactory.getLogger(OMCertificateClient.class); + + public static final String COMPONENT_NAME = "om"; + private String scmID; + private final String clusterID; + private final HddsProtos.OzoneManagerDetailsProto omInfo; + + @SuppressWarnings("parameternumber") + public OMCertificateClient(SecurityConfig secConfig, String scmId, + String clusterId, HddsProtos.OzoneManagerDetailsProto omDetails, + String certSerialId, String localCrlId, + Consumer persistCertIdCallback, Runnable shutdownCallback) { + super(secConfig, LOG, certSerialId, COMPONENT_NAME, persistCertIdCallback, + shutdownCallback); + this.setLocalCrlId(localCrlId != null ? 
+ Long.parseLong(localCrlId) : 0); + this.scmID = scmId; + this.clusterID = clusterId; + this.omInfo = omDetails; + } + + public OMCertificateClient(SecurityConfig secConfig, + OMStorage omStorage, String scmID, Consumer saveCertIdCallback, + Runnable shutdownCallback) { + this(secConfig, scmID, omStorage.getClusterID(), + OzoneManager.getOmDetailsProto( + (OzoneConfiguration) secConfig.getConfiguration(), + omStorage.getOmId()), + omStorage.getOmCertSerialId(), null, + saveCertIdCallback, shutdownCallback); + } + + public OMCertificateClient(SecurityConfig secConfig, OMStorage omStorage, + String scmID) { + this(secConfig, scmID, omStorage.getClusterID(), + OzoneManager.getOmDetailsProto( + (OzoneConfiguration) secConfig.getConfiguration(), + omStorage.getOmId()), + omStorage.getOmCertSerialId(), null, null, null); + } + + public OMCertificateClient(SecurityConfig secConfig) { + this(secConfig, null, null, null, null, null, null, null); + } + + public OMCertificateClient(SecurityConfig secConfig, String certSerialId) { + this(secConfig, null, null, null, certSerialId, null, null, null); + } + + /** + * Returns a CSR builder that can be used to create a Certificate signing + * request. + * The default flag is added to allow basic SSL handshake. + * + * @return CertificateSignRequest.Builder + */ + @Override + public CertificateSignRequest.Builder getCSRBuilder() + throws CertificateException { + return getCSRBuilder(new KeyPair(getPublicKey(), getPrivateKey())); + } + + /** + * Returns a CSR builder that can be used to create a Certificate signing + * request.
+ * + * @return CertificateSignRequest.Builder + */ + @Override + public CertificateSignRequest.Builder getCSRBuilder(KeyPair keyPair) + throws CertificateException { + CertificateSignRequest.Builder builder = super.getCSRBuilder() + .setDigitalEncryption(true) + .setDigitalSignature(true); + + String hostname = omInfo.getHostName(); + String subject; + if (builder.hasDnsName()) { + try { + subject = UserGroupInformation.getCurrentUser().getShortUserName() + + "@" + hostname; + } catch (IOException e) { + throw new CertificateException("Failed to getCurrentUser", e); + } + } else { + // With only IP in alt.name, certificate validation would fail if subject + // isn't a hostname either, so omit username. + subject = hostname; + } + + builder.setCA(false) + .setKey(keyPair) + .setConfiguration(getConfig()) + .setScmID(scmID) + .setClusterID(clusterID) + .setSubject(subject); + + OMHANodeDetails haOMHANodeDetails = + OMHANodeDetails.loadOMHAConfig(getConfig()); + String serviceName = + haOMHANodeDetails.getLocalNodeDetails().getServiceId(); + if (!StringUtils.isEmpty(serviceName)) { + builder.addServiceName(serviceName); + } + + LOG.info("Creating csr for OM->dns:{},ip:{},scmId:{},clusterId:{}," + + "subject:{}", hostname, omInfo.getIpAddress(), scmID, clusterID, + subject); + return builder; + } + + @Override + public String signAndStoreCertificate(PKCS10CertificationRequest request, + Path certPath) throws CertificateException { + try { + SCMGetCertResponseProto response = getScmSecureClient() + .getOMCertChain(omInfo, getEncodedString(request)); + + String pemEncodedCert = response.getX509Certificate(); + CertificateCodec certCodec = new CertificateCodec( + getSecurityConfig(), certPath); + + // Store SCM CA certificate. 
+ if (response.hasX509CACertificate()) { + String pemEncodedRootCert = response.getX509CACertificate(); + storeCertificate(pemEncodedRootCert, + true, true, false, certCodec, false); + storeCertificate(pemEncodedCert, true, false, false, certCodec, false); + + // Store Root CA certificate if available. + if (response.hasX509RootCACertificate()) { + storeCertificate(response.getX509RootCACertificate(), + true, false, true, certCodec, false); + } + return CertificateCodec.getX509Certificate(pemEncodedCert) + .getSerialNumber().toString(); + } else { + throw new CertificateException("Unable to retrieve OM certificate " + + "chain."); + } + } catch (IOException | java.security.cert.CertificateException e) { + LOG.error("Error while signing and storing SCM signed certificate.", e); + throw new CertificateException( + "Error while signing and storing SCM signed certificate.", e); + } + } + + @Override + public Logger getLogger() { + return LOG; + } +} diff --git a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/om-overview.html b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/om-overview.html index 8a54aa0adc55..3e2eb1709971 100644 --- a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/om-overview.html +++ b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/om-overview.html @@ -22,6 +22,10 @@

Status

Rpc port {{$ctrl.overview.jmx.RpcPort}} + + OM Id + {{$ctrl.role.Id}} + OM Roles (HA) {{$ctrl.overview.jmx.RatisRoles}} diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestChunkStreams.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestChunkStreams.java index cbde28b02a0f..cba5ee1c95cf 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestChunkStreams.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestChunkStreams.java @@ -19,12 +19,14 @@ import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.hdds.scm.storage.BlockInputStream; import org.apache.hadoop.ozone.client.io.KeyInputStream; +import org.jetbrains.annotations.NotNull; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import java.io.ByteArrayInputStream; -import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; @@ -39,51 +41,9 @@ public class TestChunkStreams { @Test public void testReadGroupInputStream() throws Exception { - try (KeyInputStream groupInputStream = new KeyInputStream()) { - - String dataString = RandomStringUtils.randomAscii(500); - byte[] buf = dataString.getBytes(UTF_8); - int offset = 0; - for (int i = 0; i < 5; i++) { - int tempOffset = offset; - BlockInputStream in = - new BlockInputStream(null, 100, null, null, true, null) { - private long pos = 0; - private ByteArrayInputStream in = - new ByteArrayInputStream(buf, tempOffset, 100); - - @Override - public synchronized void seek(long pos) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public synchronized long getPos() { - return pos; - } - - @Override - public boolean seekToNewSource(long targetPos) - throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public 
synchronized int read() throws IOException { - return in.read(); - } - - @Override - public synchronized int read(byte[] b, int off, int len) - throws IOException { - int readLen = in.read(b, off, len); - pos += readLen; - return readLen; - } - }; - offset += 100; - groupInputStream.addStream(in); - } + String dataString = RandomStringUtils.randomAscii(500); + try (KeyInputStream groupInputStream = + new KeyInputStream("key", createInputStreams(dataString))) { byte[] resBuf = new byte[500]; int len = groupInputStream.read(resBuf, 0, 500); @@ -95,52 +55,9 @@ public synchronized int read(byte[] b, int off, int len) @Test public void testErrorReadGroupInputStream() throws Exception { - try (KeyInputStream groupInputStream = new KeyInputStream()) { - - String dataString = RandomStringUtils.randomAscii(500); - byte[] buf = dataString.getBytes(UTF_8); - int offset = 0; - for (int i = 0; i < 5; i++) { - int tempOffset = offset; - BlockInputStream in = - new BlockInputStream(null, 100, null, null, true, null) { - private long pos = 0; - private ByteArrayInputStream in = - new ByteArrayInputStream(buf, tempOffset, 100); - - @Override - public synchronized void seek(long pos) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public synchronized long getPos() { - return pos; - } - - @Override - public synchronized boolean seekToNewSource(long targetPos) - throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public synchronized int read() throws IOException { - return in.read(); - } - - @Override - public synchronized int read(byte[] b, int off, int len) - throws IOException { - int readLen = in.read(b, off, len); - pos += readLen; - return readLen; - } - }; - offset += 100; - groupInputStream.addStream(in); - } - + String dataString = RandomStringUtils.randomAscii(500); + try (KeyInputStream groupInputStream = + new KeyInputStream("key", createInputStreams(dataString))) { byte[] resBuf = new byte[600]; // 
read 300 bytes first int len = groupInputStream.read(resBuf, 0, 340); @@ -163,4 +80,53 @@ public synchronized int read(byte[] b, int off, int len) assertEquals(-1, len); } } + + @NotNull + private List createInputStreams(String dataString) { + byte[] buf = dataString.getBytes(UTF_8); + List streams = new ArrayList<>(); + int offset = 0; + for (int i = 0; i < 5; i++) { + BlockInputStream in = createStream(buf, offset); + offset += 100; + streams.add(in); + } + return streams; + } + + private BlockInputStream createStream(byte[] buf, int offset) { + return new BlockInputStream(null, 100, null, null, true, null) { + private long pos; + private final ByteArrayInputStream in = + new ByteArrayInputStream(buf, offset, 100); + + @Override + public synchronized void seek(long pos) { + throw new UnsupportedOperationException(); + } + + @Override + public synchronized long getPos() { + return pos; + } + + @Override + public synchronized boolean seekToNewSource(long targetPos) { + throw new UnsupportedOperationException(); + } + + @Override + public synchronized int read() { + return in.read(); + } + + @Override + public synchronized int read(byte[] b, int off, int len) { + int readLen = in.read(b, off, len); + pos += readLen; + return readLen; + } + }; + + } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOMStorage.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOMStorage.java index b87dd589c81e..49e3f99f68ba 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOMStorage.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOMStorage.java @@ -17,76 +17,311 @@ package org.apache.hadoop.ozone.om; import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; import org.apache.hadoop.hdds.HddsConfigKeys; -import 
org.apache.hadoop.hdds.conf.MutableConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.ozone.test.GenericTestUtils; - -import org.apache.commons.io.FileUtils; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import org.apache.hadoop.ozone.om.upgrade.OMLayoutVersionManager; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; + +import static org.apache.hadoop.ozone.common.Storage.StorageState.INITIALIZED; +import static org.apache.hadoop.ozone.om.OMStorage.ERROR_OM_IS_ALREADY_INITIALIZED; +import static org.apache.hadoop.ozone.om.OMStorage.ERROR_UNEXPECTED_OM_NODE_ID_TEMPLATE; +import static org.apache.hadoop.ozone.om.OMStorage.OM_CERT_SERIAL_ID; +import static org.apache.hadoop.ozone.om.OMStorage.OM_ID; +import static org.apache.hadoop.ozone.om.OMStorage.OM_NODE_ID; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.nullValue; +import static org.junit.Assert.fail; /** * Testing OMStorage class. + * Assumptions tested: + * 1. certificate serial ID can be set and unset anytime. + * 2. OmId the UUID of the Ozone Manager can be set only when the OMStorage + * is not initialized already. Once initialized, setting OmId throws + * IOException + * 3. OmNodeId: + * 3.1. can be set when the storage is not initialized, once initialize, + * setting OmNodeId throws IOException + * 3.2. verifying the OmNodeId is possible once the storage is initialized, + * until it is not initialized, verification throws IOException + * 3.3. verifying the OmNodeId does not do anything if the provided value is + * equal to the stored value, throws an IOException otherwise + * 4. Configuration parsing: + * 4.1. 
getOmDbDir returns the configured + * {@link OMConfigKeys#OZONE_OM_DB_DIRS} value + * 4.2. getOmDbDir falls back to {@link HddsConfigKeys#OZONE_METADATA_DIRS} + * when {@link OMConfigKeys#OZONE_OM_DB_DIRS} is not set + * 4.3. getOmDbDir throws exception if none of the above properties are set + * 5. the protected getNodeProperties method properly returns all the keys + * that are set properly in the OMStorage object. */ public class TestOMStorage { @Rule public ExpectedException thrown = ExpectedException.none(); - /** - * Test {@link OMStorage#getOmDbDir}. - */ + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + private static final String OM_ID_STR = new UUID(1L, 1L).toString(); + @Test - public void testGetOmDbDir() { - final File testDir = createTestDir(); + public void testGetOmDbDir() throws Exception { + final File testDir = tmpFolder.newFolder(); final File dbDir = new File(testDir, "omDbDir"); - final File metaDir = new File(testDir, "metaDir"); // should be ignored. 
- final MutableConfigurationSource conf = new OzoneConfiguration(); - conf.set(OMConfigKeys.OZONE_OM_DB_DIRS, dbDir.getPath()); - conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metaDir.getPath()); + final File metaDir = new File(testDir, "metaDir"); + OzoneConfiguration conf = confWithHDDSMetaAndOMDBDir(metaDir, dbDir); + assertThat(dbDir, equalTo(OMStorage.getOmDbDir(conf))); + assertThat(dbDir.exists(), is(true)); + assertThat(metaDir.exists(), is(false)); + } + + @Test + public void testGetOmDbDirWithFallback() throws Exception { + File metaDir = tmpFolder.newFolder(); + OzoneConfiguration conf = confWithHDDSMetadataDir(metaDir); + + assertThat(metaDir, equalTo(OMStorage.getOmDbDir(conf))); + assertThat(metaDir.exists(), is(true)); + } + + @Test + public void testNoOmDbDirConfigured() { + thrown.expect(IllegalArgumentException.class); + OMStorage.getOmDbDir(new OzoneConfiguration()); + } + + @Test + public void testSetOmIdOnNotInitializedStorage() throws Exception { + OMStorage storage = new OMStorage(configWithOMDBDir()); + assertThat(storage.getState(), is(not(INITIALIZED))); + + String omId = "omId"; try { - assertEquals(dbDir, OMStorage.getOmDbDir(conf)); - assertTrue(dbDir.exists()); // should have been created. - } finally { - FileUtils.deleteQuietly(dbDir); + storage.setOmId(omId); + } catch (IOException e) { + fail("Can not set OmId on a Storage that is not initialized."); } + assertThat(storage.getOmId(), is(omId)); + assertGetNodeProperties(storage, omId); } - /** - * Test {@link OMStorage#getOmDbDir} with fallback to OZONE_METADATA_DIRS - * when OZONE_OM_DB_DIRS is undefined. 
- */ @Test - public void testGetOmDbDirWithFallback() { - final File testDir = createTestDir(); - final File metaDir = new File(testDir, "metaDir"); - final MutableConfigurationSource conf = new OzoneConfiguration(); - conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metaDir.getPath()); + public void testSetOmIdOnInitializedStorage() throws Exception { + OzoneConfiguration conf = configWithOMDBDir(); + setupAPersistedVersionFile(conf); + thrown.expect(IOException.class); + thrown.expectMessage(ERROR_OM_IS_ALREADY_INITIALIZED); + OMStorage storage = new OMStorage(conf); + storage.setOmId("omId"); + } + + @Test + public void testCertSerialIdOperations() throws Exception { + OzoneConfiguration conf = configWithOMDBDir(); + OMStorage storage = new OMStorage(conf); + + assertThat(storage.getState(), is(not(INITIALIZED))); + assertCertOps(storage); + storage.initialize(); + storage.persistCurrentState(); + + storage = new OMStorage(conf); + assertThat(storage.getState(), is(INITIALIZED)); + assertCertOps(storage); + } + + @Test + public void testSetOmNodeIdOnNotInitializedStorage() throws Exception { + OMStorage storage = new OMStorage(configWithOMDBDir()); + assertThat(storage.getState(), is(not(INITIALIZED))); + + String nodeId = "nodeId"; try { - assertEquals(metaDir, OMStorage.getOmDbDir(conf)); - assertTrue(metaDir.exists()); // should have been created. 
- } finally { - FileUtils.deleteQuietly(metaDir); + storage.setOmNodeId(nodeId); + } catch (IOException e) { + fail("Can not set OmNodeId on a Storage that is not initialized."); } + assertThat(storage.getOmNodeId(), is(nodeId)); + assertGetNodeProperties(storage, null, nodeId); } @Test - public void testNoOmDbDirConfigured() { - thrown.expect(IllegalArgumentException.class); - OMStorage.getOmDbDir(new OzoneConfiguration()); + public void testSetOMNodeIdOnInitializedStorageWithoutNodeID() + throws Exception { + OzoneConfiguration conf = configWithOMDBDir(); + setupAPersistedVersionFile(conf); + thrown.expect(IOException.class); + thrown.expectMessage(ERROR_OM_IS_ALREADY_INITIALIZED); + + OMStorage storage = new OMStorage(conf); + storage.setOmNodeId("nodeId"); } - public File createTestDir() { - File dir = new File(GenericTestUtils.getRandomizedTestDir(), - TestOMStorage.class.getSimpleName()); - dir.mkdirs(); - return dir; + @Test + public void testSetOMNodeIdOnInitializedStorageWithNodeID() throws Exception { + OzoneConfiguration conf = configWithOMDBDir(); + setupAPersistedVersionFileWithNodeId(conf, "nodeId"); + thrown.expect(IOException.class); + thrown.expectMessage(ERROR_OM_IS_ALREADY_INITIALIZED); + + OMStorage storage = new OMStorage(conf); + storage.setOmNodeId("nodeId"); } + + @Test + public void testValidateOrPersistOmNodeIdPersistsNewlySetValue() + throws Exception { + String nodeId = "nodeId"; + OzoneConfiguration conf = configWithOMDBDir(); + setupAPersistedVersionFile(conf); + + OMStorage storage = new OMStorage(conf); + assertThat(storage.getState(), is(INITIALIZED)); + assertThat(storage.getOmNodeId(), is(nullValue())); + + storage.validateOrPersistOmNodeId(nodeId); + assertThat(storage.getOmNodeId(), is(nodeId)); + assertGetNodeProperties(storage, OM_ID_STR, nodeId); + + storage = new OMStorage(conf); + assertThat(storage.getOmNodeId(), is(nodeId)); + assertGetNodeProperties(storage, OM_ID_STR, nodeId); + } + + @Test + public void 
testValidateOrPersistOmNodeIdDoesRunWithSameNodeIdAsInFile() + throws Exception { + String nodeId = "nodeId"; + OzoneConfiguration conf = configWithOMDBDir(); + setupAPersistedVersionFileWithNodeId(conf, nodeId); + + OMStorage storage = new OMStorage(conf); + assertThat(storage.getState(), is(INITIALIZED)); + assertThat(storage.getOmNodeId(), is(nodeId)); + assertGetNodeProperties(storage, OM_ID_STR, nodeId); + + storage.validateOrPersistOmNodeId(nodeId); + + assertThat(storage.getOmNodeId(), is(nodeId)); + assertGetNodeProperties(storage, OM_ID_STR, nodeId); + } + + @Test + public void testValidateOrPersistOmNodeIdThrowsWithDifferentNodeIdAsInFile() + throws Exception { + String nodeId = "nodeId"; + String newId = "newId"; + OzoneConfiguration conf = configWithOMDBDir(); + setupAPersistedVersionFileWithNodeId(conf, nodeId); + + OMStorage storage = new OMStorage(conf); + assertThat(storage.getState(), is(INITIALIZED)); + assertThat(storage.getOmNodeId(), is(nodeId)); + + thrown.expect(IOException.class); + String expectedMsg = + String.format(ERROR_UNEXPECTED_OM_NODE_ID_TEMPLATE, newId, nodeId); + thrown.expectMessage(expectedMsg); + + storage.validateOrPersistOmNodeId(newId); + } + + private void assertCertOps(OMStorage storage) throws IOException { + String certSerialId = "12345"; + String certSerialId2 = "54321"; + storage.setOmCertSerialId(certSerialId); + assertThat(storage.getOmCertSerialId(), is(certSerialId)); + assertGetNodeProperties(storage, null, null, certSerialId); + + storage.setOmCertSerialId(certSerialId2); + assertThat(storage.getOmCertSerialId(), is(certSerialId2)); + assertGetNodeProperties(storage, null, null, certSerialId2); + + storage.unsetOmCertSerialId(); + assertThat(storage.getOmCertSerialId(), is(nullValue())); + assertGetNodeProperties(storage, null, null, null); + } + + private void assertGetNodeProperties(OMStorage storage, String... 
values) { + Properties p = storage.getNodeProperties(); + Map e = toExpectedPropertyMapping(values); + + if (e.get(OM_ID) != null) { + assertThat(p.getProperty(OM_ID), is(e.get(OM_ID))); + } + if (e.get(OM_NODE_ID) != null) { + assertThat(p.get(OM_NODE_ID), is(e.get(OM_NODE_ID))); + } + if (e.get(OM_CERT_SERIAL_ID) != null) { + assertThat(p.get(OM_CERT_SERIAL_ID), is(e.get(OM_CERT_SERIAL_ID))); + } + } + + private Map toExpectedPropertyMapping(String... values) { + Map ret = new HashMap<>(); + String[] propNames = new String[]{OM_ID, OM_NODE_ID, OM_CERT_SERIAL_ID}; + for (int i = 0; i < values.length; i++) { + ret.put(propNames[i], values[i]); + } + return ret; + } + + private void setupAPersistedVersionFile(OzoneConfiguration conf) + throws IOException { + setupAPersistedVersionFileWithNodeId(conf, null); + } + + private void setupAPersistedVersionFileWithNodeId( + OzoneConfiguration conf, String nodeId) throws IOException { + OMStorage storage = new OMStorage(conf); + storage.setClusterId("clusterId"); + storage.setLayoutVersion(OMLayoutVersionManager.maxLayoutVersion()); + storage.setOmId(OM_ID_STR); + if (nodeId != null) { + storage.setOmNodeId(nodeId); + } + storage.initialize(); + storage.persistCurrentState(); + } + + private OzoneConfiguration configWithOMDBDir() throws IOException { + File dir = tmpFolder.newFolder(); + return configWithOMDBDir(dir); + } + + private OzoneConfiguration confWithHDDSMetaAndOMDBDir( + File metaDir, File dbDir) { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metaDir.getAbsolutePath()); + conf.set(OMConfigKeys.OZONE_OM_DB_DIRS, dbDir.getAbsolutePath()); + return conf; + } + + private OzoneConfiguration confWithHDDSMetadataDir(File dir) { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, dir.getAbsolutePath()); + return conf; + } + + private OzoneConfiguration configWithOMDBDir(File dir) { + OzoneConfiguration conf = new 
OzoneConfiguration(); + conf.set(OMConfigKeys.OZONE_OM_DB_DIRS, dir.getAbsolutePath()); + return conf; + } + } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerRatisServer.java index 365eb60bfe36..94eb040795bc 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerRatisServer.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerRatisServer.java @@ -30,7 +30,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.security.x509.SecurityConfig; -import org.apache.hadoop.hdds.security.x509.certificate.client.OMCertificateClient; +import org.apache.hadoop.ozone.security.OMCertificateClient; import org.apache.hadoop.ozone.OmUtils; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.common.ha.ratis.RatisSnapshotInfo; diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/OMRequestTestUtils.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/OMRequestTestUtils.java index 942c3ba0d807..a50d7524a45f 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/OMRequestTestUtils.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/OMRequestTestUtils.java @@ -128,6 +128,18 @@ public static void addVolumeAndBucketToDB(String volumeName, } } + public static void addVolumeAndBucketToDB( + String volumeName, OMMetadataManager omMetadataManager, + OmBucketInfo.Builder builder) + throws Exception { + if (!omMetadataManager.getVolumeTable().isExist( + omMetadataManager.getVolumeKey(volumeName))) { + addVolumeToDB(volumeName, omMetadataManager); + } + + addBucketToDB(omMetadataManager, builder); + 
} + @SuppressWarnings("parameterNumber") public static void addKeyToTableAndCache(String volumeName, String bucketName, String keyName, long clientID, HddsProtos.ReplicationType replicationType, diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestOMBucketCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestOMBucketCreateRequest.java index 7a7aafcfe401..dfabf0b577f2 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestOMBucketCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestOMBucketCreateRequest.java @@ -186,6 +186,30 @@ public void testValidateAndUpdateCacheCrossSpaceQuota() throws Exception { OMException.ResultCodes.QUOTA_EXCEEDED.toString()); } + @Test + public void testValidateAndUpdateCacheBucketWithNoQuotaWhenVolumeQuotaSet() + throws Exception { + String volumeName = UUID.randomUUID().toString(); + String bucketName = UUID.randomUUID().toString(); + + OMRequestTestUtils.addVolumeToDB(volumeName, omMetadataManager, 1000L); + + // create a bucket with no quota + OMRequest originalRequest = + OMRequestTestUtils.createBucketRequest(bucketName, volumeName, false, + StorageTypeProto.SSD); + OMBucketCreateRequest omBucketCreateRequest = + new OMBucketCreateRequest(originalRequest); + OMRequest modifiedRequest = omBucketCreateRequest.preExecute(ozoneManager); + OMBucketCreateRequest testRequest = + new OMBucketCreateRequest(modifiedRequest); + OMClientResponse resp = testRequest.validateAndUpdateCache( + ozoneManager, 1, ozoneManagerDoubleBufferHelper); + + Assert.assertEquals(resp.getOMResponse().getStatus().toString(), + OMException.ResultCodes.QUOTA_ERROR.toString()); + } + private OMBucketCreateRequest doPreExecute(String volumeName, String bucketName) throws Exception { addCreateVolumeToTable(volumeName, omMetadataManager); diff --git 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestOMBucketSetPropertyRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestOMBucketSetPropertyRequest.java index 95d05c4ace51..2d96aa9571e4 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestOMBucketSetPropertyRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestOMBucketSetPropertyRequest.java @@ -444,4 +444,92 @@ public void testValidateAndUpdateCacheWithQuotaNamespaceUsed() contains("Cannot update bucket quota. NamespaceQuota requested " + "is less than used namespaceQuota")); } + + @Test + public void testSettingQuotaRetainsReplication() throws Exception { + String volumeName1 = UUID.randomUUID().toString(); + String bucketName1 = UUID.randomUUID().toString(); + String volumeName2 = UUID.randomUUID().toString(); + String bucketName2 = UUID.randomUUID().toString(); + + /* Bucket with default replication */ + OMRequestTestUtils.addVolumeAndBucketToDB( + volumeName1, bucketName1, omMetadataManager); + + String bucketKey = omMetadataManager + .getBucketKey(volumeName1, bucketName1); + + OmBucketInfo dbBucketInfoBefore = + omMetadataManager.getBucketTable().get(bucketKey); + + /* Setting quota on a bucket with default replication */ + OMRequest omRequest = createSetBucketPropertyRequest(volumeName1, + bucketName1, true, 20 * GB); + + OMBucketSetPropertyRequest omBucketSetPropertyRequest = + new OMBucketSetPropertyRequest(omRequest); + + OMClientResponse omClientResponse = omBucketSetPropertyRequest + .validateAndUpdateCache(ozoneManager, 1, + ozoneManagerDoubleBufferHelper); + + Assert.assertEquals(true, omClientResponse.getOMResponse().getSuccess()); + + OmBucketInfo dbBucketInfoAfter = + omMetadataManager.getBucketTable().get(bucketKey); + + Assert.assertEquals(null, + dbBucketInfoAfter.getDefaultReplicationConfig()); + 
Assert.assertEquals( + dbBucketInfoBefore.getDefaultReplicationConfig(), + dbBucketInfoAfter.getDefaultReplicationConfig()); + Assert.assertEquals(20 * GB, + dbBucketInfoAfter.getQuotaInBytes()); + Assert.assertEquals(1000L, + dbBucketInfoAfter.getQuotaInNamespace()); + + /* Bucket with EC replication */ + OmBucketInfo.Builder bucketInfo = new OmBucketInfo.Builder() + .setVolumeName(volumeName2) + .setBucketName(bucketName2) + .setDefaultReplicationConfig(new DefaultReplicationConfig( + EC, new ECReplicationConfig(3, 2))); + + OMRequestTestUtils.addVolumeToDB(volumeName2, omMetadataManager); + OMRequestTestUtils.addBucketToDB(omMetadataManager, bucketInfo); + + bucketKey = omMetadataManager + .getBucketKey(volumeName2, bucketName2); + dbBucketInfoBefore = + omMetadataManager.getBucketTable().get(bucketKey); + + /* Setting quota on a bucket with non-default EC replication */ + omRequest = createSetBucketPropertyRequest(volumeName2, + bucketName2, true, 20 * GB); + + omBucketSetPropertyRequest = + new OMBucketSetPropertyRequest(omRequest); + + omClientResponse = omBucketSetPropertyRequest + .validateAndUpdateCache(ozoneManager, 1, + ozoneManagerDoubleBufferHelper); + + Assert.assertEquals(true, omClientResponse.getOMResponse().getSuccess()); + + dbBucketInfoAfter = + omMetadataManager.getBucketTable().get(bucketKey); + + Assert.assertEquals(EC, + dbBucketInfoAfter.getDefaultReplicationConfig().getType()); + Assert.assertEquals( + dbBucketInfoBefore.getDefaultReplicationConfig().getType(), + dbBucketInfoAfter.getDefaultReplicationConfig().getType()); + Assert.assertEquals( + dbBucketInfoBefore.getDefaultReplicationConfig().getFactor(), + dbBucketInfoAfter.getDefaultReplicationConfig().getFactor()); + Assert.assertEquals(20 * GB, + dbBucketInfoAfter.getQuotaInBytes()); + Assert.assertEquals(1000L, + dbBucketInfoAfter.getQuotaInNamespace()); + } } diff --git 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequest.java index 5d69bb072b52..9caff1bed723 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequest.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.audit.AuditLogger; import org.apache.hadoop.ozone.audit.AuditMessage; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OMMetrics; @@ -156,6 +157,42 @@ public void testValidateAndUpdateCache() throws Exception { .get(omMetadataManager.getOzoneDirKey(volumeName, bucketName, keyName)) != null); + OmBucketInfo bucketInfo = omMetadataManager.getBucketTable() + .get(omMetadataManager.getBucketKey(volumeName, bucketName)); + Assert.assertEquals(OzoneFSUtils.getFileCount(keyName), + bucketInfo.getUsedNamespace()); + } + + @Test + public void testValidateAndUpdateCacheWithNamespaceQuotaExceed() + throws Exception { + String volumeName = "vol1"; + String bucketName = "bucket1"; + String keyName = "test/" + genRandomKeyName(); + + // Add volume and bucket entries to DB with quota + // create bucket with quota limit 1 + OMRequestTestUtils.addVolumeAndBucketToDB(volumeName, omMetadataManager, + OmBucketInfo.newBuilder().setVolumeName(volumeName) + .setBucketName(bucketName) + .setBucketLayout(getBucketLayout()) + .setQuotaInNamespace(1)); + + OMRequest omRequest = createDirectoryRequest(volumeName, bucketName, + keyName); + OMDirectoryCreateRequest omDirectoryCreateRequest = + new OMDirectoryCreateRequest(omRequest, 
getBucketLayout()); + OMRequest modifiedOmRequest = + omDirectoryCreateRequest.preExecute(ozoneManager); + + omDirectoryCreateRequest = + new OMDirectoryCreateRequest(modifiedOmRequest, getBucketLayout()); + OMClientResponse omClientResponse = + omDirectoryCreateRequest.validateAndUpdateCache(ozoneManager, 100L, + ozoneManagerDoubleBufferHelper); + + Assert.assertTrue(omClientResponse.getOMResponse().getStatus() + == OzoneManagerProtocolProtos.Status.QUOTA_EXCEEDED); } @Test diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequestWithFSO.java index 99652ed776ab..d7f4c184d4c7 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequestWithFSO.java @@ -165,6 +165,45 @@ public void testValidateAndUpdateCache() throws Exception { Assert.assertTrue(omClientResponse.getOMResponse().getStatus() == OzoneManagerProtocolProtos.Status.OK); verifyDirectoriesInDB(dirs, volumeId, bucketId); + + OmBucketInfo bucketInfo = omMetadataManager.getBucketTable() + .get(omMetadataManager.getBucketKey(volumeName, bucketName)); + Assert.assertEquals(OzoneFSUtils.getFileCount(keyName), + bucketInfo.getUsedNamespace()); + } + + @Test + public void testValidateAndUpdateCacheWithNamespaceQuotaExceeded() + throws Exception { + String volumeName = "vol1"; + String bucketName = "bucket1"; + List dirs = new ArrayList(); + String keyName = createDirKey(dirs, 3); + + // add volume and create bucket with quota limit 1 + OMRequestTestUtils.addVolumeAndBucketToDB(volumeName, omMetadataManager, + OmBucketInfo.newBuilder().setVolumeName(volumeName) + .setBucketName(bucketName) + .setBucketLayout(getBucketLayout()) + 
.setQuotaInNamespace(1)); + + OMRequest omRequest = createDirectoryRequest(volumeName, bucketName, + keyName); + OMDirectoryCreateRequestWithFSO omDirCreateRequestFSO = + new OMDirectoryCreateRequestWithFSO(omRequest, + BucketLayout.FILE_SYSTEM_OPTIMIZED); + + OMRequest modifiedOmReq = + omDirCreateRequestFSO.preExecute(ozoneManager); + + omDirCreateRequestFSO = + new OMDirectoryCreateRequestWithFSO(modifiedOmReq, + BucketLayout.FILE_SYSTEM_OPTIMIZED); + OMClientResponse omClientResponse = + omDirCreateRequestFSO.validateAndUpdateCache(ozoneManager, 100L, + ozoneManagerDoubleBufferHelper); + Assert.assertTrue(omClientResponse.getOMResponse().getStatus() + == OzoneManagerProtocolProtos.Status.QUOTA_EXCEEDED); } @Test diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequest.java index 47f2934a3a17..3158e3597c00 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequest.java @@ -21,6 +21,7 @@ import java.util.List; import java.util.UUID; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.request.OMRequestTestUtils; import org.jetbrains.annotations.NotNull; import org.junit.Assert; @@ -167,6 +168,32 @@ public void testValidateAndUpdateCache() throws Exception { .getLocalID(), omKeyLocationInfo.getLocalID()); } + @Test + public void testValidateAndUpdateCacheWithNamespaceQuotaExceeded() + throws Exception { + keyName = "test/" + keyName; + OMRequest omRequest = createFileRequest(volumeName, bucketName, keyName, + HddsProtos.ReplicationFactor.ONE, HddsProtos.ReplicationType.RATIS, + false, true); + + // add volume and create bucket with quota limit 1 + 
OMRequestTestUtils.addVolumeAndBucketToDB(volumeName, omMetadataManager, + OmBucketInfo.newBuilder().setVolumeName(volumeName) + .setBucketName(bucketName) + .setBucketLayout(getBucketLayout()) + .setQuotaInNamespace(1)); + + OMFileCreateRequest omFileCreateRequest = getOMFileCreateRequest(omRequest); + OMRequest modifiedOmRequest = omFileCreateRequest.preExecute(ozoneManager); + + omFileCreateRequest = getOMFileCreateRequest(modifiedOmRequest); + OMClientResponse omFileCreateResponse = + omFileCreateRequest.validateAndUpdateCache(ozoneManager, 100L, + ozoneManagerDoubleBufferHelper); + Assert.assertTrue(omFileCreateResponse.getOMResponse().getStatus() + == OzoneManagerProtocolProtos.Status.QUOTA_EXCEEDED); + } + @Test public void testValidateAndUpdateCacheWithVolumeNotFound() throws Exception { OMRequest omRequest = createFileRequest(volumeName, bucketName, keyName, @@ -261,7 +288,12 @@ public void testValidateAndUpdateCacheWithRecursiveAndOverWrite() String key = "c/d/e/f"; // Should be able to create file even if parent directories does not exist testNonRecursivePath(key, false, true, false); - + + // 3 parent directory created c/d/e + Assert.assertEquals(omMetadataManager.getBucketTable().get( + omMetadataManager.getBucketKey(volumeName, bucketName)) + .getUsedNamespace(), 3); + // Add the key to key table OMRequestTestUtils.addKeyToTable(false, volumeName, bucketName, key, 0L, HddsProtos.ReplicationType.RATIS, diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequestWithFSO.java index 89d34f520f40..303ee36ad641 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMFileCreateRequestWithFSO.java @@ -20,9 +20,12 @@ import 
org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.request.OMRequestTestUtils; +import org.apache.hadoop.ozone.om.response.OMClientResponse; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; @@ -68,6 +71,31 @@ public void testValidateAndUpdateCacheWithNonRecursive() throws Exception { testNonRecursivePath("a/b/e", false, false, false); } + @Test + public void testValidateAndUpdateCacheWithNamespaceQuotaExceeded() + throws Exception { + OMRequest omRequest = createFileRequest(volumeName, bucketName, + "/test/a1/a2", HddsProtos.ReplicationFactor.ONE, + HddsProtos.ReplicationType.RATIS, false, true); + + // create bucket with quota limit 1 + OMRequestTestUtils.addVolumeAndBucketToDB(volumeName, omMetadataManager, + OmBucketInfo.newBuilder().setVolumeName(volumeName) + .setBucketName(bucketName) + .setBucketLayout(getBucketLayout()) + .setQuotaInNamespace(1)); + + OMFileCreateRequest omFileCreateRequest = getOMFileCreateRequest(omRequest); + OMRequest modifiedOmRequest = omFileCreateRequest.preExecute(ozoneManager); + + omFileCreateRequest = getOMFileCreateRequest(modifiedOmRequest); + OMClientResponse omFileCreateResponse = + omFileCreateRequest.validateAndUpdateCache(ozoneManager, 100L, + ozoneManagerDoubleBufferHelper); + Assert.assertTrue(omFileCreateResponse.getOMResponse().getStatus() + == OzoneManagerProtocolProtos.Status.QUOTA_EXCEEDED); + } + @Test public void testValidateAndUpdateCacheWithRecursiveAndOverWrite() throws Exception { @@ -75,6 +103,9 @@ public void testValidateAndUpdateCacheWithRecursiveAndOverWrite() // 
Should be able to create file even if parent directories does not exist testNonRecursivePath(key, false, true, false); Assert.assertEquals("Invalid metrics value", 3, omMetrics.getNumKeys()); + Assert.assertEquals(omMetadataManager.getBucketTable().get( + omMetadataManager.getBucketKey(volumeName, bucketName)) + .getUsedNamespace(), omMetrics.getNumKeys()); // Add the key to key table OmDirectoryInfo omDirInfo = getDirInfo("c/d/e"); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java index f7d1384451f0..d3f5933e2bd1 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java @@ -38,6 +38,7 @@ import org.apache.hadoop.ozone.om.response.key.OMKeyPurgeResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; +import org.jetbrains.annotations.NotNull; import org.junit.Assert; import org.junit.Test; @@ -108,7 +109,7 @@ private void updateBlockInfo(OmKeyInfo omKeyInfo) throws IOException { * @return OMRequest */ private OMRequest createPurgeKeysRequest(String purgeDeletedDir, - List keyList) throws IOException { + List keyList, OmBucketInfo bucketInfo) throws IOException { List purgePathRequestList = new ArrayList<>(); List subFiles = new ArrayList<>(); @@ -117,7 +118,7 @@ private OMRequest createPurgeKeysRequest(String purgeDeletedDir, } List subDirs = new ArrayList<>(); Long volumeId = 1L; - Long bucketId = 1L; + Long bucketId = bucketInfo.getObjectID(); OzoneManagerProtocolProtos.PurgePathRequest request = wrapPurgeRequest( volumeId, 
bucketId, purgeDeletedDir, subFiles, subDirs); purgePathRequestList.add(request); @@ -180,25 +181,18 @@ public void testValidateAndUpdateCacheCheckQuota() throws Exception { // Create and Delete keys. The keys should be moved to DeletedKeys table List deletedKeyInfos = createAndDeleteKeys(1, null); // The keys should be present in the DeletedKeys table before purging - List deletedKeyNames = new ArrayList<>(); - for (OmKeyInfo deletedKey : deletedKeyInfos) { - String keyName = omMetadataManager.getOzoneKey(deletedKey.getVolumeName(), - deletedKey.getBucketName(), deletedKey.getKeyName()); - Assert.assertTrue(omMetadataManager.getDeletedTable().isExist( - keyName)); - deletedKeyNames.add(keyName); - } + List deletedKeyNames = validateDeletedKeysTable(deletedKeyInfos); // Create PurgeKeysRequest to purge the deleted keys - OMRequest omRequest = createPurgeKeysRequest(null, deletedKeyInfos); - + String bucketKey = omMetadataManager.getBucketKey(volumeName, bucketName); + OmBucketInfo omBucketInfo = omMetadataManager.getBucketTable().get( + bucketKey); + OMRequest omRequest = createPurgeKeysRequest( + null, deletedKeyInfos, omBucketInfo); OMRequest preExecutedRequest = preExecute(omRequest); OMDirectoriesPurgeRequestWithFSO omKeyPurgeRequest = new OMDirectoriesPurgeRequestWithFSO(preExecutedRequest); - String bucketKey = omMetadataManager.getBucketKey(volumeName, bucketName); - OmBucketInfo omBucketInfo = omMetadataManager.getBucketTable().get( - bucketKey); Assert.assertEquals(1000L * deletedKeyNames.size(), omBucketInfo.getUsedBytes()); OMDirectoriesPurgeResponseWithFSO omClientResponse @@ -210,16 +204,91 @@ public void testValidateAndUpdateCacheCheckQuota() throws Exception { Assert.assertEquals(0L * deletedKeyNames.size(), omBucketInfo.getUsedBytes()); + performBatchOperationCommit(omClientResponse); + + // The keys should exist in the DeletedKeys table after dir delete + validateDeletedKeys(deletedKeyNames); + } + + @Test + public void 
testValidateAndUpdateCacheQuotaBucketRecreated() + throws Exception { + // Create and Delete keys. The keys should be moved to DeletedKeys table + List deletedKeyInfos = createAndDeleteKeys(1, null); + // The keys should be present in the DeletedKeys table before purging + List deletedKeyNames = validateDeletedKeysTable(deletedKeyInfos); + + // Create PurgeKeysRequest to purge the deleted keys + String bucketKey = omMetadataManager.getBucketKey(volumeName, bucketName); + OmBucketInfo omBucketInfo = omMetadataManager.getBucketTable().get( + bucketKey); + OMRequest omRequest = createPurgeKeysRequest( + null, deletedKeyInfos, omBucketInfo); + OMRequest preExecutedRequest = preExecute(omRequest); + OMDirectoriesPurgeRequestWithFSO omKeyPurgeRequest = + new OMDirectoriesPurgeRequestWithFSO(preExecutedRequest); + + // recreate bucket + omMetadataManager.getBucketTable().delete(bucketKey); + OMRequestTestUtils.addBucketToDB(volumeName, bucketName, + omMetadataManager); + omBucketInfo = omMetadataManager.getBucketTable().get( + bucketKey); + omBucketInfo.incrUsedBytes(1000); + omBucketInfo.incrUsedNamespace(100L); + omMetadataManager.getBucketTable().addCacheEntry(new CacheKey<>(bucketKey), + new CacheValue<>(Optional.of(omBucketInfo), 1L)); + omMetadataManager.getBucketTable().put(bucketKey, omBucketInfo); + + // prevalidate bucket + omBucketInfo = omMetadataManager.getBucketTable().get(bucketKey); + Assert.assertEquals(1000L, omBucketInfo.getUsedBytes()); + + // perform delete + OMDirectoriesPurgeResponseWithFSO omClientResponse + = (OMDirectoriesPurgeResponseWithFSO) omKeyPurgeRequest + .validateAndUpdateCache(ozoneManager, 100L, + ozoneManagerDoubleBufferHelper); + + // validate bucket info, no change expected + omBucketInfo = omMetadataManager.getBucketTable().get( + bucketKey); + Assert.assertEquals(1000L, omBucketInfo.getUsedBytes()); + + performBatchOperationCommit(omClientResponse); + + // The keys should exist in the DeletedKeys table after dir delete + 
validateDeletedKeys(deletedKeyNames); + } + + private void performBatchOperationCommit( + OMDirectoriesPurgeResponseWithFSO omClientResponse) throws IOException { try (BatchOperation batchOperation = - omMetadataManager.getStore().initBatchOperation()) { + omMetadataManager.getStore().initBatchOperation()) { omClientResponse.addToDBBatch(omMetadataManager, batchOperation); // Do manual commit and see whether addToBatch is successful or not. omMetadataManager.getStore().commitBatchOperation(batchOperation); } + } - // The keys should exist in the DeletedKeys table after dir delete + @NotNull + private List validateDeletedKeysTable( + List deletedKeyInfos) throws IOException { + List deletedKeyNames = new ArrayList<>(); + for (OmKeyInfo deletedKey : deletedKeyInfos) { + String keyName = omMetadataManager.getOzoneKey(deletedKey.getVolumeName(), + deletedKey.getBucketName(), deletedKey.getKeyName()); + Assert.assertTrue(omMetadataManager.getDeletedTable().isExist( + keyName)); + deletedKeyNames.add(keyName); + } + return deletedKeyNames; + } + + private void validateDeletedKeys( + List deletedKeyNames) throws IOException { for (String deletedKey : deletedKeyNames) { Assert.assertTrue(omMetadataManager.getDeletedTable().isExist( deletedKey)); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequest.java index 10552e380d18..edf08d0563ab 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequest.java @@ -79,11 +79,17 @@ public void testValidateAndUpdateCacheWithUnknownBlockId() throws Exception { OMRequestTestUtils.addVolumeAndBucketToDB(volumeName, bucketName, omMetadataManager, omKeyCommitRequest.getBucketLayout()); - String ozoneKey = 
addKeyToOpenKeyTable(allocatedLocationList); + String openKey = addKeyToOpenKeyTable(allocatedLocationList); + String ozoneKey = getOzonePathKey(); + + OmKeyInfo omKeyInfo = + omMetadataManager.getOpenKeyTable( + omKeyCommitRequest.getBucketLayout()).get(openKey); + Assert.assertNotNull(omKeyInfo); // Key should not be there in key table, as validateAndUpdateCache is // still not called. - OmKeyInfo omKeyInfo = + omKeyInfo = omMetadataManager.getKeyTable(omKeyCommitRequest.getBucketLayout()) .get(ozoneKey); @@ -99,7 +105,7 @@ public void testValidateAndUpdateCacheWithUnknownBlockId() throws Exception { // Entry should be deleted from openKey Table. omKeyInfo = omMetadataManager.getOpenKeyTable(omKeyCommitRequest.getBucketLayout()) - .get(ozoneKey); + .get(openKey); Assert.assertNull(omKeyInfo); // Now entry should be created in key Table. @@ -141,11 +147,17 @@ public void testValidateAndUpdateCache() throws Exception { OMRequestTestUtils.addVolumeAndBucketToDB(volumeName, bucketName, omMetadataManager, omKeyCommitRequest.getBucketLayout()); - String ozoneKey = addKeyToOpenKeyTable(allocatedLocationList); + String openKey = addKeyToOpenKeyTable(allocatedLocationList); + String ozoneKey = getOzonePathKey(); + + OmKeyInfo omKeyInfo = + omMetadataManager.getOpenKeyTable( + omKeyCommitRequest.getBucketLayout()).get(openKey); + Assert.assertNotNull(omKeyInfo); // Key should not be there in key table, as validateAndUpdateCache is // still not called. - OmKeyInfo omKeyInfo = + omKeyInfo = omMetadataManager.getKeyTable(omKeyCommitRequest.getBucketLayout()) .get(ozoneKey); @@ -161,14 +173,13 @@ public void testValidateAndUpdateCache() throws Exception { // Entry should be deleted from openKey Table. omKeyInfo = omMetadataManager.getOpenKeyTable(omKeyCommitRequest.getBucketLayout()) - .get(ozoneKey); + .get(openKey); Assert.assertNull(omKeyInfo); // Now entry should be created in key Table. 
omKeyInfo = omMetadataManager.getKeyTable(omKeyCommitRequest.getBucketLayout()) .get(ozoneKey); - Assert.assertNotNull(omKeyInfo); // DB keyInfo format verifyKeyName(omKeyInfo); @@ -214,11 +225,17 @@ public void testValidateAndUpdateCacheWithUncommittedBlocks() OMRequestTestUtils.addVolumeAndBucketToDB(volumeName, bucketName, omMetadataManager, omKeyCommitRequest.getBucketLayout()); - String ozoneKey = addKeyToOpenKeyTable(allocatedBlockList); + String openKey = addKeyToOpenKeyTable(allocatedBlockList); + String ozoneKey = getOzonePathKey(); + + OmKeyInfo omKeyInfo = + omMetadataManager.getOpenKeyTable( + omKeyCommitRequest.getBucketLayout()).get(openKey); + Assert.assertNotNull(omKeyInfo); // Key should not be there in key table, as validateAndUpdateCache is // still not called. - OmKeyInfo omKeyInfo = + omKeyInfo = omMetadataManager.getKeyTable(omKeyCommitRequest.getBucketLayout()) .get(ozoneKey); @@ -243,7 +260,7 @@ public void testValidateAndUpdateCacheWithUncommittedBlocks() // Entry should be deleted from openKey Table. omKeyInfo = omMetadataManager.getOpenKeyTable(omKeyCommitRequest.getBucketLayout()) - .get(ozoneKey); + .get(openKey); Assert.assertNull(omKeyInfo); // Now entry should be created in key Table. 
@@ -613,7 +630,8 @@ protected String addKeyToOpenKeyTable(List locationList) clientID, replicationType, replicationFactor, omMetadataManager, locationList, version); - return getOzonePathKey(); + return omMetadataManager.getOpenKey(volumeName, bucketName, + keyName, clientID); } @NotNull diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequestWithFSO.java index bd5eb65c9048..64dfea1412e7 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCommitRequestWithFSO.java @@ -57,11 +57,12 @@ private long getBucketID() throws java.io.IOException { @Override protected String getOzonePathKey() throws IOException { - final long volumeId = getVolumeID(); + final long volumeID = getVolumeID(); final long bucketID = getBucketID(); String fileName = OzoneFSUtils.getFileName(keyName); - return omMetadataManager.getOzonePathKey(volumeId, bucketID, - bucketID, fileName); + + return omMetadataManager.getOzonePathKey(volumeID, bucketID, + parentID, fileName); } @Override @@ -84,11 +85,9 @@ protected String addKeyToOpenKeyTable(List locationList) omKeyInfoFSO.appendNewBlocks(locationList, false); String fileName = OzoneFSUtils.getFileName(keyName); - OMRequestTestUtils.addFileToKeyTable(true, false, + return OMRequestTestUtils.addFileToKeyTable(true, false, fileName, omKeyInfoFSO, clientID, txnLogId, omMetadataManager); - return omMetadataManager.getOzonePathKey(getVolumeID(), getBucketID(), - parentID, fileName); } @NotNull diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequest.java 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequest.java index 542127380fb8..fbd4bd9873ac 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequest.java @@ -32,6 +32,7 @@ import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.lock.OzoneLockProvider; import org.apache.hadoop.ozone.om.request.OMRequestTestUtils; import org.junit.Assert; @@ -204,6 +205,32 @@ public void testValidateAndUpdateCache() throws Exception { } + @Test + public void testValidateAndUpdateCacheWithNamespaceQuotaExceeded() + throws Exception { + when(ozoneManager.getOzoneLockProvider()).thenReturn( + new OzoneLockProvider(keyPathLockEnabled, enableFileSystemPaths)); + OMRequest modifiedOmRequest = + doPreExecute(createKeyRequest(false, 0, "test/" + keyName)); + + // test with FSO type + OMKeyCreateRequest omKeyCreateRequest = getOMKeyCreateRequest( + modifiedOmRequest, BucketLayout.FILE_SYSTEM_OPTIMIZED); + + // create bucket with quota limit 1 + OMRequestTestUtils.addVolumeAndBucketToDB(volumeName, omMetadataManager, + OmBucketInfo.newBuilder().setVolumeName(volumeName) + .setBucketName(bucketName) + .setBucketLayout(BucketLayout.FILE_SYSTEM_OPTIMIZED) + .setQuotaInNamespace(1)); + + OMClientResponse omKeyCreateResponse = + omKeyCreateRequest.validateAndUpdateCache(ozoneManager, 100L, + ozoneManagerDoubleBufferHelper); + Assert.assertTrue(omKeyCreateResponse.getOMResponse().getStatus() + == OzoneManagerProtocolProtos.Status.QUOTA_EXCEEDED); + } + private void checkResponse(OMRequest modifiedOmRequest, OMClientResponse omKeyCreateResponse, long id, boolean override, BucketLayout 
bucketLayout) throws Exception { @@ -690,4 +717,9 @@ protected String getOzoneKey() throws IOException { protected OMKeyCreateRequest getOMKeyCreateRequest(OMRequest omRequest) { return new OMKeyCreateRequest(omRequest, BucketLayout.DEFAULT); } + + protected OMKeyCreateRequest getOMKeyCreateRequest( + OMRequest omRequest, BucketLayout layout) { + return new OMKeyCreateRequest(omRequest, layout); + } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequestWithFSO.java index 1cdc2a135598..ffb0414ae0e9 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyCreateRequestWithFSO.java @@ -164,6 +164,12 @@ protected OMKeyCreateRequest getOMKeyCreateRequest(OMRequest omRequest) { BucketLayout.FILE_SYSTEM_OPTIMIZED); } + @Override + protected OMKeyCreateRequest getOMKeyCreateRequest( + OMRequest omRequest, BucketLayout layout) { + return new OMKeyCreateRequestWithFSO(omRequest, layout); + } + @Override public BucketLayout getBucketLayout() { return BucketLayout.FILE_SYSTEM_OPTIMIZED; diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartUploadCompleteRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartUploadCompleteRequest.java index 0657887d08c8..842710cecae8 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartUploadCompleteRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartUploadCompleteRequest.java @@ -155,7 +155,7 @@ private void 
checkValidateAndUpdateCacheSuccess(String volumeName, .getCacheValue(new CacheKey<>( omMetadataManager.getBucketKey(volumeName, bucketName))) .getCacheValue(); - Assert.assertEquals(1L, omBucketInfo.getUsedNamespace()); + Assert.assertEquals(getNamespaceCount(), omBucketInfo.getUsedNamespace()); } protected void addVolumeAndBucket(String volumeName, String bucketName) @@ -331,5 +331,9 @@ protected String getOzoneDBKey(String volumeName, String bucketName, protected String getKeyName() { return UUID.randomUUID().toString(); } + + protected long getNamespaceCount() { + return 1L; + } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartUploadCompleteRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartUploadCompleteRequestWithFSO.java index 95bd06869866..fd0a9d37169c 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartUploadCompleteRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartUploadCompleteRequestWithFSO.java @@ -50,6 +50,12 @@ protected String getKeyName() { return keyName; } + @Override + protected long getNamespaceCount() { + // parent directory count which is also created + return 5L; + } + @Override protected void addVolumeAndBucket(String volumeName, String bucketName) throws Exception { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeSetQuotaRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeSetQuotaRequest.java index 8cb02cf77a7b..2e63633d421b 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeSetQuotaRequest.java +++ 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeSetQuotaRequest.java @@ -204,4 +204,27 @@ public void testValidateAndUpdateCacheWithQuota() throws Exception { contains("Total buckets quota in this volume " + "should not be greater than volume quota")); } + + @Test + public void testValidateAndUpdateCacheQuotaSetFailureWhenBucketQuotaNotSet() + throws Exception { + + String volumeName = UUID.randomUUID().toString(); + String bucketName = UUID.randomUUID().toString(); + + OMRequestTestUtils.addVolumeAndBucketToDB(volumeName, + bucketName, omMetadataManager); + OMRequest originalRequest = + OMRequestTestUtils.createSetVolumePropertyRequest(volumeName, + 5 * GB, 100L); + + OMVolumeSetQuotaRequest omVolumeSetQuotaRequest = + new OMVolumeSetQuotaRequest(originalRequest); + + OMClientResponse omClientResponse = omVolumeSetQuotaRequest + .validateAndUpdateCache(ozoneManager, 1, + ozoneManagerDoubleBufferHelper); + Assert.assertEquals(omClientResponse.getOMResponse().getStatus(), + OzoneManagerProtocolProtos.Status.QUOTA_ERROR); + } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/file/TestOMDirectoryCreateResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/file/TestOMDirectoryCreateResponse.java index 6703f4c55ea5..e9198220e543 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/file/TestOMDirectoryCreateResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/file/TestOMDirectoryCreateResponse.java @@ -20,14 +20,17 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import 
org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OzoneFSUtils; import org.apache.hadoop.ozone.om.request.OMRequestTestUtils; import org.apache.hadoop.ozone.om.request.file.OMDirectoryCreateRequest.Result; +import org.apache.hadoop.ozone.om.response.TestOMResponseUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos .OMResponse; @@ -41,6 +44,7 @@ import java.util.ArrayList; import java.util.UUID; +import java.util.concurrent.ThreadLocalRandom; /** * Tests OMDirectoryCreateResponse. @@ -79,6 +83,13 @@ public void testAddToDBBatch() throws Exception { bucketName, OzoneFSUtils.addTrailingSlashIfNeeded(keyName), HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.ONE); + ThreadLocalRandom random = ThreadLocalRandom.current(); + long usedNamespace = Math.abs(random.nextLong(Long.MAX_VALUE)); + OmBucketInfo omBucketInfo = TestOMResponseUtils.createBucket( + volumeName, bucketName); + omBucketInfo = omBucketInfo.toBuilder() + .setUsedNamespace(usedNamespace).build(); + OMResponse omResponse = OMResponse.newBuilder().setCreateDirectoryResponse( OzoneManagerProtocolProtos.CreateDirectoryResponse.getDefaultInstance()) .setStatus(OzoneManagerProtocolProtos.Status.OK) @@ -87,7 +98,7 @@ public void testAddToDBBatch() throws Exception { OMDirectoryCreateResponse omDirectoryCreateResponse = new OMDirectoryCreateResponse(omResponse, omKeyInfo, - new ArrayList<>(), Result.SUCCESS, getBucketLayout()); + new ArrayList<>(), Result.SUCCESS, getBucketLayout(), omBucketInfo); omDirectoryCreateResponse.addToDBBatch(omMetadataManager, batchOperation); @@ -96,6 +107,12 @@ public void testAddToDBBatch() throws Exception { Assert.assertNotNull(omMetadataManager.getKeyTable(getBucketLayout()).get( 
omMetadataManager.getOzoneDirKey(volumeName, bucketName, keyName))); + + Table.KeyValue keyValue = + omMetadataManager.getBucketTable().iterator().next(); + Assert.assertEquals(omMetadataManager.getBucketKey(volumeName, + bucketName), keyValue.getKey()); + Assert.assertEquals(usedNamespace, keyValue.getValue().getUsedNamespace()); } public BucketLayout getBucketLayout() { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/file/TestOMDirectoryCreateResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/file/TestOMDirectoryCreateResponseWithFSO.java index e411eb626d32..b7746e526ae9 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/file/TestOMDirectoryCreateResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/file/TestOMDirectoryCreateResponseWithFSO.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; import org.apache.hadoop.hdds.utils.db.cache.CacheValue; +import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; @@ -33,6 +34,7 @@ import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; import org.apache.hadoop.ozone.om.request.OMRequestTestUtils; import org.apache.hadoop.ozone.om.request.file.OMDirectoryCreateRequestWithFSO; +import org.apache.hadoop.ozone.om.response.TestOMResponseUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; import org.junit.Assert; @@ -44,6 +46,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.UUID; +import java.util.concurrent.ThreadLocalRandom; /** * Tests OMDirectoryCreateResponseWithFSO - prefix layout. 
@@ -67,6 +70,8 @@ public void setup() throws Exception { @Test public void testAddToDBBatch() throws Exception { + String volumeName = UUID.randomUUID().toString(); + String bucketName = UUID.randomUUID().toString(); String keyName = UUID.randomUUID().toString(); final String volume = "volume"; @@ -84,12 +89,18 @@ public void testAddToDBBatch() throws Exception { .setStatus(OzoneManagerProtocolProtos.Status.OK) .setCmdType(OzoneManagerProtocolProtos.Type.CreateDirectory) .build(); + ThreadLocalRandom random = ThreadLocalRandom.current(); + long usedNamespace = Math.abs(random.nextLong(Long.MAX_VALUE)); + OmBucketInfo omBucketInfo = TestOMResponseUtils.createBucket( + volumeName, bucketName); + omBucketInfo = omBucketInfo.toBuilder() + .setUsedNamespace(usedNamespace).build(); OMDirectoryCreateResponseWithFSO omDirectoryCreateResponseWithFSO = new OMDirectoryCreateResponseWithFSO(omResponse, volumeId, bucketId, omDirInfo, new ArrayList<>(), OMDirectoryCreateRequestWithFSO.Result.SUCCESS, - BucketLayout.FILE_SYSTEM_OPTIMIZED); + BucketLayout.FILE_SYSTEM_OPTIMIZED, omBucketInfo); omDirectoryCreateResponseWithFSO .addToDBBatch(omMetadataManager, batchOperation); @@ -100,6 +111,12 @@ public void testAddToDBBatch() throws Exception { Assert.assertNotNull(omMetadataManager.getDirectoryTable().get( omMetadataManager.getOzonePathKey(volumeId, bucketId, parentID, keyName))); + + Table.KeyValue keyValue = + omMetadataManager.getBucketTable().iterator().next(); + Assert.assertEquals(omMetadataManager.getBucketKey(volumeName, + bucketName), keyValue.getKey()); + Assert.assertEquals(usedNamespace, keyValue.getValue().getUsedNamespace()); } private void addVolumeToDB(String volumeName) throws IOException { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/key/TestOMKeysDeleteResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/key/TestOMKeysDeleteResponseWithFSO.java index 
9d17220631ab..6127c50533ab 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/key/TestOMKeysDeleteResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/key/TestOMKeysDeleteResponseWithFSO.java @@ -53,6 +53,7 @@ public class TestOMKeysDeleteResponseWithFSO private List dirDeleteList = new ArrayList<>(); private List dirDBKeys = new ArrayList<>(); + private List dirDelDBKeys = new ArrayList<>(); private long volId; @Override @@ -82,6 +83,8 @@ protected void createPreRequisities() throws Exception { bucketName, dirInfo, dir); dirDeleteList.add(dirKeyInfo); dirDBKeys.add(dirOzoneDBKey); + dirDelDBKeys.add(omMetadataManager.getOzoneDeletePathKey( + dirKeyInfo.getObjectID(), dirOzoneDBKey)); // create set of keys directly under the bucket String ozoneDBKey = ""; @@ -150,10 +153,12 @@ public void testKeysDeleteResponseWithNoBucketExists() throws Exception { RepeatedOmKeyInfo repeatedOmKeyInfo = omMetadataManager.getDeletedTable().get(dirDBKey); Assert.assertNull(repeatedOmKeyInfo); + } + for (String dirDelDBKey : dirDelDBKeys) { // dir added to the deleted dir table, for deep cleanups OmKeyInfo omDirInfo = - omMetadataManager.getDeletedDirTable().get(dirDBKey); + omMetadataManager.getDeletedDirTable().get(dirDelDBKey); Assert.assertNotNull(omDirInfo); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartResponse.java index 5a5e2a1f6732..378f10205373 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartResponse.java @@ -94,7 +94,7 @@ public void tearDown() { public S3InitiateMultipartUploadResponse 
createS3InitiateMPUResponse( String volumeName, String bucketName, String keyName, - String multipartUploadID) { + String multipartUploadID) throws IOException { OmMultipartKeyInfo multipartKeyInfo = new OmMultipartKeyInfo.Builder() .setUploadID(multipartUploadID) .setCreationTime(Time.now()) @@ -193,7 +193,7 @@ public PartKeyInfo createPartKeyInfoFSO( public S3InitiateMultipartUploadResponse createS3InitiateMPUResponseFSO( String volumeName, String bucketName, long parentID, String keyName, String multipartUploadID, List parentDirInfos, - long volumeId, long bucketId) { + long volumeId, long bucketId) throws IOException { OmMultipartKeyInfo multipartKeyInfo = new OmMultipartKeyInfo.Builder() .setUploadID(multipartUploadID) .setCreationTime(Time.now()) @@ -232,9 +232,13 @@ public S3InitiateMultipartUploadResponse createS3InitiateMPUResponseFSO( omKeyInfo.getVolumeName(), omKeyInfo.getBucketName(), keyName, multipartUploadID); + String buckDBKey = omMetadataManager.getBucketKey(volumeName, bucketName); + OmBucketInfo omBucketInfo = + omMetadataManager.getBucketTable().get(buckDBKey); + return new S3InitiateMultipartUploadResponseWithFSO(omResponse, multipartKeyInfo, omKeyInfo, mpuKey, parentDirInfos, getBucketLayout(), - volumeId, bucketId); + volumeId, bucketId, omBucketInfo); } @SuppressWarnings("checkstyle:ParameterNumber") @@ -326,7 +330,7 @@ public S3MultipartUploadCompleteResponse createS3CompleteMPUResponseFSO( protected S3InitiateMultipartUploadResponse getS3InitiateMultipartUploadResp( OmMultipartKeyInfo multipartKeyInfo, OmKeyInfo omKeyInfo, - OMResponse omResponse, long volumeId, long bucketId) { + OMResponse omResponse, long volumeId, long bucketId) throws IOException { return new S3InitiateMultipartUploadResponse(omResponse, multipartKeyInfo, omKeyInfo, getBucketLayout()); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartUploadAbortResponse.java 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartUploadAbortResponse.java index ab77a436b8c8..5f830ed85742 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartUploadAbortResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartUploadAbortResponse.java @@ -100,7 +100,8 @@ public void testAddDBToBatch() throws Exception { protected S3InitiateMultipartUploadResponse getS3InitiateMultipartUploadResponse( String volumeName, String bucketName, String keyName, - String multipartUploadID, long volumeId, long bucketId) { + String multipartUploadID, long volumeId, long bucketId) + throws IOException { return createS3InitiateMPUResponse(volumeName, bucketName, keyName, multipartUploadID); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartUploadAbortResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartUploadAbortResponseWithFSO.java index b984e769f89f..7a98ec3c3ab3 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartUploadAbortResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/multipart/TestS3MultipartUploadAbortResponseWithFSO.java @@ -65,23 +65,29 @@ protected String getMultipartOpenKey(String volumeName, String bucketName, protected S3InitiateMultipartUploadResponse getS3InitiateMultipartUploadResp( OmMultipartKeyInfo multipartKeyInfo, OmKeyInfo omKeyInfo, OzoneManagerProtocolProtos.OMResponse omResponse, long volumeId, - long bucketId) { + long bucketId) throws IOException { String mpuDBKey = omMetadataManager.getMultipartKey(omKeyInfo.getVolumeName(), omKeyInfo.getBucketName(), omKeyInfo.getKeyName(), 
multipartKeyInfo.getUploadID()); + String buckDBKey = omMetadataManager.getBucketKey(omKeyInfo.getVolumeName(), + omKeyInfo.getBucketName()); + OmBucketInfo omBucketInfo = + omMetadataManager.getBucketTable().get(buckDBKey); + return new S3InitiateMultipartUploadResponseWithFSO(omResponse, multipartKeyInfo, omKeyInfo, mpuDBKey, new ArrayList<>(), - getBucketLayout(), volumeId, bucketId); + getBucketLayout(), volumeId, bucketId, omBucketInfo); } @Override protected S3InitiateMultipartUploadResponse getS3InitiateMultipartUploadResponse( String volumeName, String bucketName, String keyName, - String multipartUploadID, long volumeId, long bucketId) { + String multipartUploadID, long volumeId, long bucketId) + throws IOException { return createS3InitiateMPUResponseFSO(volumeName, bucketName, parentID, keyName, multipartUploadID, new ArrayList<>(), volumeId, bucketId); diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestCertificateClientInit.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/security/TestOmCertificateClientInit.java similarity index 73% rename from hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestCertificateClientInit.java rename to hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/security/TestOmCertificateClientInit.java index 72730858fc6f..d37af74e88a3 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestCertificateClientInit.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/security/TestOmCertificateClientInit.java @@ -16,11 +16,12 @@ * limitations under the License. 
* */ -package org.apache.hadoop.hdds.security.x509.certificate.client; +package org.apache.hadoop.ozone.security; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.security.x509.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.hdds.security.x509.keys.HDDSKeyGenerator; import org.apache.hadoop.hdds.security.x509.keys.KeyCodec; @@ -53,21 +54,18 @@ import static org.junit.jupiter.params.provider.Arguments.arguments; /** - * Test class for {@link DefaultCertificateClient}. + * Test class for {@link OMCertificateClient}. */ -public class TestCertificateClientInit { +public class TestOmCertificateClientInit { private KeyPair keyPair; private String certSerialId = "3284792342234"; - private CertificateClient dnCertificateClient; private CertificateClient omCertificateClient; private HDDSKeyGenerator keyGenerator; private Path metaDirPath; private SecurityConfig securityConfig; - private KeyCodec dnKeyCodec; private KeyCodec omKeyCodec; private X509Certificate x509Certificate; - private static final String DN_COMPONENT = DNCertificateClient.COMPONENT_NAME; private static final String OM_COMPONENT = OMCertificateClient.COMPONENT_NAME; private static Stream parameters() { @@ -95,71 +93,18 @@ public void setUp() throws Exception { keyPair = keyGenerator.generateKey(); x509Certificate = getX509Certificate(); certSerialId = x509Certificate.getSerialNumber().toString(); - dnCertificateClient = new DNCertificateClient(securityConfig, - certSerialId); - omCertificateClient = new OMCertificateClient(securityConfig, - certSerialId); - dnKeyCodec = new KeyCodec(securityConfig, DN_COMPONENT); + omCertificateClient = new OMCertificateClient(securityConfig, certSerialId); omKeyCodec = new KeyCodec(securityConfig, OM_COMPONENT); - 
Files.createDirectories(securityConfig.getKeyLocation(DN_COMPONENT)); Files.createDirectories(securityConfig.getKeyLocation(OM_COMPONENT)); } @AfterEach public void tearDown() { - dnCertificateClient = null; omCertificateClient = null; FileUtils.deleteQuietly(metaDirPath.toFile()); } - - @ParameterizedTest - @MethodSource("parameters") - public void testInitDatanode(boolean pvtKeyPresent, boolean pubKeyPresent, - boolean certPresent, InitResponse expectedResult) throws Exception { - if (pvtKeyPresent) { - dnKeyCodec.writePrivateKey(keyPair.getPrivate()); - } else { - FileUtils.deleteQuietly(Paths.get( - securityConfig.getKeyLocation(DN_COMPONENT).toString(), - securityConfig.getPrivateKeyFileName()).toFile()); - } - - if (pubKeyPresent) { - if (dnCertificateClient.getPublicKey() == null) { - dnKeyCodec.writePublicKey(keyPair.getPublic()); - } - } else { - FileUtils.deleteQuietly( - Paths.get(securityConfig.getKeyLocation(DN_COMPONENT).toString(), - securityConfig.getPublicKeyFileName()).toFile()); - } - - if (certPresent) { - CertificateCodec codec = new CertificateCodec(securityConfig, - DN_COMPONENT); - codec.writeCertificate(new X509CertificateHolder( - x509Certificate.getEncoded())); - } else { - FileUtils.deleteQuietly(Paths.get( - securityConfig.getKeyLocation(DN_COMPONENT).toString(), - securityConfig.getCertificateFileName()).toFile()); - } - InitResponse response = dnCertificateClient.init(); - - assertEquals(expectedResult, response); - - if (!response.equals(FAILURE)) { - assertTrue(OzoneSecurityUtil.checkIfFileExist( - securityConfig.getKeyLocation(DN_COMPONENT), - securityConfig.getPrivateKeyFileName())); - assertTrue(OzoneSecurityUtil.checkIfFileExist( - securityConfig.getKeyLocation(DN_COMPONENT), - securityConfig.getPublicKeyFileName())); - } - } - @ParameterizedTest @MethodSource("parameters") public void testInitOzoneManager(boolean pvtKeyPresent, boolean pubKeyPresent, diff --git 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/security/TestOzoneDelegationTokenSecretManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/security/TestOzoneDelegationTokenSecretManager.java index 4814e8f783b3..0003c73efc3c 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/security/TestOzoneDelegationTokenSecretManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/security/TestOzoneDelegationTokenSecretManager.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.security.x509.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; -import org.apache.hadoop.hdds.security.x509.certificate.client.OMCertificateClient; import org.apache.hadoop.hdds.server.ServerUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.ozone.OzoneConsts; diff --git a/hadoop-ozone/ozonefs-common/pom.xml b/hadoop-ozone/ozonefs-common/pom.xml index 8f8819ec9fc3..de50a4f7cf52 100644 --- a/hadoop-ozone/ozonefs-common/pom.xml +++ b/hadoop-ozone/ozonefs-common/pom.xml @@ -19,12 +19,12 @@ org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-filesystem-common Apache Ozone FileSystem Common jar - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT UTF-8 true @@ -92,7 +92,6 @@ org.mockito mockito-all - 1.10.19 test @@ -103,13 +102,12 @@ org.powermock powermock-module-junit4 - 1.6.5 + ${powermock1.version} test org.powermock powermock-api-mockito - 1.6.5 test diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneClientAdapterImpl.java b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneClientAdapterImpl.java index 9424a785de1f..3eddc01d6cb5 100644 --- a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneClientAdapterImpl.java +++ 
b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneClientAdapterImpl.java @@ -53,8 +53,8 @@ import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.OzoneKey; import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; -import org.apache.hadoop.ozone.common.MonotonicClock; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; @@ -97,7 +97,7 @@ public class BasicOzoneClientAdapterImpl implements OzoneClientAdapter { private OzoneConfiguration config; private long nextReplicationConfigRefreshTime; private long bucketRepConfigRefreshPeriodMS; - private java.time.Clock clock = new MonotonicClock(ZoneOffset.UTC); + private java.time.Clock clock = Clock.system(ZoneOffset.UTC); /** * Create new OzoneClientAdapter implementation. 
@@ -243,7 +243,7 @@ public OzoneFSOutputStream createFile(String key, short replication, this.clientConfiguredReplicationConfig, getReplicationConfigWithRefreshCheck(), config), overWrite, recursive); - return new OzoneFSOutputStream(ozoneOutputStream.getOutputStream()); + return new OzoneFSOutputStream(ozoneOutputStream); } catch (OMException ex) { if (ex.getResult() == OMException.ResultCodes.FILE_ALREADY_EXISTS || ex.getResult() == OMException.ResultCodes.NOT_A_FILE) { @@ -266,6 +266,29 @@ private ReplicationConfig getReplicationConfigWithRefreshCheck() return this.bucketReplicationConfig; } + @Override + public OzoneFSDataStreamOutput createStreamFile(String key, short replication, + boolean overWrite, boolean recursive) throws IOException { + incrementCounter(Statistic.OBJECTS_CREATED, 1); + try { + final ReplicationConfig replicationConfig + = OzoneClientUtils.resolveClientSideReplicationConfig( + replication, clientConfiguredReplicationConfig, + getReplicationConfigWithRefreshCheck(), config); + final OzoneDataStreamOutput out = bucket.createStreamFile( + key, 0, replicationConfig, overWrite, recursive); + return new OzoneFSDataStreamOutput(out.getByteBufStreamOutput()); + } catch (OMException ex) { + if (ex.getResult() == OMException.ResultCodes.FILE_ALREADY_EXISTS + || ex.getResult() == OMException.ResultCodes.NOT_A_FILE) { + throw new FileAlreadyExistsException( + ex.getResult().name() + ": " + ex.getMessage()); + } else { + throw ex; + } + } + } + @Override public void renameKey(String key, String newKeyName) throws IOException { incrementCounter(Statistic.OBJECTS_RENAMED, 1); diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneFileSystem.java b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneFileSystem.java index e8da91148859..c274b89a1490 100644 --- a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneFileSystem.java +++ 
b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneFileSystem.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.utils.LegacyHadoopConfigurationSource; +import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.OzoneFSUtils; import org.apache.hadoop.security.UserGroupInformation; @@ -270,6 +271,13 @@ public FSDataOutputStream createNonRecursive(Path path, private FSDataOutputStream createOutputStream(String key, short replication, boolean overwrite, boolean recursive) throws IOException { + boolean isRatisStreamingEnabled = getConf().getBoolean( + OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED, + OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED_DEFAULT); + if (isRatisStreamingEnabled) { + return new FSDataOutputStream(adapter.createStreamFile(key, + replication, overwrite, recursive), statistics); + } return new FSDataOutputStream(adapter.createFile(key, replication, overwrite, recursive), statistics); } diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneClientAdapterImpl.java b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneClientAdapterImpl.java index 40ed07e55405..2606bd4fdf94 100644 --- a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneClientAdapterImpl.java +++ b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneClientAdapterImpl.java @@ -61,6 +61,7 @@ import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.OzoneKey; import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.client.protocol.ClientProtocol; 
import org.apache.hadoop.ozone.client.BucketArgs; @@ -344,7 +345,8 @@ public void close() throws IOException { @Override public InputStream readFile(String pathStr) throws IOException { incrementCounter(Statistic.OBJECTS_READ, 1); - OFSPath ofsPath = new OFSPath(pathStr); + OFSPath ofsPath = new OFSPath(pathStr, + config); String key = ofsPath.getKeyName(); try { OzoneBucket bucket = getBucket(ofsPath, false); @@ -369,7 +371,7 @@ protected void incrementCounter(Statistic objectsRead, long count) { public OzoneFSOutputStream createFile(String pathStr, short replication, boolean overWrite, boolean recursive) throws IOException { incrementCounter(Statistic.OBJECTS_CREATED, 1); - OFSPath ofsPath = new OFSPath(pathStr); + OFSPath ofsPath = new OFSPath(pathStr, config); if (ofsPath.isRoot() || ofsPath.isVolume() || ofsPath.isBucket()) { throw new IOException("Cannot create file under root or volume."); } @@ -381,7 +383,38 @@ public OzoneFSOutputStream createFile(String pathStr, short replication, OzoneClientUtils.resolveClientSideReplicationConfig(replication, this.clientConfiguredReplicationConfig, bucket.getReplicationConfig(), config), overWrite, recursive); - return new OzoneFSOutputStream(ozoneOutputStream.getOutputStream()); + return new OzoneFSOutputStream(ozoneOutputStream); + } catch (OMException ex) { + if (ex.getResult() == OMException.ResultCodes.FILE_ALREADY_EXISTS + || ex.getResult() == OMException.ResultCodes.NOT_A_FILE) { + throw new FileAlreadyExistsException( + ex.getResult().name() + ": " + ex.getMessage()); + } else { + throw ex; + } + } + } + + @Override + public OzoneFSDataStreamOutput createStreamFile(String pathStr, + short replication, boolean overWrite, boolean recursive) + throws IOException { + incrementCounter(Statistic.OBJECTS_CREATED, 1); + OFSPath ofsPath = new OFSPath(pathStr, config); + if (ofsPath.isRoot() || ofsPath.isVolume() || ofsPath.isBucket()) { + throw new IOException("Cannot create file under root or volume."); + } + String key 
= ofsPath.getKeyName(); + try { + // Hadoop CopyCommands class always sets recursive to true + final OzoneBucket bucket = getBucket(ofsPath, recursive); + final ReplicationConfig replicationConfig + = OzoneClientUtils.resolveClientSideReplicationConfig( + replication, clientConfiguredReplicationConfig, + bucket.getReplicationConfig(), config); + final OzoneDataStreamOutput out = bucket.createStreamFile( + key, 0, replicationConfig, overWrite, recursive); + return new OzoneFSDataStreamOutput(out.getByteBufStreamOutput()); } catch (OMException ex) { if (ex.getResult() == OMException.ResultCodes.FILE_ALREADY_EXISTS || ex.getResult() == OMException.ResultCodes.NOT_A_FILE) { @@ -412,8 +445,8 @@ public void renameKey(String key, String newKeyName) throws IOException { @Override public void rename(String path, String newPath) throws IOException { incrementCounter(Statistic.OBJECTS_RENAMED, 1); - OFSPath ofsPath = new OFSPath(path); - OFSPath ofsNewPath = new OFSPath(newPath); + OFSPath ofsPath = new OFSPath(path, config); + OFSPath ofsNewPath = new OFSPath(newPath, config); // Check path and newPathName are in the same volume and same bucket. 
// This should have been checked in BasicRootedOzoneFileSystem#rename @@ -438,8 +471,8 @@ public void rename(String path, String newPath) throws IOException { void rename(OzoneBucket bucket, String path, String newPath) throws IOException { incrementCounter(Statistic.OBJECTS_RENAMED, 1); - OFSPath ofsPath = new OFSPath(path); - OFSPath ofsNewPath = new OFSPath(newPath); + OFSPath ofsPath = new OFSPath(path, config); + OFSPath ofsNewPath = new OFSPath(newPath, config); // No same-bucket policy check here since this call path is controlled String key = ofsPath.getKeyName(); String newKey = ofsNewPath.getKeyName(); @@ -456,7 +489,7 @@ void rename(OzoneBucket bucket, String path, String newPath) public boolean createDirectory(String pathStr) throws IOException { LOG.trace("creating dir for path: {}", pathStr); incrementCounter(Statistic.OBJECTS_CREATED, 1); - OFSPath ofsPath = new OFSPath(pathStr); + OFSPath ofsPath = new OFSPath(pathStr, config); if (ofsPath.getVolumeName().isEmpty()) { // Volume name unspecified, invalid param, return failure return false; @@ -498,7 +531,7 @@ public boolean deleteObject(String path, boolean recursive) throws IOException { LOG.trace("issuing delete for path to key: {}", path); incrementCounter(Statistic.OBJECTS_DELETED, 1); - OFSPath ofsPath = new OFSPath(path); + OFSPath ofsPath = new OFSPath(path, config); String keyName = ofsPath.getKeyName(); if (keyName.length() == 0) { return false; @@ -533,11 +566,12 @@ private boolean areInSameBucket(List keyNameList) { return true; } String firstKeyPath = keyNameList.get(0); - final String volAndBucket = new OFSPath(firstKeyPath).getNonKeyPath(); + final String volAndBucket = new OFSPath(firstKeyPath, config) + .getNonKeyPath(); // return true only if all key paths' volume and bucket in the list match // the first element's return keyNameList.stream().skip(1).allMatch(p -> - new OFSPath(p).getNonKeyPath().equals(volAndBucket)); + new OFSPath(p, config).getNonKeyPath().equals(volAndBucket)); } 
/** @@ -563,7 +597,7 @@ public boolean deleteObjects(List keyNameList) { return false; } try { - OFSPath firstKeyPath = new OFSPath(keyNameList.get(0)); + OFSPath firstKeyPath = new OFSPath(keyNameList.get(0), config); OzoneBucket bucket = getBucket(firstKeyPath, false); return deleteObjects(bucket, keyNameList); } catch (IOException ioe) { @@ -584,7 +618,7 @@ public boolean deleteObjects(List keyNameList) { */ boolean deleteObjects(OzoneBucket bucket, List keyNameList) { List keyList = keyNameList.stream() - .map(p -> new OFSPath(p).getKeyName()) + .map(p -> new OFSPath(p, config).getKeyName()) .collect(Collectors.toList()); try { incrementCounter(Statistic.OBJECTS_DELETED, keyNameList.size()); @@ -600,7 +634,7 @@ boolean deleteObjects(OzoneBucket bucket, List keyNameList) { public FileStatusAdapter getFileStatus(String path, URI uri, Path qualifiedPath, String userName) throws IOException { incrementCounter(Statistic.OBJECTS_QUERY, 1); - OFSPath ofsPath = new OFSPath(path); + OFSPath ofsPath = new OFSPath(path, config); String key = ofsPath.getKeyName(); if (ofsPath.isRoot()) { return getFileStatusAdapterForRoot(uri); @@ -685,7 +719,7 @@ public Collection getTrashRoots(boolean allUsers, @Override public Iterator listKeys(String pathStr) throws IOException { incrementCounter(Statistic.OBJECTS_LIST, 1); - OFSPath ofsPath = new OFSPath(pathStr); + OFSPath ofsPath = new OFSPath(pathStr, config); String key = ofsPath.getKeyName(); OzoneBucket bucket; try { @@ -704,7 +738,7 @@ private List listStatusRoot( boolean recursive, String startPath, long numEntries, URI uri, Path workingDir, String username) throws IOException { - OFSPath ofsStartPath = new OFSPath(startPath); + OFSPath ofsStartPath = new OFSPath(startPath, config); // list volumes Iterator iter = objectStore.listVolumesByUser( username, null, ofsStartPath.getVolumeName()); @@ -728,7 +762,7 @@ private List listStatusVolume(String volumeStr, boolean recursive, String startPath, long numEntries, URI uri, Path 
workingDir, String username) throws IOException { - OFSPath ofsStartPath = new OFSPath(startPath); + OFSPath ofsStartPath = new OFSPath(startPath, config); // list buckets in the volume OzoneVolume volume = objectStore.getVolume(volumeStr); UserGroupInformation ugi = @@ -792,12 +826,12 @@ public List listStatus(String pathStr, boolean recursive, // OFSPath initializer will error out. // The goal is to refuse processing startPaths from other authorities. - OFSPath ofsPath = new OFSPath(pathStr); + OFSPath ofsPath = new OFSPath(pathStr, config); if (ofsPath.isRoot()) { return listStatusRoot( recursive, startPath, numEntries, uri, workingDir, username); } - OFSPath ofsStartPath = new OFSPath(startPath); + OFSPath ofsStartPath = new OFSPath(startPath, config); if (ofsPath.isVolume()) { String startBucket = ofsStartPath.getBucketName(); return listStatusVolume(ofsPath.getVolumeName(), @@ -1056,7 +1090,7 @@ private static FileStatusAdapter getFileStatusAdapterForVolume( return new FileStatusAdapter(0L, 0L, path, true, (short)0, 0L, ozoneVolume.getCreationTime().getEpochSecond() * 1000, 0L, FsPermission.getDirDefault().toShort(), - owner, group, path, new BlockLocation[0], false, false + owner, group, null, new BlockLocation[0], false, false ); } @@ -1082,7 +1116,7 @@ private static FileStatusAdapter getFileStatusAdapterForBucket( return new FileStatusAdapter(0L, 0L, path, true, (short)0, 0L, ozoneBucket.getCreationTime().getEpochSecond() * 1000, 0L, FsPermission.getDirDefault().toShort(), - owner, group, path, new BlockLocation[0], + owner, group, null, new BlockLocation[0], !StringUtils.isEmpty(ozoneBucket.getEncryptionKeyName()), ozoneBucket.getReplicationConfig() != null && ozoneBucket.getReplicationConfig().getReplicationType() == @@ -1116,7 +1150,7 @@ public FileChecksum getFileChecksum(String keyName, long length) OzoneClientConfig.ChecksumCombineMode combineMode = config.getObject(OzoneClientConfig.class).getChecksumCombineMode(); - OFSPath ofsPath = new 
OFSPath(keyName); + OFSPath ofsPath = new OFSPath(keyName, config); OzoneVolume volume = objectStore.getVolume(ofsPath.getVolumeName()); OzoneBucket bucket = getBucket(ofsPath, false); diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneFileSystem.java b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneFileSystem.java index dc7c12ed9b83..764098451c8d 100644 --- a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneFileSystem.java +++ b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneFileSystem.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.utils.LegacyHadoopConfigurationSource; import org.apache.hadoop.ozone.OFSPath; +import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.om.exceptions.OMException; @@ -251,6 +252,13 @@ public FSDataOutputStream createNonRecursive(Path path, private FSDataOutputStream createOutputStream(String key, short replication, boolean overwrite, boolean recursive) throws IOException { + boolean isRatisStreamingEnabled = getConf().getBoolean( + OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED, + OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED_DEFAULT); + if (isRatisStreamingEnabled) { + return new FSDataOutputStream(adapter.createStreamFile(key, + replication, overwrite, recursive), statistics); + } return new FSDataOutputStream(adapter.createFile(key, replication, overwrite, recursive), statistics); } @@ -275,7 +283,8 @@ private class RenameIterator extends OzoneListingIterator { this.dstPath = pathToKey(dstPath); LOG.trace("rename from:{} to:{}", this.srcPath, this.dstPath); // Initialize bucket here to reduce number of RPC calls - OFSPath ofsPath = new OFSPath(srcPath); + OFSPath ofsPath = new OFSPath(srcPath, + 
OzoneConfiguration.of(getConfSource())); // TODO: Refactor later. adapterImpl = (BasicRootedOzoneClientAdapterImpl) adapter; this.bucket = adapterImpl.getBucket(ofsPath, false); @@ -320,8 +329,10 @@ public boolean rename(Path src, Path dst) throws IOException { } // src and dst should be in the same bucket - OFSPath ofsSrc = new OFSPath(src); - OFSPath ofsDst = new OFSPath(dst); + OFSPath ofsSrc = new OFSPath(src, + OzoneConfiguration.of(getConfSource())); + OFSPath ofsDst = new OFSPath(dst, + OzoneConfiguration.of(getConfSource())); if (!ofsSrc.isInSameBucketAs(ofsDst)) { throw new IOException("Cannot rename a key to a different bucket"); } @@ -468,7 +479,8 @@ && listStatus(f).length != 0) { throw new PathIsNotEmptyDirectoryException(f.toString()); } // Initialize bucket here to reduce number of RPC calls - OFSPath ofsPath = new OFSPath(f); + OFSPath ofsPath = new OFSPath(f, + OzoneConfiguration.of(getConfSource())); // TODO: Refactor later. adapterImpl = (BasicRootedOzoneClientAdapterImpl) adapter; this.bucket = adapterImpl.getBucket(ofsPath, false); @@ -498,7 +510,8 @@ private class DeleteIteratorWithFSO extends OzoneListingIterator { this.f = f; this.recursive = recursive; // Initialize bucket here to reduce number of RPC calls - OFSPath ofsPath = new OFSPath(f); + OFSPath ofsPath = new OFSPath(f, + OzoneConfiguration.of(getConfSource())); adapterImpl = (BasicRootedOzoneClientAdapterImpl) adapter; this.bucket = adapterImpl.getBucket(ofsPath, false); LOG.debug("Deleting bucket with name {} is via DeleteIteratorWithFSO.", @@ -529,7 +542,8 @@ private class DeleteIteratorFactory { DeleteIteratorFactory(Path f, boolean recursive) { this.path = f; this.recursive = recursive; - this.ofsPath = new OFSPath(f); + this.ofsPath = new OFSPath(f, + OzoneConfiguration.of(getConfSource())); } OzoneListingIterator getDeleteIterator() @@ -594,7 +608,9 @@ public boolean delete(Path f, boolean recursive) throws IOException { if (status.isDirectory()) { LOG.debug("delete: Path is a 
directory: {}", f); - OFSPath ofsPath = new OFSPath(key); + + OFSPath ofsPath = new OFSPath(key, + OzoneConfiguration.of(getConfSource())); // Handle rm root if (ofsPath.isRoot()) { @@ -835,7 +851,8 @@ public String getUsername() { */ @Override public Path getTrashRoot(Path path) { - OFSPath ofsPath = new OFSPath(path); + OFSPath ofsPath = new OFSPath(path, + OzoneConfiguration.of(getConfSource())); return ofsPath.getTrashRoot(); } @@ -1242,7 +1259,8 @@ boolean iterate() throws IOException { OZONE_FS_ITERATE_BATCH_SIZE_DEFAULT); if (status.isDir()) { LOG.trace("Iterating directory: {}", pathKey); - OFSPath ofsPath = new OFSPath(pathKey); + OFSPath ofsPath = new OFSPath(pathKey, + OzoneConfiguration.of(getConfSource())); String ofsPathPrefix = ofsPath.getNonKeyPathNoPrefixDelim() + OZONE_URI_DELIMITER; if (isFSO) { @@ -1250,7 +1268,8 @@ boolean iterate() throws IOException { fileStatuses = listStatusAdapter(path); for (FileStatusAdapter fileStatus : fileStatuses) { String keyName = - new OFSPath(fileStatus.getPath().toString()).getKeyName(); + new OFSPath(fileStatus.getPath().toString(), + OzoneConfiguration.of(getConfSource())).getKeyName(); keyPathList.add(ofsPathPrefix + keyName); } if (keyPathList.size() >= batchSize) { @@ -1323,28 +1342,27 @@ public boolean isNumber(String number) { return true; } - FileStatus convertFileStatus(FileStatusAdapter fileStatusAdapter) { - Path symLink = null; - try { - fileStatusAdapter.getSymlink(); - } catch (Exception ex) { - //NOOP: If not symlink symlink remains null. 
- } - - FileStatus fileStatus = new FileStatus( - fileStatusAdapter.getLength(), - fileStatusAdapter.isDir(), - fileStatusAdapter.getBlockReplication(), - fileStatusAdapter.getBlocksize(), - fileStatusAdapter.getModificationTime(), - fileStatusAdapter.getAccessTime(), - new FsPermission(fileStatusAdapter.getPermission()), - fileStatusAdapter.getOwner(), - fileStatusAdapter.getGroup(), - symLink, - fileStatusAdapter.getPath() + protected FileStatus constructFileStatus( + FileStatusAdapter fileStatusAdapter) { + return new FileStatus(fileStatusAdapter.getLength(), + fileStatusAdapter.isDir(), + fileStatusAdapter.getBlockReplication(), + fileStatusAdapter.getBlocksize(), + fileStatusAdapter.getModificationTime(), + fileStatusAdapter.getAccessTime(), + new FsPermission(fileStatusAdapter.getPermission()), + fileStatusAdapter.getOwner(), + fileStatusAdapter.getGroup(), + fileStatusAdapter.getSymlink(), + fileStatusAdapter.getPath(), + false, + fileStatusAdapter.isEncrypted(), + fileStatusAdapter.isErasureCoded() ); + } + FileStatus convertFileStatus(FileStatusAdapter fileStatusAdapter) { + FileStatus fileStatus = constructFileStatus(fileStatusAdapter); BlockLocation[] blockLocations = fileStatusAdapter.getBlockLocations(); if (blockLocations == null || blockLocations.length == 0) { return fileStatus; diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneClientAdapter.java b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneClientAdapter.java index 31bf351f01a6..24566cb83f8b 100644 --- a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneClientAdapter.java +++ b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneClientAdapter.java @@ -45,6 +45,9 @@ public interface OzoneClientAdapter { OzoneFSOutputStream createFile(String key, short replication, boolean overWrite, boolean recursive) throws IOException; + OzoneFSDataStreamOutput createStreamFile(String key, short 
replication, + boolean overWrite, boolean recursive) throws IOException; + void renameKey(String key, String newKeyName) throws IOException; // Users should use rename instead of renameKey in OFS. diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneFSDataStreamOutput.java b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneFSDataStreamOutput.java new file mode 100644 index 000000000000..515dbca92b42 --- /dev/null +++ b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneFSDataStreamOutput.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.ozone; + +import org.apache.hadoop.hdds.scm.storage.ByteBufferStreamOutput; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +/** + * The ByteBuffer output stream for Ozone file system. 
+ */ +public class OzoneFSDataStreamOutput extends OutputStream + implements ByteBufferStreamOutput { + + private final ByteBufferStreamOutput byteBufferStreamOutput; + + public OzoneFSDataStreamOutput( + ByteBufferStreamOutput byteBufferStreamOutput) { + this.byteBufferStreamOutput = byteBufferStreamOutput; + } + + /** + * Try to write the [off:off + len) slice in ByteBuf b to DataStream. + * + * @param b the data. + * @param off the start offset in the data. + * @param len the number of bytes to write. + * @throws IOException if an I/O error occurs. + */ + @Override + public void write(ByteBuffer b, int off, int len) + throws IOException { + byteBufferStreamOutput.write(b, off, len); + } + + /** + * Writes the specified byte to this output stream. The general + * contract for write is that one byte is written + * to the output stream. The byte to be written is the eight + * low-order bits of the argument b. The 24 + * high-order bits of b are ignored. + *

+ * Subclasses of OutputStream must provide an + * implementation for this method. + * + * @param b the byte. + * @throws IOException if an I/O error occurs. In particular, + * an IOException may be thrown if the + * output stream has been closed. + */ + @Override + public void write(int b) throws IOException { + byte[] singleBytes = new byte[1]; + singleBytes[0] = (byte) b; + byteBufferStreamOutput.write(ByteBuffer.wrap(singleBytes)); + } + + /** + * Flushes this DataStream output and forces any buffered output bytes + * to be written out. + * + * @throws IOException if an I/O error occurs. + */ + @Override + public void flush() throws IOException { + byteBufferStreamOutput.flush(); + } + + /** + * Closes this stream and releases any system resources associated + * with it. If the stream is already closed then invoking this + * method has no effect. + * + *

As noted in {@link AutoCloseable#close()}, cases where the + * close may fail require careful attention. It is strongly advised + * to relinquish the underlying resources and to internally + * mark the {@code Closeable} as closed, prior to throwing + * the {@code IOException}. + * + * @throws IOException if an I/O error occurs + */ + @Override + public void close() throws IOException { + byteBufferStreamOutput.close(); + } +} diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneFSOutputStream.java b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneFSOutputStream.java index efbf93beb5a5..b75ad63cb500 100644 --- a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneFSOutputStream.java +++ b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneFSOutputStream.java @@ -18,6 +18,9 @@ package org.apache.hadoop.fs.ozone; +import org.apache.hadoop.fs.Syncable; +import org.apache.hadoop.ozone.client.io.OzoneOutputStream; + import java.io.IOException; import java.io.OutputStream; @@ -28,11 +31,11 @@ * TODO: Make outputStream generic for both rest and rpc clients * This class is not thread safe. 
*/ -public class OzoneFSOutputStream extends OutputStream { +public class OzoneFSOutputStream extends OutputStream implements Syncable { - private final OutputStream outputStream; + private final OzoneOutputStream outputStream; - public OzoneFSOutputStream(OutputStream outputStream) { + public OzoneFSOutputStream(OzoneOutputStream outputStream) { this.outputStream = outputStream; } @@ -55,4 +58,14 @@ public synchronized void flush() throws IOException { public synchronized void close() throws IOException { outputStream.close(); } + + @Override + public void hflush() throws IOException { + hsync(); + } + + @Override + public void hsync() throws IOException { + outputStream.hsync(); + } } diff --git a/hadoop-ozone/ozonefs-common/src/test/java/org/apache/hadoop/fs/ozone/TestOFSPath.java b/hadoop-ozone/ozonefs-common/src/test/java/org/apache/hadoop/fs/ozone/TestOFSPath.java index bfa968c5bbb7..5fcd4710bcf3 100644 --- a/hadoop-ozone/ozonefs-common/src/test/java/org/apache/hadoop/fs/ozone/TestOFSPath.java +++ b/hadoop-ozone/ozonefs-common/src/test/java/org/apache/hadoop/fs/ozone/TestOFSPath.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.fs.ozone; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.OFSPath; import org.junit.Assert; import org.junit.Test; @@ -28,10 +29,12 @@ */ public class TestOFSPath { + private OzoneConfiguration conf = new OzoneConfiguration(); + @Test public void testParsingPathWithSpace() { // Two most common cases: file key and dir key inside a bucket - OFSPath ofsPath = new OFSPath("/volume1/bucket2/dir3/key4 space"); + OFSPath ofsPath = new OFSPath("/volume1/bucket2/dir3/key4 space", conf); Assert.assertEquals("", ofsPath.getAuthority()); Assert.assertEquals("volume1", ofsPath.getVolumeName()); Assert.assertEquals("bucket2", ofsPath.getBucketName()); @@ -44,7 +47,7 @@ public void testParsingPathWithSpace() { @Test public void testParsingVolumeBucketWithKey() { // Two most common cases: file key and dir key 
inside a bucket - OFSPath ofsPath = new OFSPath("/volume1/bucket2/dir3/key4"); + OFSPath ofsPath = new OFSPath("/volume1/bucket2/dir3/key4", conf); Assert.assertEquals("", ofsPath.getAuthority()); Assert.assertEquals("volume1", ofsPath.getVolumeName()); Assert.assertEquals("bucket2", ofsPath.getBucketName()); @@ -54,7 +57,7 @@ public void testParsingVolumeBucketWithKey() { Assert.assertEquals("/volume1/bucket2/dir3/key4", ofsPath.toString()); // The ending '/' matters for key inside a bucket, indicating directory - ofsPath = new OFSPath("/volume1/bucket2/dir3/dir5/"); + ofsPath = new OFSPath("/volume1/bucket2/dir3/dir5/", conf); Assert.assertEquals("", ofsPath.getAuthority()); Assert.assertEquals("volume1", ofsPath.getVolumeName()); Assert.assertEquals("bucket2", ofsPath.getBucketName()); @@ -68,7 +71,7 @@ public void testParsingVolumeBucketWithKey() { @Test public void testParsingVolumeBucketOnly() { // Volume and bucket only - OFSPath ofsPath = new OFSPath("/volume1/bucket2/"); + OFSPath ofsPath = new OFSPath("/volume1/bucket2/", conf); Assert.assertEquals("", ofsPath.getAuthority()); Assert.assertEquals("volume1", ofsPath.getVolumeName()); Assert.assertEquals("bucket2", ofsPath.getBucketName()); @@ -79,7 +82,7 @@ public void testParsingVolumeBucketOnly() { Assert.assertEquals("/volume1/bucket2/", ofsPath.toString()); // The trailing '/' doesn't matter when parsing a bucket path - ofsPath = new OFSPath("/volume1/bucket2"); + ofsPath = new OFSPath("/volume1/bucket2", conf); Assert.assertEquals("", ofsPath.getAuthority()); Assert.assertEquals("volume1", ofsPath.getVolumeName()); Assert.assertEquals("bucket2", ofsPath.getBucketName()); @@ -93,7 +96,7 @@ public void testParsingVolumeBucketOnly() { @Test public void testParsingVolumeOnly() { // Volume only - OFSPath ofsPath = new OFSPath("/volume1/"); + OFSPath ofsPath = new OFSPath("/volume1/", conf); Assert.assertEquals("", ofsPath.getAuthority()); Assert.assertEquals("volume1", ofsPath.getVolumeName()); 
Assert.assertEquals("", ofsPath.getBucketName()); @@ -104,7 +107,7 @@ public void testParsingVolumeOnly() { Assert.assertEquals("/volume1/", ofsPath.toString()); // The trailing '/' doesn't matter when parsing a volume path - ofsPath = new OFSPath("/volume1"); + ofsPath = new OFSPath("/volume1", conf); Assert.assertEquals("", ofsPath.getAuthority()); Assert.assertEquals("volume1", ofsPath.getVolumeName()); Assert.assertEquals("", ofsPath.getBucketName()); @@ -120,7 +123,7 @@ public void testParsingVolumeOnly() { @Test public void testParsingEmptyInput() { - OFSPath ofsPath = new OFSPath(""); + OFSPath ofsPath = new OFSPath("", conf); Assert.assertEquals("", ofsPath.getAuthority()); Assert.assertEquals("", ofsPath.getVolumeName()); Assert.assertEquals("", ofsPath.getBucketName()); @@ -133,7 +136,8 @@ public void testParsingEmptyInput() { @Test public void testParsingWithAuthority() { - OFSPath ofsPath = new OFSPath("ofs://svc1:9876/volume1/bucket2/dir3/"); + OFSPath ofsPath = new OFSPath("ofs://svc1:9876/volume1/bucket2/dir3/", + conf); Assert.assertEquals("svc1:9876", ofsPath.getAuthority()); Assert.assertEquals("volume1", ofsPath.getVolumeName()); Assert.assertEquals("bucket2", ofsPath.getBucketName()); @@ -155,7 +159,7 @@ public void testParsingMount() { bucketName = ""; // Make javac happy } // Mount only - OFSPath ofsPath = new OFSPath("/tmp/"); + OFSPath ofsPath = new OFSPath("/tmp/", conf); Assert.assertEquals("", ofsPath.getAuthority()); Assert.assertEquals( OFSPath.OFS_MOUNT_TMP_VOLUMENAME, ofsPath.getVolumeName()); @@ -167,7 +171,7 @@ public void testParsingMount() { Assert.assertEquals("/tmp/", ofsPath.toString()); // Mount with key - ofsPath = new OFSPath("/tmp/key1"); + ofsPath = new OFSPath("/tmp/key1", conf); Assert.assertEquals("", ofsPath.getAuthority()); Assert.assertEquals( OFSPath.OFS_MOUNT_TMP_VOLUMENAME, ofsPath.getVolumeName()); diff --git a/hadoop-ozone/ozonefs-hadoop2/pom.xml b/hadoop-ozone/ozonefs-hadoop2/pom.xml index 
30b743a50142..c58dd2185a0a 100644 --- a/hadoop-ozone/ozonefs-hadoop2/pom.xml +++ b/hadoop-ozone/ozonefs-hadoop2/pom.xml @@ -19,12 +19,12 @@ org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-filesystem-hadoop2 Apache Ozone FS Hadoop 2.x compatibility jar - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT true org.apache.hadoop.ozone.shaded @@ -53,19 +53,19 @@ org.apache.hadoop hadoop-common provided - 2.7.3 + ${hadoop2.version} org.apache.hadoop hadoop-annotations provided - 2.7.3 + ${hadoop2.version} org.apache.hadoop hadoop-auth provided - 2.7.3 + ${hadoop2.version} ch.qos.reload4j diff --git a/hadoop-ozone/ozonefs-hadoop2/src/main/java/org/apache/hadoop/fs/ozone/RootedOzoneFileSystem.java b/hadoop-ozone/ozonefs-hadoop2/src/main/java/org/apache/hadoop/fs/ozone/RootedOzoneFileSystem.java index 67aa705ab80f..249b2767bde9 100644 --- a/hadoop-ozone/ozonefs-hadoop2/src/main/java/org/apache/hadoop/fs/ozone/RootedOzoneFileSystem.java +++ b/hadoop-ozone/ozonefs-hadoop2/src/main/java/org/apache/hadoop/fs/ozone/RootedOzoneFileSystem.java @@ -17,9 +17,27 @@ */ package org.apache.hadoop.fs.ozone; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.permission.FsPermission; + /** * Minimal Rooted Ozone File System compatible with Hadoop 2.x. 
*/ public class RootedOzoneFileSystem extends BasicRootedOzoneFileSystem { - + @Override + protected FileStatus constructFileStatus( + FileStatusAdapter fileStatusAdapter) { + return new FileStatus(fileStatusAdapter.getLength(), + fileStatusAdapter.isDir(), + fileStatusAdapter.getBlockReplication(), + fileStatusAdapter.getBlocksize(), + fileStatusAdapter.getModificationTime(), + fileStatusAdapter.getAccessTime(), + new FsPermission(fileStatusAdapter.getPermission()), + fileStatusAdapter.getOwner(), + fileStatusAdapter.getGroup(), + fileStatusAdapter.getSymlink(), + fileStatusAdapter.getPath() + ); + } } diff --git a/hadoop-ozone/ozonefs-hadoop3-client/pom.xml b/hadoop-ozone/ozonefs-hadoop3-client/pom.xml index 213fdceed0b6..134395079b2e 100644 --- a/hadoop-ozone/ozonefs-hadoop3-client/pom.xml +++ b/hadoop-ozone/ozonefs-hadoop3-client/pom.xml @@ -19,7 +19,7 @@ org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT org.apache.hadoop.ozone.shaded diff --git a/hadoop-ozone/ozonefs-shaded/pom.xml b/hadoop-ozone/ozonefs-shaded/pom.xml index 0c94d6e5ca1a..ef7a92fab0a0 100644 --- a/hadoop-ozone/ozonefs-shaded/pom.xml +++ b/hadoop-ozone/ozonefs-shaded/pom.xml @@ -19,12 +19,12 @@ org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-filesystem-shaded Apache Ozone FileSystem Shaded jar - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT true diff --git a/hadoop-ozone/ozonefs/pom.xml b/hadoop-ozone/ozonefs/pom.xml index 393d16f74644..8cf019e05d59 100644 --- a/hadoop-ozone/ozonefs/pom.xml +++ b/hadoop-ozone/ozonefs/pom.xml @@ -19,12 +19,12 @@ org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-filesystem Apache Ozone FileSystem jar - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT UTF-8 true diff --git a/hadoop-ozone/pom.xml b/hadoop-ozone/pom.xml index 221cd7af0390..c3aefb4afc86 100644 --- a/hadoop-ozone/pom.xml +++ b/hadoop-ozone/pom.xml @@ -16,10 +16,10 @@ org.apache.ozone ozone-main - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Project Apache 
Ozone pom diff --git a/hadoop-ozone/recon-codegen/pom.xml b/hadoop-ozone/recon-codegen/pom.xml index 3bfaa743a513..802f72683802 100644 --- a/hadoop-ozone/recon-codegen/pom.xml +++ b/hadoop-ozone/recon-codegen/pom.xml @@ -18,7 +18,7 @@ ozone org.apache.ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT 4.0.0 ozone-reconcodegen @@ -35,7 +35,6 @@ org.apache.derby derby - 10.14.2.0 com.google.inject.extensions diff --git a/hadoop-ozone/recon/pom.xml b/hadoop-ozone/recon/pom.xml index e0af9ff94659..6f6755d3267f 100644 --- a/hadoop-ozone/recon/pom.xml +++ b/hadoop-ozone/recon/pom.xml @@ -18,7 +18,7 @@ org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Recon 4.0.0 @@ -330,7 +330,6 @@ org.apache.derby derby - 10.14.2.0 org.xerial @@ -353,7 +352,6 @@ org.javassist javassist - 3.21.0-GA diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java index 154e6a1db49f..124e48981d60 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java @@ -25,9 +25,6 @@ import org.apache.hadoop.hdds.StringUtils; import org.apache.hadoop.hdds.cli.GenericCli; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos; -import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.recon.ReconConfig; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.hdds.security.x509.SecurityConfig; @@ -35,6 +32,7 @@ import org.apache.hadoop.hdds.security.x509.certificate.client.ReconCertificateClient; import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.ozone.OzoneSecurityUtil; +import 
org.apache.hadoop.ozone.recon.scm.ReconSafeModeManager; import org.apache.hadoop.ozone.recon.scm.ReconStorageConfig; import org.apache.hadoop.ozone.recon.metrics.ReconTaskStatusMetrics; import org.apache.hadoop.ozone.recon.spi.OzoneManagerServiceProvider; @@ -48,22 +46,18 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.client.AuthenticationException; -import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.hadoop.ozone.recon.codegen.ReconSchemaGenerationModule; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.net.InetAddress; import java.net.InetSocketAddress; -import java.security.cert.CertificateException; import static org.apache.hadoop.hdds.recon.ReconConfig.ConfigStrings.OZONE_RECON_KERBEROS_KEYTAB_FILE_KEY; import static org.apache.hadoop.hdds.recon.ReconConfig.ConfigStrings.OZONE_RECON_KERBEROS_PRINCIPAL_KEY; -import static org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec.getX509Certificate; -import static org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest.getEncodedString; import static org.apache.hadoop.ozone.common.Storage.StorageState.INITIALIZED; import static org.apache.hadoop.ozone.conf.OzoneServiceConfig.DEFAULT_SHUTDOWN_HOOK_PRIORITY; +import static org.apache.hadoop.util.ExitUtil.terminate; /** * Recon server main class that stops and starts recon services. 
@@ -79,6 +73,7 @@ public class ReconServer extends GenericCli { private ReconDBProvider reconDBProvider; private ReconNamespaceSummaryManager reconNamespaceSummaryManager; private OzoneStorageContainerManager reconStorageContainerManager; + private ReconSafeModeManager reconSafeModeMgr; private OzoneConfiguration configuration; private ReconStorageConfig reconStorage; private CertificateClient certClient; @@ -102,8 +97,7 @@ public Void call() throws Exception { configuration = createOzoneConfiguration(); ConfigurationProvider.setConfiguration(configuration); - injector = Guice.createInjector(new - ReconControllerModule(), + injector = Guice.createInjector(new ReconControllerModule(), new ReconRestServletModule(configuration), new ReconSchemaGenerationModule()); @@ -124,7 +118,6 @@ public Void call() throws Exception { "Initializing certificate."); initializeCertificateClient(configuration); } - reconStorage.persistCurrentState(); } catch (Exception e) { LOG.error("Error during initializing Recon certificate", e); } @@ -139,11 +132,14 @@ public Void call() throws Exception { LOG.info("Creating Recon Schema."); reconSchemaManager.createReconSchema(); + this.reconSafeModeMgr = injector.getInstance(ReconSafeModeManager.class); + this.reconSafeModeMgr.setInSafeMode(true); httpServer = injector.getInstance(ReconHttpServer.class); this.ozoneManagerServiceProvider = injector.getInstance(OzoneManagerServiceProvider.class); this.reconStorageContainerManager = injector.getInstance(OzoneStorageContainerManager.class); + this.reconTaskStatusMetrics = injector.getInstance(ReconTaskStatusMetrics.class); LOG.info("Recon server initialized successfully!"); @@ -172,9 +168,9 @@ public Void call() throws Exception { private void initializeCertificateClient(OzoneConfiguration conf) throws IOException { LOG.info("Initializing secure Recon."); - certClient = new ReconCertificateClient( - new SecurityConfig(configuration), - reconStorage.getReconCertSerialId()); + certClient = new 
ReconCertificateClient(new SecurityConfig(configuration), + reconStorage.getReconCertSerialId(), reconStorage.getClusterID(), + reconStorage.getReconId(), this::saveNewCertId, null); CertificateClient.InitResponse response = certClient.init(); if (response.equals(CertificateClient.InitResponse.REINIT)) { @@ -182,7 +178,8 @@ private void initializeCertificateClient(OzoneConfiguration conf) reconStorage.unsetReconCertSerialId(); reconStorage.persistCurrentState(); certClient = new ReconCertificateClient(new SecurityConfig(configuration), - reconStorage.getReconCertSerialId()); + reconStorage.getReconCertSerialId(), reconStorage.getClusterID(), + reconStorage.getReconId(), this::saveNewCertId, this::terminateRecon); response = certClient.init(); } LOG.info("Init response: {}", response); @@ -191,7 +188,12 @@ private void initializeCertificateClient(OzoneConfiguration conf) LOG.info("Initialization successful, case:{}.", response); break; case GETCERT: - getSCMSignedCert(conf); + String certId = certClient.signAndStoreCertificate( + certClient.getCSRBuilder().build()); + reconStorage.setReconCertSerialId(certId); + reconStorage.persistCurrentState(); + // set new certificate ID + certClient.setCertificateId(certId); LOG.info("Successfully stored SCM signed certificate, case:{}.", response); break; @@ -209,53 +211,23 @@ private void initializeCertificateClient(OzoneConfiguration conf) } } - /** - * Get SCM signed certificate and store it using certificate client. 
- * @param config - * */ - private void getSCMSignedCert(OzoneConfiguration config) { + public void saveNewCertId(String newCertId) { try { - PKCS10CertificationRequest csr = ReconUtils.getCSR(config, certClient); - LOG.info("Creating CSR for Recon."); - - SCMSecurityProtocolClientSideTranslatorPB secureScmClient = - HddsServerUtil.getScmSecurityClientWithMaxRetry(config); - HddsProtos.NodeDetailsProto.Builder reconDetailsProtoBuilder = - HddsProtos.NodeDetailsProto.newBuilder() - .setHostName(InetAddress.getLocalHost().getHostName()) - .setClusterId(reconStorage.getClusterID()) - .setUuid(reconStorage.getReconId()) - .setNodeType(HddsProtos.NodeType.RECON); - - SCMSecurityProtocolProtos.SCMGetCertResponseProto response = - secureScmClient.getCertificateChain( - reconDetailsProtoBuilder.build(), - getEncodedString(csr)); - // Persist certificates. - if (response.hasX509CACertificate()) { - String pemEncodedCert = response.getX509Certificate(); - certClient.storeCertificate(pemEncodedCert, true); - certClient.storeCertificate(response.getX509CACertificate(), true, - true); - - // Store Root CA certificate. - if (response.hasX509RootCACertificate()) { - certClient.storeRootCACertificate( - response.getX509RootCACertificate(), true); - } - String reconCertSerialId = getX509Certificate(pemEncodedCert). - getSerialNumber().toString(); - reconStorage.setReconCertSerialId(reconCertSerialId); - } else { - throw new RuntimeException("Unable to retrieve recon certificate " + - "chain"); - } - } catch (IOException | CertificateException e) { - LOG.error("Error while storing SCM signed certificate.", e); - throw new RuntimeException(e); + reconStorage.setReconCertSerialId(newCertId); + reconStorage.persistCurrentState(); + } catch (IOException ex) { + // New cert ID cannot be persisted into VERSION file. + LOG.error("Failed to persist new cert ID {} to VERSION file." 
+ + "Terminating OzoneManager...", newCertId, ex); + terminateRecon(); } } + public void terminateRecon() { + stop(); + terminate(1); + } + /** * Need a way to restart services from tests. */ @@ -278,17 +250,26 @@ public void start() throws Exception { } } - public void stop() throws Exception { + public void stop() { if (isStarted) { LOG.info("Stopping Recon server"); if (httpServer != null) { - httpServer.stop(); + try { + httpServer.stop(); + } catch (Exception e) { + LOG.error("Stopping HttpServer is failed.", e); + } } + if (reconStorageContainerManager != null) { reconStorageContainerManager.stop(); } if (ozoneManagerServiceProvider != null) { - ozoneManagerServiceProvider.stop(); + try { + ozoneManagerServiceProvider.stop(); + } catch (Exception e) { + LOG.error("Stopping ozoneManagerServiceProvider is failed.", e); + } } if (reconTaskStatusMetrics != null) { reconTaskStatusMetrics.unregister(); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java index c9e31563d34c..2c97a0dfd3b2 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java @@ -149,6 +149,18 @@ public final class ReconServerConfigKeys { public static final long OZONE_RECON_NSSUMMARY_FLUSH_TO_DB_MAX_THRESHOLD_DEFAULT = 150 * 1000L; + + public static final String OZONE_RECON_SCM_SNAPSHOT_TASK_INTERVAL_DELAY = + "ozone.recon.scm.snapshot.task.interval.delay"; + + public static final String OZONE_RECON_SCM_SNAPSHOT_TASK_INTERVAL_DEFAULT + = "24h"; + + public static final String OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY = + "ozone.recon.scm.snapshot.task.initial.delay"; + + public static final String + OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY_DEFAULT = "1m"; /** * Private constructor for utility class. 
*/ diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ClusterStateEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ClusterStateEndpoint.java index c7b2374e187a..ba3e28d4293b 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ClusterStateEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ClusterStateEndpoint.java @@ -19,6 +19,7 @@ package org.apache.hadoop.ozone.recon.api; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat; import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; @@ -90,13 +91,15 @@ public Response getClusterState() { List datanodeDetails = nodeManager.getAllNodes(); int containers = this.containerManager.getContainers().size(); int pipelines = this.pipelineManager.getPipelines().size(); - List unhealthyContainers = containerHealthSchemaManager + List missingContainers = containerHealthSchemaManager .getUnhealthyContainers( ContainerSchemaDefinition.UnHealthyContainerStates.MISSING, 0, MISSING_CONTAINER_COUNT_LIMIT); - int totalMissingContainerCount = unhealthyContainers.size() == + int totalMissingContainerCount = missingContainers.size() == MISSING_CONTAINER_COUNT_LIMIT ? 
- MISSING_CONTAINER_COUNT_LIMIT : unhealthyContainers.size(); + MISSING_CONTAINER_COUNT_LIMIT : missingContainers.size(); + int openContainersCount = this.containerManager.getContainerStateCount( + HddsProtos.LifeCycleState.OPEN); int healthyDatanodes = nodeManager.getNodeCount(NodeStatus.inServiceHealthy()) + nodeManager.getNodeCount(NodeStatus.inServiceHealthyReadOnly()); @@ -139,6 +142,7 @@ public Response getClusterState() { .setMissingContainers(totalMissingContainerCount) .setTotalDatanodes(datanodeDetails.size()) .setHealthyDatanodes(healthyDatanodes) + .setOpenContainers(openContainersCount) .build(); return Response.ok(response).build(); } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/NSSummaryEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/NSSummaryEndpoint.java index aa2699f8bbdd..5b104c461158 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/NSSummaryEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/NSSummaryEndpoint.java @@ -80,8 +80,10 @@ public Response getBasicInfo( NamespaceSummaryResponse namespaceSummaryResponse; if (!isInitializationComplete()) { namespaceSummaryResponse = - new NamespaceSummaryResponse(EntityType.UNKNOWN); - namespaceSummaryResponse.setStatus(ResponseStatus.INITIALIZING); + NamespaceSummaryResponse.newBuilder() + .setEntityType(EntityType.UNKNOWN) + .setStatus(ResponseStatus.INITIALIZING) + .build(); return Response.ok(namespaceSummaryResponse).build(); } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/BucketEntityHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/BucketEntityHandler.java index 36f1636af0f6..7ad961195ee7 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/BucketEntityHandler.java +++ 
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/BucketEntityHandler.java @@ -19,12 +19,15 @@ import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.recon.api.types.BucketObjectDBInfo; +import org.apache.hadoop.ozone.recon.api.types.CountStats; import org.apache.hadoop.ozone.recon.api.types.NamespaceSummaryResponse; import org.apache.hadoop.ozone.recon.api.types.EntityType; import org.apache.hadoop.ozone.recon.api.types.DUResponse; import org.apache.hadoop.ozone.recon.api.types.QuotaUsageResponse; import org.apache.hadoop.ozone.recon.api.types.FileSizeDistributionResponse; import org.apache.hadoop.ozone.recon.api.types.NSSummary; +import org.apache.hadoop.ozone.recon.api.types.ResponseStatus; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager; @@ -49,16 +52,37 @@ public BucketEntityHandler( @Override public NamespaceSummaryResponse getSummaryResponse() throws IOException { - NamespaceSummaryResponse namespaceSummaryResponse = - new NamespaceSummaryResponse(EntityType.BUCKET); + String[] names = getNames(); assert (names.length == 2); long bucketObjectId = getBucketHandler().getBucketObjectId(names); - namespaceSummaryResponse - .setNumTotalDir(getTotalDirCount(bucketObjectId)); - namespaceSummaryResponse.setNumTotalKey(getTotalKeyCount(bucketObjectId)); - return namespaceSummaryResponse; + CountStats countStats = new CountStats( + -1, -1, + getTotalDirCount(bucketObjectId), getTotalKeyCount(bucketObjectId)); + return NamespaceSummaryResponse.newBuilder() + .setEntityType(EntityType.BUCKET) + .setCountStats(countStats) + .setObjectDBInfo(getBucketObjDbInfo(names)) + .setStatus(ResponseStatus.OK) + .build(); + } + + private BucketObjectDBInfo getBucketObjDbInfo(String[] names) + throws IOException { + String volName = names[0]; + String bucketName = 
names[1]; + String bucketKey = getOmMetadataManager(). + getBucketKey(volName, bucketName); + if (null == bucketKey) { + return new BucketObjectDBInfo(); + } + OmBucketInfo omBucketInfo = getOmMetadataManager() + .getBucketTable().getSkipCache(bucketKey); + if (null == omBucketInfo) { + return new BucketObjectDBInfo(); + } + return new BucketObjectDBInfo(omBucketInfo); } @Override diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/BucketHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/BucketHandler.java index d12c3cdb6ad3..737744155673 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/BucketHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/BucketHandler.java @@ -17,14 +17,10 @@ */ package org.apache.hadoop.ozone.recon.api.handlers; -import org.apache.hadoop.hdds.client.BlockID; -import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerManager; -import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; -import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; -import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; @@ -97,6 +93,9 @@ public abstract long getDirObjectId(String[] names, int cutoff) public abstract OmKeyInfo getKeyInfo(String[] names) throws IOException; + public abstract OmDirectoryInfo getDirInfo(String[] names) + throws IOException; + /** * Fixing the existing path and appending the next level entity to it. 
* @param path @@ -115,27 +114,6 @@ public static String buildSubpath(String path, String nextLevel) { return subpath; } - public long getKeySizeWithReplication(OmKeyInfo keyInfo) { - OmKeyLocationInfoGroup locationGroup = keyInfo.getLatestVersionLocations(); - List keyLocations = - locationGroup.getBlocksLatestVersionOnly(); - long du = 0L; - // a key could be too large to fit in one single container - for (OmKeyLocationInfo location: keyLocations) { - BlockID block = location.getBlockID(); - ContainerID containerId = new ContainerID(block.getContainerID()); - try { - int replicationFactor = - containerManager.getContainerReplicas(containerId).size(); - long blockSize = location.getLength() * replicationFactor; - du += blockSize; - } catch (ContainerNotFoundException cnfe) { - LOG.warn("Cannot find container {}", block.getContainerID(), cnfe); - } - } - return du; - } - /** * Example: /vol1/buck1/a/b/c/d/e/file1.txt -> a/b/c/d/e/file1.txt. * @param names parsed request diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/DirectoryEntityHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/DirectoryEntityHandler.java index 0cfa6f1b47fc..fc7022e2dab2 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/DirectoryEntityHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/DirectoryEntityHandler.java @@ -18,8 +18,11 @@ package org.apache.hadoop.ozone.recon.api.handlers; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.recon.api.types.CountStats; import org.apache.hadoop.ozone.recon.api.types.NamespaceSummaryResponse; import org.apache.hadoop.ozone.recon.api.types.EntityType; +import org.apache.hadoop.ozone.recon.api.types.ObjectDBInfo; import org.apache.hadoop.ozone.recon.api.types.ResponseStatus; import 
org.apache.hadoop.ozone.recon.api.types.DUResponse; import org.apache.hadoop.ozone.recon.api.types.NSSummary; @@ -55,13 +58,24 @@ public NamespaceSummaryResponse getSummaryResponse() throws IOException { // path should exist so we don't need any extra verification/null check long dirObjectId = getBucketHandler().getDirObjectId(getNames()); - NamespaceSummaryResponse namespaceSummaryResponse = - new NamespaceSummaryResponse(EntityType.DIRECTORY); - namespaceSummaryResponse - .setNumTotalDir(getTotalDirCount(dirObjectId)); - namespaceSummaryResponse.setNumTotalKey(getTotalKeyCount(dirObjectId)); + CountStats countStats = new CountStats( + -1, -1, + getTotalDirCount(dirObjectId), getTotalKeyCount(dirObjectId)); + return NamespaceSummaryResponse.newBuilder() + .setEntityType(EntityType.DIRECTORY) + .setCountStats(countStats) + .setObjectDBInfo(getDirectoryObjDbInfo(getNames())) + .setStatus(ResponseStatus.OK) + .build(); + } - return namespaceSummaryResponse; + private ObjectDBInfo getDirectoryObjDbInfo(String[] names) + throws IOException { + OmDirectoryInfo omDirectoryInfo = getBucketHandler().getDirInfo(names); + if (null == omDirectoryInfo) { + return new ObjectDBInfo(); + } + return new ObjectDBInfo(omDirectoryInfo); } @Override diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/FSOBucketHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/FSOBucketHandler.java index ba188b70bdb6..147e785a4bc9 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/FSOBucketHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/FSOBucketHandler.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.ozone.recon.api.handlers; +import com.google.common.base.Preconditions; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; 
@@ -143,7 +144,7 @@ public long calculateDUUnderObject(long parentId) } OmKeyInfo keyInfo = kv.getValue(); if (keyInfo != null) { - totalDU += getKeySizeWithReplication(keyInfo); + totalDU += keyInfo.getReplicatedSize(); } } } @@ -212,7 +213,7 @@ public long handleDirectKeys(long parentId, boolean withReplica, diskUsage.setSize(keyInfo.getDataSize()); if (withReplica) { - long keyDU = getKeySizeWithReplication(keyInfo); + long keyDU = keyInfo.getReplicatedSize(); keyDataSizeWithReplica += keyDU; diskUsage.setSizeWithReplica(keyDU); } @@ -276,4 +277,23 @@ public OmKeyInfo getKeyInfo(String[] names) throws IOException { parentObjectId, fileName); return getOmMetadataManager().getFileTable().getSkipCache(ozoneKey); } + + @Override + public OmDirectoryInfo getDirInfo(String[] names) throws IOException { + String path = OM_KEY_PREFIX; + path += String.join(OM_KEY_PREFIX, names); + Preconditions.checkArgument( + names.length >= 3, + "Path should be a directory: %s", path); + long parentObjectId = getDirObjectId(names, names.length - 1); + String dirKey = getOmMetadataManager().getOzonePathKey( + getVolumeObjectId(names), + getBucketObjectId(names), + parentObjectId, + names[names.length - 1]); + OmDirectoryInfo dirInfo = getOmMetadataManager() + .getDirectoryTable().getSkipCache(dirKey); + return dirInfo; + } + } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/KeyEntityHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/KeyEntityHandler.java index f70945462a39..a687bf3d0bdd 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/KeyEntityHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/KeyEntityHandler.java @@ -19,8 +19,11 @@ import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.recon.api.types.CountStats; +import 
org.apache.hadoop.ozone.recon.api.types.KeyObjectDBInfo; import org.apache.hadoop.ozone.recon.api.types.NamespaceSummaryResponse; import org.apache.hadoop.ozone.recon.api.types.EntityType; +import org.apache.hadoop.ozone.recon.api.types.ObjectDBInfo; import org.apache.hadoop.ozone.recon.api.types.ResponseStatus; import org.apache.hadoop.ozone.recon.api.types.DUResponse; import org.apache.hadoop.ozone.recon.api.types.QuotaUsageResponse; @@ -46,10 +49,24 @@ public KeyEntityHandler( @Override public NamespaceSummaryResponse getSummaryResponse() throws IOException { - NamespaceSummaryResponse namespaceSummaryResponse = - new NamespaceSummaryResponse(EntityType.KEY); + CountStats countStats = new CountStats( + -1, -1, + -1, 0); + return NamespaceSummaryResponse.newBuilder() + .setEntityType(EntityType.KEY) + .setCountStats(countStats) + .setObjectDBInfo(getKeyDbObjectInfo(getNames())) + .setStatus(ResponseStatus.OK) + .build(); + } - return namespaceSummaryResponse; + private ObjectDBInfo getKeyDbObjectInfo(String[] names) + throws IOException { + OmKeyInfo omKeyInfo = getBucketHandler().getKeyInfo(names); + if (null == omKeyInfo) { + return new KeyObjectDBInfo(); + } + return new KeyObjectDBInfo(omKeyInfo); } @Override @@ -64,8 +81,7 @@ public DUResponse getDuResponse( duResponse.setSize(keyInfo.getDataSize()); if (withReplica) { - long keySizeWithReplica = getBucketHandler() - .getKeySizeWithReplication(keyInfo); + long keySizeWithReplica = keyInfo.getReplicatedSize(); duResponse.setSizeWithReplica(keySizeWithReplica); } return duResponse; diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/LegacyBucketHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/LegacyBucketHandler.java index e4d218fed9b3..162fbc2e93d6 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/LegacyBucketHandler.java +++ 
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/LegacyBucketHandler.java @@ -17,11 +17,13 @@ */ package org.apache.hadoop.ozone.recon.api.handlers; +import com.google.common.base.Preconditions; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OzoneFSUtils; import org.apache.hadoop.ozone.recon.api.types.DUResponse; @@ -163,7 +165,7 @@ public long calculateDUUnderObject(long parentId) if (keyInfo.getKeyName().endsWith(OM_KEY_PREFIX)) { continue; } - totalDU += getKeySizeWithReplication(keyInfo); + totalDU += keyInfo.getReplicatedSize(); } } @@ -248,7 +250,7 @@ public long handleDirectKeys(long parentId, boolean withReplica, diskUsage.setSize(keyInfo.getDataSize()); if (withReplica) { - long keyDU = getKeySizeWithReplication(keyInfo); + long keyDU = keyInfo.getReplicatedSize(); keyDataSizeWithReplica += keyDU; diskUsage.setSizeWithReplica(keyDU); } @@ -322,4 +324,16 @@ public Table getKeyTable() { getOmMetadataManager().getKeyTable(getBucketLayout()); return keyTable; } + + @Override + public OmDirectoryInfo getDirInfo(String[] names) throws IOException { + String path = OM_KEY_PREFIX; + path += String.join(OM_KEY_PREFIX, names); + Preconditions.checkArgument( + names.length >= 3, + "Path should be a directory: %s", path); + return OmDirectoryInfo.newBuilder() + .setName(names[names.length - 1]) + .build(); + } } \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/RootEntityHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/RootEntityHandler.java index 
357f3a30c267..72f0b0a08b18 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/RootEntityHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/RootEntityHandler.java @@ -18,16 +18,22 @@ package org.apache.hadoop.ozone.recon.api.handlers; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; +import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.OmPrefixInfo; import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; import org.apache.hadoop.ozone.recon.ReconConstants; +import org.apache.hadoop.ozone.recon.api.types.CountStats; import org.apache.hadoop.ozone.recon.api.types.NamespaceSummaryResponse; import org.apache.hadoop.ozone.recon.api.types.EntityType; import org.apache.hadoop.ozone.recon.api.types.DUResponse; +import org.apache.hadoop.ozone.recon.api.types.ObjectDBInfo; import org.apache.hadoop.ozone.recon.api.types.QuotaUsageResponse; import org.apache.hadoop.ozone.recon.api.types.FileSizeDistributionResponse; +import org.apache.hadoop.ozone.recon.api.types.ResponseStatus; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat; import java.io.IOException; import java.util.ArrayList; @@ -48,12 +54,9 @@ public RootEntityHandler( @Override public NamespaceSummaryResponse getSummaryResponse() throws IOException { - NamespaceSummaryResponse namespaceSummaryResponse = - new NamespaceSummaryResponse(EntityType.ROOT); + List volumes = listVolumes(); - namespaceSummaryResponse.setNumVolume(volumes.size()); List allBuckets = listBucketsUnderVolume(null); - namespaceSummaryResponse.setNumBucket(allBuckets.size()); int totalNumDir = 0; long totalNumKey = 0L; for (OmBucketInfo bucket : allBuckets) { @@ -61,11 +64,25 @@ public 
NamespaceSummaryResponse getSummaryResponse() totalNumDir += getTotalDirCount(bucketObjectId); totalNumKey += getTotalKeyCount(bucketObjectId); } + CountStats countStats = new CountStats( + volumes.size(), allBuckets.size(), totalNumDir, totalNumKey); + + return NamespaceSummaryResponse.newBuilder() + .setEntityType(EntityType.ROOT) + .setCountStats(countStats) + .setObjectDBInfo(getPrefixObjDbInfo()) + .setStatus(ResponseStatus.OK) + .build(); + } - namespaceSummaryResponse.setNumTotalDir(totalNumDir); - namespaceSummaryResponse.setNumTotalKey(totalNumKey); - - return namespaceSummaryResponse; + private ObjectDBInfo getPrefixObjDbInfo() + throws IOException { + OmPrefixInfo omPrefixInfo = getOmMetadataManager().getPrefixTable() + .getSkipCache(OzoneConsts.OM_KEY_PREFIX); + if (null == omPrefixInfo) { + return new ObjectDBInfo(); + } + return new ObjectDBInfo(omPrefixInfo); } @Override @@ -127,26 +144,9 @@ public DUResponse getDuResponse( public QuotaUsageResponse getQuotaResponse() throws IOException { QuotaUsageResponse quotaUsageResponse = new QuotaUsageResponse(); - List volumes = listVolumes(); - List buckets = listBucketsUnderVolume(null); - long quotaInBytes = 0L; - long quotaUsedInBytes = 0L; - - for (OmVolumeArgs volume: volumes) { - final long quota = volume.getQuotaInBytes(); - assert (quota >= -1L); - if (quota == -1L) { - // If one volume has unlimited quota, the "root" quota is unlimited. 
- quotaInBytes = -1L; - break; - } - quotaInBytes += quota; - } - for (OmBucketInfo bucket: buckets) { - long bucketObjectId = bucket.getObjectID(); - quotaUsedInBytes += getTotalSize(bucketObjectId); - } - + SCMNodeStat stats = getReconSCM().getScmNodeManager().getStats(); + long quotaInBytes = stats.getCapacity().get(); + long quotaUsedInBytes = getDuResponse(true, true).getSizeWithReplica(); quotaUsageResponse.setQuota(quotaInBytes); quotaUsageResponse.setQuotaUsed(quotaUsedInBytes); return quotaUsageResponse; diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/UnknownEntityHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/UnknownEntityHandler.java index c1d5dfe4ad1f..b5a5bd9a0be9 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/UnknownEntityHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/UnknownEntityHandler.java @@ -44,11 +44,9 @@ public UnknownEntityHandler( @Override public NamespaceSummaryResponse getSummaryResponse() throws IOException { - NamespaceSummaryResponse namespaceSummaryResponse = - new NamespaceSummaryResponse(EntityType.UNKNOWN); - namespaceSummaryResponse.setStatus(ResponseStatus.PATH_NOT_FOUND); - - return namespaceSummaryResponse; + return NamespaceSummaryResponse.newBuilder() + .setEntityType(EntityType.UNKNOWN) + .setStatus(ResponseStatus.PATH_NOT_FOUND).build(); } @Override diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/VolumeEntityHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/VolumeEntityHandler.java index e8c4c03bc06b..8c9ae62c0a9a 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/VolumeEntityHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/handlers/VolumeEntityHandler.java @@ -21,11 +21,14 @@ import 
org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; import org.apache.hadoop.ozone.recon.ReconConstants; +import org.apache.hadoop.ozone.recon.api.types.CountStats; import org.apache.hadoop.ozone.recon.api.types.NamespaceSummaryResponse; import org.apache.hadoop.ozone.recon.api.types.EntityType; import org.apache.hadoop.ozone.recon.api.types.DUResponse; import org.apache.hadoop.ozone.recon.api.types.QuotaUsageResponse; import org.apache.hadoop.ozone.recon.api.types.FileSizeDistributionResponse; +import org.apache.hadoop.ozone.recon.api.types.ResponseStatus; +import org.apache.hadoop.ozone.recon.api.types.VolumeObjectDBInfo; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager; @@ -48,11 +51,9 @@ public VolumeEntityHandler( @Override public NamespaceSummaryResponse getSummaryResponse() throws IOException { - NamespaceSummaryResponse namespaceSummaryResponse = - new NamespaceSummaryResponse(EntityType.VOLUME); + String[] names = getNames(); List buckets = listBucketsUnderVolume(names[0]); - namespaceSummaryResponse.setNumBucket(buckets.size()); int totalDir = 0; long totalKey = 0L; @@ -63,10 +64,29 @@ public NamespaceSummaryResponse getSummaryResponse() totalKey += getTotalKeyCount(bucketObjectId); } - namespaceSummaryResponse.setNumTotalDir(totalDir); - namespaceSummaryResponse.setNumTotalKey(totalKey); + CountStats countStats = new CountStats( + -1, buckets.size(), totalDir, totalKey); + + return NamespaceSummaryResponse.newBuilder() + .setEntityType(EntityType.VOLUME) + .setCountStats(countStats) + .setObjectDBInfo(getVolumeObjDbInfo(names)) + .setStatus(ResponseStatus.OK) + .build(); + } - return namespaceSummaryResponse; + private VolumeObjectDBInfo getVolumeObjDbInfo(String[] names) + throws IOException { + String dbVolumeKey = getOmMetadataManager().getVolumeKey(names[0]); + if (null == dbVolumeKey) { + return new 
VolumeObjectDBInfo(); + } + OmVolumeArgs volumeArgs = + getOmMetadataManager().getVolumeTable().getSkipCache(dbVolumeKey); + if (null == volumeArgs) { + return new VolumeObjectDBInfo(); + } + return new VolumeObjectDBInfo(volumeArgs); } @Override diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/BucketObjectDBInfo.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/BucketObjectDBInfo.java new file mode 100644 index 000000000000..ef2ae7dddfe9 --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/BucketObjectDBInfo.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.api.types; + +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.hadoop.hdds.client.DefaultReplicationConfig; +import org.apache.hadoop.hdds.protocol.StorageType; +import org.apache.hadoop.ozone.om.helpers.BucketEncryptionKeyInfo; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; + + +/** + * Encapsulates the low level bucket info. + */ +public class BucketObjectDBInfo extends ObjectDBInfo { + @JsonProperty("volumeName") + private String volumeName; + + @JsonProperty("storageType") + private StorageType storageType; + + @JsonProperty("versioning") + private boolean isVersioningEnabled; + + @JsonProperty("usedBytes") + private String usedBytes; + + @JsonProperty("encryptionInfo") + private BucketEncryptionKeyInfo bekInfo; + + @JsonProperty("replicationConfigInfo") + private DefaultReplicationConfig defaultReplicationConfig; + + @JsonProperty("sourceVolume") + private String sourceVolume; + + @JsonProperty("sourceBucket") + private String sourceBucket; + + @JsonProperty("bucketLayout") + private BucketLayout bucketLayout; + + @JsonProperty("owner") + private String owner; + + public BucketObjectDBInfo() { + + } + + public BucketObjectDBInfo(OmBucketInfo omBucketInfo) { + super.setMetadata(omBucketInfo.getMetadata()); + super.setName(omBucketInfo.getBucketName()); + super.setQuotaInBytes(omBucketInfo.getQuotaInBytes()); + super.setQuotaInNamespace(omBucketInfo.getQuotaInNamespace()); + super.setUsedNamespace(omBucketInfo.getUsedNamespace()); + super.setCreationTime(omBucketInfo.getCreationTime()); + 
super.setModificationTime(omBucketInfo.getModificationTime()); + super.setAcls(omBucketInfo.getAcls()); + this.volumeName = omBucketInfo.getVolumeName(); + this.sourceBucket = omBucketInfo.getSourceBucket(); + this.sourceVolume = omBucketInfo.getSourceVolume(); + this.isVersioningEnabled = omBucketInfo.getIsVersionEnabled(); + this.storageType = omBucketInfo.getStorageType(); + this.defaultReplicationConfig = omBucketInfo.getDefaultReplicationConfig(); + this.bucketLayout = omBucketInfo.getBucketLayout(); + this.owner = omBucketInfo.getOwner(); + this.bekInfo = omBucketInfo.getEncryptionKeyInfo(); + } + + public String getVolumeName() { + return volumeName; + } + + public void setVolumeName(String volumeName) { + this.volumeName = volumeName; + } + + public StorageType getStorageType() { + return storageType; + } + + public void setStorageType(StorageType storageType) { + this.storageType = storageType; + } + + public String getUsedBytes() { + return usedBytes; + } + + public void setUsedBytes(String usedBytes) { + this.usedBytes = usedBytes; + } + + public BucketEncryptionKeyInfo getBekInfo() { + return bekInfo; + } + + public void setBekInfo(BucketEncryptionKeyInfo bekInfo) { + this.bekInfo = bekInfo; + } + + public DefaultReplicationConfig getDefaultReplicationConfig() { + return defaultReplicationConfig; + } + + public void setDefaultReplicationConfig( + DefaultReplicationConfig defaultReplicationConfig) { + this.defaultReplicationConfig = defaultReplicationConfig; + } + + public String getSourceVolume() { + return sourceVolume; + } + + public void setSourceVolume(String sourceVolume) { + this.sourceVolume = sourceVolume; + } + + public String getSourceBucket() { + return sourceBucket; + } + + public void setSourceBucket(String sourceBucket) { + this.sourceBucket = sourceBucket; + } + + public boolean isVersioningEnabled() { + return isVersioningEnabled; + } + + public void setVersioningEnabled(boolean versioningEnabled) { + isVersioningEnabled = 
versioningEnabled; + } + + public BucketLayout getBucketLayout() { + return bucketLayout; + } + + public void setBucketLayout(BucketLayout bucketLayout) { + this.bucketLayout = bucketLayout; + } + + public String getOwner() { + return owner; + } + + public void setOwner(String owner) { + this.owner = owner; + } + +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/ClusterStateResponse.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/ClusterStateResponse.java index b2b9692db787..7e674591a9a5 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/ClusterStateResponse.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/ClusterStateResponse.java @@ -60,6 +60,12 @@ public final class ClusterStateResponse { @JsonProperty("missingContainers") private int missingContainers; + /** + * Total count of open containers in the cluster. + */ + @JsonProperty("openContainers") + private int openContainers; + /** * Total count of volumes in the cluster. 
*/ @@ -97,6 +103,7 @@ private ClusterStateResponse(Builder b) { this.storageReport = b.storageReport; this.containers = b.containers; this.missingContainers = b.missingContainers; + this.openContainers = b.openContainers; } /** @@ -110,6 +117,7 @@ public static final class Builder { private DatanodeStorageReport storageReport; private int containers; private int missingContainers; + private int openContainers; private long volumes; private long buckets; private long keys; @@ -118,6 +126,7 @@ public Builder() { // Default values this.containers = 0; this.missingContainers = 0; + this.openContainers = 0; this.volumes = 0; this.buckets = 0; this.keys = 0; @@ -156,6 +165,11 @@ public Builder setMissingContainers(int missingContainers) { return this; } + public Builder setOpenContainers(int openContainers) { + this.openContainers = openContainers; + return this; + } + public Builder setVolumes(long volumes) { this.volumes = volumes; return this; @@ -206,6 +220,10 @@ public int getMissingContainers() { return missingContainers; } + public int getOpenContainers() { + return openContainers; + } + public long getBuckets() { return buckets; } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/CountStats.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/CountStats.java new file mode 100644 index 000000000000..1d88831d807e --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/CountStats.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.api.types; + +import com.fasterxml.jackson.annotation.JsonProperty; + +/** Count stats which tells the number of + * volumes/buckets/dir/files etc. + * */ +public class CountStats { + /** Total number of volumes under root, -1 for other types. */ + @JsonProperty("numVolume") + private int numVolume; + + /** Total number of buckets for root/volume, -1 for other types. */ + @JsonProperty("numBucket") + private int numBucket; + + /** Total number of directories for all types except key, -1 for key. */ + @JsonProperty("numDir") + private int numTotalDir; + + /** Total number of keys. */ + @JsonProperty("numKey") + private long numTotalKey; + + public CountStats(int numVolume, int numBucket, + int numTotalDir, long numTotalKey) { + this.numVolume = numVolume; + this.numBucket = numBucket; + this.numTotalDir = numTotalDir; + this.numTotalKey = numTotalKey; + } + + public int getNumVolume() { + return numVolume; + } + + public int getNumBucket() { + return numBucket; + } + + public int getNumTotalDir() { + return numTotalDir; + } + + public long getNumTotalKey() { + return numTotalKey; + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/KeyObjectDBInfo.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/KeyObjectDBInfo.java new file mode 100644 index 000000000000..f8cba210a598 --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/KeyObjectDBInfo.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.api.types; + +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.hadoop.fs.FileEncryptionInfo; +import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; + +import java.util.List; + +/** + * Encapsulates the low level key info. + */ +public class KeyObjectDBInfo extends ObjectDBInfo { + /** volume name from om db. */ + @JsonProperty("volumeName") + private String volumeName; + @JsonProperty("bucketName") + private String bucketName; + @JsonProperty("keyName") + private String keyName; + @JsonProperty("dataSize") + private long dataSize; + @JsonProperty("keyLocationVersions") + private List keyLocationVersions; + @JsonProperty("replicationConfig") + private ReplicationConfig replicationConfig; + @JsonProperty("encInfo") + private FileEncryptionInfo encInfo; + + /** + * Support OFS use-case to identify if the key is a file or a directory. + */ + private boolean isFile; + + /** + * Represents leaf node name. This also will be used when the keyName is + * created on a FileSystemOptimized(FSO) bucket. For example, the user given + * keyName is "a/b/key1" then the fileName stores "key1". 
+ */ + private String fileName; + + public KeyObjectDBInfo() { + + } + + public KeyObjectDBInfo(OmKeyInfo omKeyInfo) { + super.setName(omKeyInfo.getKeyName()); + super.setCreationTime(omKeyInfo.getCreationTime()); + super.setModificationTime(omKeyInfo.getModificationTime()); + super.setAcls(omKeyInfo.getAcls()); + super.setMetadata(omKeyInfo.getMetadata()); + this.setVolumeName(omKeyInfo.getVolumeName()); + this.setBucketName(omKeyInfo.getBucketName()); + this.setKeyName(omKeyInfo.getKeyName()); + this.setDataSize(omKeyInfo.getDataSize()); + this.setKeyLocationVersions(omKeyInfo.getKeyLocationVersions()); + this.setReplicationConfig(omKeyInfo.getReplicationConfig()); + this.setEncInfo(omKeyInfo.getFileEncryptionInfo()); + this.setFileName(omKeyInfo.getFileName()); + this.setFile(omKeyInfo.isFile()); + } + + public String getVolumeName() { + return volumeName; + } + + public void setVolumeName(String volumeName) { + this.volumeName = volumeName; + } + + public String getBucketName() { + return bucketName; + } + + public void setBucketName(String bucketName) { + this.bucketName = bucketName; + } + + public String getKeyName() { + return keyName; + } + + public void setKeyName(String keyName) { + this.keyName = keyName; + } + + public long getDataSize() { + return dataSize; + } + + public void setDataSize(long dataSize) { + this.dataSize = dataSize; + } + + public List getKeyLocationVersions() { + return keyLocationVersions; + } + + public void setKeyLocationVersions( + List keyLocationVersions) { + this.keyLocationVersions = keyLocationVersions; + } + + public ReplicationConfig getReplicationConfig() { + return replicationConfig; + } + + public void setReplicationConfig(ReplicationConfig replicationConfig) { + this.replicationConfig = replicationConfig; + } + + public boolean isFile() { + return isFile; + } + + public void setFile(boolean file) { + isFile = file; + } + + public String getFileName() { + return fileName; + } + + public void setFileName(String fileName) 
{ + this.fileName = fileName; + } + + public FileEncryptionInfo getEncInfo() { + return encInfo; + } + + public void setEncInfo(FileEncryptionInfo encInfo) { + this.encInfo = encInfo; + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/NamespaceSummaryResponse.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/NamespaceSummaryResponse.java index 2a2d9f6de746..5ccfd9887314 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/NamespaceSummaryResponse.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/NamespaceSummaryResponse.java @@ -18,62 +18,67 @@ package org.apache.hadoop.ozone.recon.api.types; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.commons.lang3.StringUtils; /** * HTTP Response wrapped for a 'summary' request. */ public class NamespaceSummaryResponse { + /** Path for metadata summary. */ + @JsonProperty("path") + private String path; + /** The namespace the request path is on. */ @JsonProperty("type") private EntityType entityType; - /** Total number of volumes under root, -1 for other types. */ - @JsonProperty("numVolume") - private int numVolume; - - /** Total number of buckets for root/volume, -1 for other types. */ - @JsonProperty("numBucket") - private int numBucket; + /** Count stats which tells the number of volumes/buckets/dir/files etc. */ + @JsonProperty("countStats") + private CountStats countStats; - /** Total number of directories for all types except key, -1 for key. */ - @JsonProperty("numDir") - private int numTotalDir; - - /** Total number of keys. */ - @JsonProperty("numKey") - private long numTotalKey; + @JsonProperty("objectInfo") + private ObjectDBInfo objectDBInfo; /** Path Status. 
*/ @JsonProperty("status") private ResponseStatus status; - public NamespaceSummaryResponse(EntityType entityType) { - this.entityType = entityType; - this.numVolume = -1; - this.numBucket = -1; - this.numTotalDir = -1; - this.numTotalKey = 0; - this.status = ResponseStatus.OK; + /** + * Returns new builder class that builds a NamespaceSummaryResponse. + * + * @return Builder + */ + public static NamespaceSummaryResponse.Builder newBuilder() { + return new NamespaceSummaryResponse.Builder(); } - public EntityType getEntityType() { - return this.entityType; + public NamespaceSummaryResponse(Builder b) { + this.path = b.path; + this.entityType = b.entityType; + this.countStats = b.countStats; + this.objectDBInfo = b.objectDBInfo; + this.status = b.status; } - public int getNumVolume() { - return this.numVolume; + public String getPath() { + return path; } - public int getNumBucket() { - return this.numBucket; + public void setPath(String path) { + this.path = path; } - public int getNumTotalDir() { - return this.numTotalDir; + public CountStats getCountStats() { + return countStats; } - public long getNumTotalKey() { - return this.numTotalKey; + public void setCountStats(CountStats countStats) { + this.countStats = countStats; + } + + public EntityType getEntityType() { + return this.entityType; } public ResponseStatus getStatus() { @@ -84,23 +89,72 @@ public void setEntityType(EntityType entityType) { this.entityType = entityType; } - public void setNumVolume(int numVolume) { - this.numVolume = numVolume; + public void setStatus(ResponseStatus status) { + this.status = status; } - public void setNumBucket(int numBucket) { - this.numBucket = numBucket; + public ObjectDBInfo getObjectDBInfo() { + return objectDBInfo; } - public void setNumTotalDir(int numTotalDir) { - this.numTotalDir = numTotalDir; + public void setObjectDBInfo(ObjectDBInfo objectDBInfo) { + this.objectDBInfo = objectDBInfo; } - public void setNumTotalKey(long numTotalKey) { - this.numTotalKey = 
numTotalKey; + /** + * Builder for NamespaceSummaryResponse. + */ + @SuppressWarnings("checkstyle:hiddenfield") + public static final class Builder { + private String path; + private EntityType entityType; + private CountStats countStats; + private ObjectDBInfo objectDBInfo; + private ResponseStatus status; + + + public Builder() { + // Default values + this.path = StringUtils.EMPTY; + this.entityType = EntityType.ROOT; + } + + public NamespaceSummaryResponse.Builder setPath(String path) { + this.path = path; + return this; + } + + public NamespaceSummaryResponse.Builder setEntityType( + EntityType entityType) { + this.entityType = entityType; + return this; + } + + public NamespaceSummaryResponse.Builder setCountStats( + CountStats countStats) { + this.countStats = countStats; + return this; + } + + public NamespaceSummaryResponse.Builder setObjectDBInfo( + ObjectDBInfo objectDBInfo) { + this.objectDBInfo = objectDBInfo; + return this; + } + + public NamespaceSummaryResponse.Builder setStatus( + ResponseStatus status) { + this.status = status; + return this; + } + + public NamespaceSummaryResponse build() { + Preconditions.checkNotNull(this.path); + Preconditions.checkNotNull(this.entityType); + Preconditions.checkNotNull(this.status); + + return new NamespaceSummaryResponse(this); + } } - public void setStatus(ResponseStatus status) { - this.status = status; - } } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/ObjectDBInfo.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/ObjectDBInfo.java new file mode 100644 index 000000000000..acd034a4869d --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/ObjectDBInfo.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.api.types; + +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.hadoop.ozone.OzoneAcl; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmPrefixInfo; + +import java.util.List; +import java.util.Map; + +/** + * Encapsulates the low level DB info common to volume or bucket or dir. + */ +public class ObjectDBInfo { + @JsonProperty("metadata") + private Map metadata; + + @JsonProperty("name") + private String name; + + @JsonProperty("quotaInBytes") + private long quotaInBytes; + + @JsonProperty("quotaInNamespace") + private long quotaInNamespace; + + @JsonProperty("usedNamespace") + private long usedNamespace; + + @JsonProperty("creationTime") + private long creationTime; + + @JsonProperty("modificationTime") + private long modificationTime; + + @JsonProperty("acls") + private List acls; + + public ObjectDBInfo() { + + } + + public ObjectDBInfo(OmDirectoryInfo omDirectoryInfo) { + this.setName(omDirectoryInfo.getName()); + this.setCreationTime(omDirectoryInfo.getCreationTime()); + this.setModificationTime(omDirectoryInfo.getModificationTime()); + this.setAcls(omDirectoryInfo.getAcls()); + this.setMetadata(omDirectoryInfo.getMetadata()); + } + + public ObjectDBInfo(OmPrefixInfo omPrefixInfo) { + this.setName(omPrefixInfo.getName()); + this.setAcls(omPrefixInfo.getAcls()); + this.setMetadata(omPrefixInfo.getMetadata()); + } + + + public Map getMetadata() { + return metadata; + } + + public void setMetadata(Map metadata) { + this.metadata = metadata; + } + + public String getName() { + return name; + } + + public void 
setName(String name) { + this.name = name; + } + + public long getQuotaInBytes() { + return quotaInBytes; + } + + public void setQuotaInBytes(long quotaInBytes) { + this.quotaInBytes = quotaInBytes; + } + + public long getQuotaInNamespace() { + return quotaInNamespace; + } + + public void setQuotaInNamespace(long quotaInNamespace) { + this.quotaInNamespace = quotaInNamespace; + } + + public long getUsedNamespace() { + return usedNamespace; + } + + public void setUsedNamespace(long usedNamespace) { + this.usedNamespace = usedNamespace; + } + + public long getCreationTime() { + return creationTime; + } + + public void setCreationTime(long creationTime) { + this.creationTime = creationTime; + } + + public long getModificationTime() { + return modificationTime; + } + + public void setModificationTime(long modificationTime) { + this.modificationTime = modificationTime; + } + + public List getAcls() { + return acls; + } + + public void setAcls(List acls) { + this.acls = acls; + } + +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/VolumeObjectDBInfo.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/VolumeObjectDBInfo.java new file mode 100644 index 000000000000..7bb759ca4910 --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/VolumeObjectDBInfo.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.api.types; + +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; + +/** + * Encapsulates the low level volume info. + */ +public class VolumeObjectDBInfo extends ObjectDBInfo { + @JsonProperty("admin") + private String admin; + + @JsonProperty("owner") + private String owner; + + @JsonProperty("volume") + private String volume; + + public VolumeObjectDBInfo() { + + } + + public VolumeObjectDBInfo(OmVolumeArgs omVolumeArgs) { + super.setMetadata(omVolumeArgs.getMetadata()); + super.setName(omVolumeArgs.getVolume()); + super.setQuotaInBytes(omVolumeArgs.getQuotaInBytes()); + super.setQuotaInNamespace(omVolumeArgs.getQuotaInNamespace()); + super.setUsedNamespace(omVolumeArgs.getUsedNamespace()); + super.setCreationTime(omVolumeArgs.getCreationTime()); + super.setModificationTime(omVolumeArgs.getModificationTime()); + super.setAcls(omVolumeArgs.getAcls()); + this.setAdmin(omVolumeArgs.getAdminName()); + this.setOwner(omVolumeArgs.getOwnerName()); + this.setVolume(omVolumeArgs.getVolume()); + } + + public String getAdmin() { + return admin; + } + + public void setAdmin(String admin) { + this.admin = admin; + } + + public String getOwner() { + return owner; + } + + public void setOwner(String owner) { + this.owner = owner; + } + + public String getVolume() { + return volume; + } + + public void setVolume(String volume) { + this.volume = volume; + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ContainerHealthTask.java 
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ContainerHealthTask.java index 4badac672e7e..606eb0a06255 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ContainerHealthTask.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ContainerHealthTask.java @@ -24,6 +24,8 @@ import java.util.List; import java.util.Set; import java.util.concurrent.TimeoutException; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.PlacementPolicy; @@ -47,6 +49,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; + /** * Class that scans the list of containers and keeps track of containers with * no replicas in a SQL table. @@ -56,11 +59,14 @@ public class ContainerHealthTask extends ReconScmTask { private static final Logger LOG = LoggerFactory.getLogger(ContainerHealthTask.class); + private ReadWriteLock lock = new ReentrantReadWriteLock(true); + private StorageContainerServiceProvider scmClient; private ContainerManager containerManager; private ContainerHealthSchemaManager containerHealthSchemaManager; private PlacementPolicy placementPolicy; private final long interval; + private Set processedContainers = new HashSet<>(); public ContainerHealthTask( @@ -79,26 +85,11 @@ public ContainerHealthTask( } @Override - public synchronized void run() { + public void run() { try { while (canRun()) { - wait(interval); - long start = Time.monotonicNow(); - long currentTime = System.currentTimeMillis(); - long existingCount = processExistingDBRecords(currentTime); - LOG.info("Container Health task thread took {} milliseconds to" + - " process {} existing database records.", - Time.monotonicNow() - start, existingCount); - start = Time.monotonicNow(); - final List containers = containerManager.getContainers(); - containers.stream() - .filter(c -> 
!processedContainers.contains(c)) - .forEach(c -> processContainer(c, currentTime)); - recordSingleRunCompletion(); - LOG.info("Container Health task thread took {} milliseconds for" + - " processing {} containers.", Time.monotonicNow() - start, - containers.size()); - processedContainers.clear(); + triggerContainerHealthCheck(); + Thread.sleep(interval); } } catch (Throwable t) { LOG.error("Exception in Missing Container task Thread.", t); @@ -108,6 +99,30 @@ public synchronized void run() { } } + public void triggerContainerHealthCheck() { + lock.writeLock().lock(); + try { + long start = Time.monotonicNow(); + long currentTime = System.currentTimeMillis(); + long existingCount = processExistingDBRecords(currentTime); + LOG.info("Container Health task thread took {} milliseconds to" + + " process {} existing database records.", + Time.monotonicNow() - start, existingCount); + start = Time.monotonicNow(); + final List containers = containerManager.getContainers(); + containers.stream() + .filter(c -> !processedContainers.contains(c)) + .forEach(c -> processContainer(c, currentTime)); + recordSingleRunCompletion(); + LOG.info("Container Health task thread took {} milliseconds for" + + " processing {} containers.", Time.monotonicNow() - start, + containers.size()); + processedContainers.clear(); + } finally { + lock.writeLock().unlock(); + } + } + private ContainerHealthStatus setCurrentContainer(long recordId) throws ContainerNotFoundException { ContainerInfo container = @@ -159,7 +174,8 @@ private long processExistingDBRecords(long currentTime) { currentContainer = setCurrentContainer(rec.getContainerId()); } if (ContainerHealthRecords - .retainOrUpdateRecord(currentContainer, rec)) { + .retainOrUpdateRecord(currentContainer, rec + )) { // Check if the missing container is deleted in SCM if (currentContainer.isMissing() && containerDeletedInSCM(currentContainer.getContainer())) { @@ -252,10 +268,13 @@ public static class ContainerHealthRecords { * If the record is 
to be retained, the fields in the record for actual * replica count, delta and reason will be updated if their counts have * changed. - * @param container ContainerHealthStatus representing the health state of - * the container. - * @param rec Existing database record from the UnhealthyContainers table. - * @return + * + * @param container ContainerHealthStatus representing the + * health state of the container. + * @param rec Existing database record from the + * UnhealthyContainers table. + * @return returns true or false if need to retain or update the unhealthy + * container record */ public static boolean retainOrUpdateRecord( ContainerHealthStatus container, UnhealthyContainersRecord rec) { @@ -329,6 +348,7 @@ public static List generateUnhealthyRecords( records.add(recordForState( container, UnHealthyContainerStates.MIS_REPLICATED, time)); } + return records; } @@ -421,5 +441,4 @@ private static void updateReason( } } } - } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconSafeModeMgrTask.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconSafeModeMgrTask.java new file mode 100644 index 000000000000..ca9664f8010c --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconSafeModeMgrTask.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.fsck; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerManager; +import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; +import org.apache.hadoop.ozone.recon.scm.ReconNodeManager; +import org.apache.hadoop.ozone.recon.scm.ReconSafeModeManager; +import org.apache.hadoop.ozone.recon.tasks.ReconTaskConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL_DEFAULT; + +/** + * Class that scans the list of containers and keeps track if + * recon warm up completed, and it exits safe mode. 
+ */ +public class ReconSafeModeMgrTask { + + private static final Logger LOG = + LoggerFactory.getLogger(ReconSafeModeMgrTask.class); + + private ContainerManager containerManager; + private ReconNodeManager nodeManager; + private ReconSafeModeManager safeModeManager; + private List allNodes; + private List containers; + private OzoneConfiguration ozoneConfiguration; + private final long interval; + private final long dnHBInterval; + + public ReconSafeModeMgrTask( + ContainerManager containerManager, + ReconNodeManager nodeManager, + ReconSafeModeManager safeModeManager, + ReconTaskConfig reconTaskConfig, + OzoneConfiguration ozoneConfiguration) { + this.safeModeManager = safeModeManager; + this.containerManager = containerManager; + this.nodeManager = nodeManager; + this.allNodes = nodeManager.getAllNodes(); + this.containers = containerManager.getContainers(); + this.ozoneConfiguration = ozoneConfiguration; + interval = reconTaskConfig.getSafeModeWaitThreshold().toMillis(); + dnHBInterval = ozoneConfiguration.getTimeDuration(HDDS_HEARTBEAT_INTERVAL, + HDDS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + } + + public synchronized void start() { + long timeElapsed = 0L; + try { + tryReconExitSafeMode(); + while (safeModeManager.getInSafeMode() && timeElapsed <= interval) { + wait(dnHBInterval); + timeElapsed += dnHBInterval; + allNodes = nodeManager.getAllNodes(); + containers = containerManager.getContainers(); + tryReconExitSafeMode(); + } + // Exceeded safe mode grace period. 
Exit safe mode + if (safeModeManager.getInSafeMode()) { + safeModeManager.setInSafeMode(false); + } + } catch (Throwable t) { + LOG.error("Exception in Missing Container task Thread.", t); + if (t instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } + } + } + + private void tryReconExitSafeMode() + throws InterruptedException { + // Recon starting first time + if (null == allNodes || allNodes.size() == 0) { + return; + } + if (null == containers || containers.size() == 0) { + return; + } + final Set currentContainersInAllDatanodes = + new HashSet<>(containers.size()); + allNodes.forEach(node -> { + try { + currentContainersInAllDatanodes.addAll( + nodeManager.getContainers(node)); + } catch (NodeNotFoundException e) { + LOG.error("{} node not found.", node.getUuid()); + } + }); + if (containers.size() == currentContainersInAllDatanodes.size()) { + safeModeManager.setInSafeMode(false); + } + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/PipelineSyncTask.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/PipelineSyncTask.java index 43bad418b784..b802efcf48de 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/PipelineSyncTask.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/PipelineSyncTask.java @@ -22,6 +22,9 @@ import java.io.IOException; import java.util.List; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.stream.Collectors; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -49,6 +52,8 @@ public class PipelineSyncTask extends ReconScmTask { private StorageContainerServiceProvider scmClient; private ReconPipelineManager reconPipelineManager; private ReconNodeManager nodeManager; + + private ReadWriteLock lock = new ReentrantReadWriteLock(true); private final long interval; public 
PipelineSyncTask(ReconPipelineManager pipelineManager, @@ -64,17 +69,11 @@ public PipelineSyncTask(ReconPipelineManager pipelineManager, } @Override - protected synchronized void run() { + public void run() { try { while (canRun()) { - long start = Time.monotonicNow(); - List pipelinesFromScm = scmClient.getPipelines(); - reconPipelineManager.initializePipelines(pipelinesFromScm); - syncOperationalStateOnDeadNodes(); - LOG.info("Pipeline sync Thread took {} milliseconds.", - Time.monotonicNow() - start); - recordSingleRunCompletion(); - wait(interval); + triggerPipelineSyncTask(); + Thread.sleep(interval); } } catch (Throwable t) { LOG.error("Exception in Pipeline sync Thread.", t); @@ -84,6 +83,22 @@ protected synchronized void run() { } } + public void triggerPipelineSyncTask() + throws IOException, TimeoutException, NodeNotFoundException { + lock.writeLock().lock(); + try { + long start = Time.monotonicNow(); + List pipelinesFromScm = scmClient.getPipelines(); + reconPipelineManager.initializePipelines(pipelinesFromScm); + syncOperationalStateOnDeadNodes(); + LOG.info("Pipeline sync Thread took {} milliseconds.", + Time.monotonicNow() - start); + recordSingleRunCompletion(); + } finally { + lock.writeLock().unlock(); + } + } + /** * For every dead node in Recon, update Operational state with that on SCM * if different. 
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconDatanodeProtocolServer.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconDatanodeProtocolServer.java index 2f14806f9ba7..019e09790cf1 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconDatanodeProtocolServer.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconDatanodeProtocolServer.java @@ -46,7 +46,7 @@ public ReconDatanodeProtocolServer(OzoneConfiguration conf, OzoneStorageContainerManager scm, EventPublisher eventPublisher) throws IOException { - super(conf, scm, eventPublisher); + super(conf, scm, eventPublisher, null); } @Override diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconDeadNodeHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconDeadNodeHandler.java index bc1b43a8a4bb..b64244a124ce 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconDeadNodeHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconDeadNodeHandler.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.ozone.recon.fsck.ContainerHealthTask; import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,15 +41,20 @@ public class ReconDeadNodeHandler extends DeadNodeHandler { private static final Logger LOG = LoggerFactory.getLogger(ReconDeadNodeHandler.class); - private StorageContainerServiceProvider scmClient; + private ContainerHealthTask containerHealthTask; + private PipelineSyncTask pipelineSyncTask; public ReconDeadNodeHandler(NodeManager nodeManager, PipelineManager pipelineManager, ContainerManager containerManager, - 
StorageContainerServiceProvider scmClient) { + StorageContainerServiceProvider scmClient, + ContainerHealthTask containerHealthTask, + PipelineSyncTask pipelineSyncTask) { super(nodeManager, pipelineManager, containerManager); this.scmClient = scmClient; + this.containerHealthTask = containerHealthTask; + this.pipelineSyncTask = pipelineSyncTask; } @Override @@ -71,6 +77,8 @@ public void onMessage(final DatanodeDetails datanodeDetails, LOG.warn("Node {} has reached DEAD state, but SCM does not have " + "information about it.", datanodeDetails); } + containerHealthTask.triggerContainerHealthCheck(); + pipelineSyncTask.triggerPipelineSyncTask(); } catch (Exception ioEx) { LOG.error("Error trying to verify Node operational state from SCM.", ioEx); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconPipelineManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconPipelineManager.java index 41559d130a31..37ee6580e87f 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconPipelineManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconPipelineManager.java @@ -19,6 +19,7 @@ package org.apache.hadoop.ozone.recon.scm; import java.io.IOException; +import java.time.Clock; import java.time.ZoneOffset; import java.util.List; import java.util.concurrent.TimeoutException; @@ -41,7 +42,6 @@ import com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.ozone.common.MonotonicClock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,7 +64,7 @@ private ReconPipelineManager(ConfigurationSource conf, SCMContext scmContext) { super(conf, scmhaManager, nodeManager, pipelineStateManager, pipelineFactory, eventPublisher, scmContext, - new MonotonicClock(ZoneOffset.UTC)); + Clock.system(ZoneOffset.UTC)); } public static ReconPipelineManager newReconPipelineManager( diff --git 
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconSafeModeManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconSafeModeManager.java index 8612cb94534b..c056f60cf844 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconSafeModeManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconSafeModeManager.java @@ -20,14 +20,24 @@ import org.apache.hadoop.hdds.scm.safemode.SafeModeManager; +import java.util.concurrent.atomic.AtomicBoolean; + /** * Recon's stub implementation of SCM's SafeMode manager. */ public class ReconSafeModeManager implements SafeModeManager { + private AtomicBoolean inSafeMode = new AtomicBoolean(true); @Override public boolean getInSafeMode() { - return false; + return this.inSafeMode.get(); + } + + /** + * Set safe mode status. + */ + public void setInSafeMode(boolean inSafeMode) { + this.inSafeMode.set(inSafeMode); } } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStaleNodeHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStaleNodeHandler.java new file mode 100644 index 000000000000..998f06392492 --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStaleNodeHandler.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.scm; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.node.StaleNodeHandler; +import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Recon's handling of Stale node. + */ +public class ReconStaleNodeHandler extends StaleNodeHandler { + + private static final Logger LOG = + LoggerFactory.getLogger(ReconStaleNodeHandler.class); + private PipelineSyncTask pipelineSyncTask; + + public ReconStaleNodeHandler(NodeManager nodeManager, + PipelineManager pipelineManager, + OzoneConfiguration conf, + PipelineSyncTask pipelineSyncTask) { + super(nodeManager, pipelineManager, conf); + this.pipelineSyncTask = pipelineSyncTask; + } + + @Override + public void onMessage(final DatanodeDetails datanodeDetails, + final EventPublisher publisher) { + super.onMessage(datanodeDetails, publisher); + try { + pipelineSyncTask.triggerPipelineSyncTask(); + } catch (Exception exp) { + LOG.error("Error trying to trigger pipeline sync task..", + exp); + } + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java index 51499a0d6c9f..d0777efa39f0 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java +++ 
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; +import java.time.Clock; import java.time.ZoneId; import java.util.HashSet; import java.util.List; @@ -29,20 +30,28 @@ import java.util.UUID; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.ScmUtils; import org.apache.hadoop.hdds.scm.block.BlockManager; import org.apache.hadoop.hdds.scm.container.CloseContainerEventHandler; import org.apache.hadoop.hdds.scm.container.ContainerActionsHandler; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerReportHandler; import org.apache.hadoop.hdds.scm.container.IncrementalContainerReportHandler; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaPendingOps; import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager; import org.apache.hadoop.hdds.scm.container.balancer.ContainerBalancer; @@ -64,7 +73,6 @@ import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineActionHandler; 
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; -import org.apache.hadoop.hdds.scm.safemode.SafeModeManager; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; import org.apache.hadoop.hdds.server.events.EventQueue; @@ -79,10 +87,10 @@ import org.apache.hadoop.hdds.utils.db.Table.KeyValue; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.ozone.common.MonotonicClock; import org.apache.hadoop.ozone.recon.ReconServerConfigKeys; import org.apache.hadoop.ozone.recon.ReconUtils; import org.apache.hadoop.ozone.recon.fsck.ContainerHealthTask; +import org.apache.hadoop.ozone.recon.fsck.ReconSafeModeMgrTask; import org.apache.hadoop.ozone.recon.persistence.ContainerHealthSchemaManager; import org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager; import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; @@ -93,6 +101,11 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EVENT_REPORT_QUEUE_WAIT_THRESHOLD_DEFAULT; import static org.apache.hadoop.hdds.scm.server.StorageContainerManager.buildRpcServerStartMessage; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_DELIMITER; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY_DEFAULT; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_TASK_INTERVAL_DEFAULT; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_TASK_INTERVAL_DELAY; + import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.ContainerReport; import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.ContainerReportFromDatanode; import 
org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.IncrementalContainerReportFromDatanode; @@ -112,6 +125,7 @@ public class ReconStorageContainerManagerFacade private static final Logger LOG = LoggerFactory .getLogger(ReconStorageContainerManagerFacade.class); + public static final long CONTAINER_METADATA_SIZE = 1 * 1024 * 1024L; private final OzoneConfiguration ozoneConfiguration; private final ReconDatanodeProtocolServer datanodeProtocolServer; @@ -132,6 +146,12 @@ public class ReconStorageContainerManagerFacade private SCMContainerPlacementMetrics placementMetrics; private PlacementPolicy containerPlacementPolicy; private HDDSLayoutVersionManager scmLayoutVersionManager; + private ReconSafeModeManager safeModeManager; + private ReconSafeModeMgrTask reconSafeModeMgrTask; + + private ScheduledExecutorService scheduler; + + private AtomicBoolean isSyncDataFromSCMRunning; @Inject public ReconStorageContainerManagerFacade(OzoneConfiguration conf, @@ -139,7 +159,8 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, ReconTaskStatusDao reconTaskStatusDao, ContainerHealthSchemaManager containerHealthSchemaManager, ReconContainerMetadataManager reconContainerMetadataManager, - ReconUtils reconUtils) throws IOException { + ReconUtils reconUtils, + ReconSafeModeManager safeModeManager) throws IOException { reconNodeDetails = getReconNodeDetails(conf); this.eventQueue = new EventQueue(); eventQueue.setSilent(true); @@ -177,7 +198,7 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, scmhaManager, scmContext); ContainerReplicaPendingOps pendingOps = new ContainerReplicaPendingOps( - conf, new MonotonicClock(ZoneId.systemDefault())); + conf, Clock.system(ZoneId.systemDefault())); this.containerManager = new ReconContainerManager(conf, dbStore, ReconSCMDBDefinition.CONTAINERS.getTable(dbStore), @@ -185,11 +206,12 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, containerHealthSchemaManager, 
reconContainerMetadataManager, scmhaManager, sequenceIdGen, pendingOps); this.scmServiceProvider = scmServiceProvider; + this.isSyncDataFromSCMRunning = new AtomicBoolean(); NodeReportHandler nodeReportHandler = new NodeReportHandler(nodeManager); - SafeModeManager safeModeManager = new ReconSafeModeManager(); + this.safeModeManager = safeModeManager; ReconPipelineReportHandler pipelineReportHandler = new ReconPipelineReportHandler(safeModeManager, pipelineManager, scmContext, conf, scmServiceProvider); @@ -197,10 +219,26 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, PipelineActionHandler pipelineActionHandler = new PipelineActionHandler(pipelineManager, scmContext, conf); + ReconTaskConfig reconTaskConfig = conf.getObject(ReconTaskConfig.class); + PipelineSyncTask pipelineSyncTask = new PipelineSyncTask( + pipelineManager, + nodeManager, + scmServiceProvider, + reconTaskStatusDao, + reconTaskConfig); + ContainerHealthTask containerHealthTask = new ContainerHealthTask( + containerManager, + scmServiceProvider, + reconTaskStatusDao, containerHealthSchemaManager, + containerPlacementPolicy, + reconTaskConfig); + StaleNodeHandler staleNodeHandler = - new StaleNodeHandler(nodeManager, pipelineManager, conf); + new ReconStaleNodeHandler(nodeManager, pipelineManager, + conf, pipelineSyncTask); DeadNodeHandler deadNodeHandler = new ReconDeadNodeHandler(nodeManager, - pipelineManager, containerManager, scmServiceProvider); + pipelineManager, containerManager, + scmServiceProvider, containerHealthTask, pipelineSyncTask); ContainerReportHandler containerReportHandler = new ReconContainerReportHandler(nodeManager, containerManager); @@ -213,7 +251,6 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, pipelineManager, containerManager, scmContext); ContainerActionsHandler actionsHandler = new ContainerActionsHandler(); ReconNewNodeHandler newNodeHandler = new ReconNewNodeHandler(nodeManager); - // Use the same executor for both ICR and 
FCR. // The Executor maps the event to a thread for DN. // Dispatcher should always dispatch FCR first followed by ICR @@ -267,20 +304,11 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, eventQueue.addHandler(SCMEvents.CONTAINER_ACTIONS, actionsHandler); eventQueue.addHandler(SCMEvents.CLOSE_CONTAINER, closeContainerHandler); eventQueue.addHandler(SCMEvents.NEW_NODE, newNodeHandler); - - ReconTaskConfig reconTaskConfig = conf.getObject(ReconTaskConfig.class); - reconScmTasks.add(new PipelineSyncTask( - pipelineManager, - nodeManager, - scmServiceProvider, - reconTaskStatusDao, - reconTaskConfig)); - reconScmTasks.add(new ContainerHealthTask( - containerManager, - scmServiceProvider, - reconTaskStatusDao, containerHealthSchemaManager, - containerPlacementPolicy, - reconTaskConfig)); + reconScmTasks.add(pipelineSyncTask); + reconScmTasks.add(containerHealthTask); + reconSafeModeMgrTask = new ReconSafeModeMgrTask( + containerManager, nodeManager, safeModeManager, + reconTaskConfig, ozoneConfiguration); } /** @@ -321,6 +349,7 @@ public void start() { "Recon ScmDatanodeProtocol RPC server", getDatanodeProtocolServer().getDatanodeRpcAddress())); } + scheduler = Executors.newScheduledThreadPool(1); boolean isSCMSnapshotEnabled = ozoneConfiguration.getBoolean( ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_ENABLED, ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_ENABLED_DEFAULT); @@ -330,8 +359,40 @@ public void start() { } else { initializePipelinesFromScm(); } + LOG.debug("Started the SCM Container Info sync scheduler."); + long interval = ozoneConfiguration.getTimeDuration( + OZONE_RECON_SCM_SNAPSHOT_TASK_INTERVAL_DELAY, + OZONE_RECON_SCM_SNAPSHOT_TASK_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + long initialDelay = ozoneConfiguration.getTimeDuration( + OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY, + OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY_DEFAULT, + TimeUnit.MILLISECONDS); + // This periodic sync with SCM container cache is needed because during + 
// the window when recon will be down and any container being added + // newly and went missing, that container will not be reported as missing by + // recon till there is a difference of container count equivalent to + // threshold value defined in "ozone.recon.scm.container.threshold" + // between SCM container cache and recon container cache. + scheduler.scheduleWithFixedDelay(() -> { + try { + boolean isSuccess = syncWithSCMContainerInfo(); + if (!isSuccess) { + LOG.debug("SCM container info sync is already running."); + } + } catch (Throwable t) { + LOG.error("Unexpected exception while syncing data from SCM.", t); + } finally { + isSyncDataFromSCMRunning.compareAndSet(true, false); + } + }, + initialDelay, + interval, + TimeUnit.MILLISECONDS); getDatanodeProtocolServer().start(); - this.reconScmTasks.forEach(ReconScmTask::start); + reconSafeModeMgrTask.start(); + if (!this.safeModeManager.getInSafeMode()) { + this.reconScmTasks.forEach(ReconScmTask::start); + } } /** @@ -411,22 +472,102 @@ private void initializeSCMDB() { } } catch (IOException e) { LOG.error("Exception encountered while getting SCM DB."); + } finally { + isSyncDataFromSCMRunning.compareAndSet(true, false); } } public void updateReconSCMDBWithNewSnapshot() throws IOException { - DBCheckpoint dbSnapshot = scmServiceProvider.getSCMDBSnapshot(); - if (dbSnapshot != null && dbSnapshot.getCheckpointLocation() != null) { - LOG.info("Got new checkpoint from SCM : " + - dbSnapshot.getCheckpointLocation()); + if (isSyncDataFromSCMRunning.compareAndSet(false, true)) { + DBCheckpoint dbSnapshot = scmServiceProvider.getSCMDBSnapshot(); + if (dbSnapshot != null && dbSnapshot.getCheckpointLocation() != null) { + LOG.info("Got new checkpoint from SCM : " + + dbSnapshot.getCheckpointLocation()); + try { + initializeNewRdbStore(dbSnapshot.getCheckpointLocation().toFile()); + } catch (IOException e) { + LOG.error("Unable to refresh Recon SCM DB Snapshot. 
", e); + } + } else { + LOG.error("Null snapshot location got from SCM."); + } + } else { + LOG.warn("SCM DB sync is already running."); + } + } + + public boolean syncWithSCMContainerInfo() + throws IOException { + if (isSyncDataFromSCMRunning.compareAndSet(false, true)) { try { - initializeNewRdbStore(dbSnapshot.getCheckpointLocation().toFile()); + List containers = containerManager.getContainers(); + + long totalContainerCount = scmServiceProvider.getContainerCount( + HddsProtos.LifeCycleState.CLOSED); + long containerCountPerCall = + getContainerCountPerCall(totalContainerCount); + long startContainerId = 1; + long retrievedContainerCount = 0; + if (totalContainerCount > 0) { + while (retrievedContainerCount < totalContainerCount) { + List listOfContainers = scmServiceProvider. + getListOfContainers(startContainerId, + Long.valueOf(containerCountPerCall).intValue(), + HddsProtos.LifeCycleState.CLOSED); + if (null != listOfContainers && listOfContainers.size() > 0) { + LOG.info("Got list of containers from SCM : " + + listOfContainers.size()); + listOfContainers.forEach(containerInfo -> { + long containerID = containerInfo.getContainerID(); + boolean isContainerPresentAtRecon = + containers.contains(containerInfo); + if (!isContainerPresentAtRecon) { + try { + ContainerWithPipeline containerWithPipeline = + scmServiceProvider.getContainerWithPipeline( + containerID); + containerManager.addNewContainer(containerWithPipeline); + } catch (IOException e) { + LOG.error("Could not get container with pipeline " + + "for container : {}", containerID); + } catch (TimeoutException e) { + LOG.error("Could not add new container {} in Recon " + + "container manager cache.", containerID); + } + } + }); + startContainerId = listOfContainers.get( + listOfContainers.size() - 1).getContainerID() + 1; + } else { + LOG.info("No containers found at SCM in CLOSED state"); + return false; + } + retrievedContainerCount += containerCountPerCall; + } + } } catch (IOException e) { 
LOG.error("Unable to refresh Recon SCM DB Snapshot. ", e); + return false; } } else { - LOG.error("Null snapshot location got from SCM."); + LOG.debug("SCM DB sync is already running."); + return false; } + return true; + } + + private long getContainerCountPerCall(long totalContainerCount) { + // Assumption of size of 1 container info object here is 1 MB + long containersMetaDataTotalRpcRespSizeMB = + CONTAINER_METADATA_SIZE * totalContainerCount; + long hadoopRPCSize = ozoneConfiguration.getInt( + CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH, + CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH_DEFAULT); + long containerCountPerCall = containersMetaDataTotalRpcRespSizeMB <= + hadoopRPCSize ? totalContainerCount : + Math.round(Math.floor( + hadoopRPCSize / (double) CONTAINER_METADATA_SIZE)); + return containerCountPerCall; } private void deleteOldSCMDB() throws IOException { diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/StorageContainerServiceProvider.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/StorageContainerServiceProvider.java index cf57937a15e2..2ee3f6bbd7ad 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/StorageContainerServiceProvider.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/StorageContainerServiceProvider.java @@ -21,6 +21,7 @@ import java.util.List; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.utils.db.DBCheckpoint; @@ -78,4 +79,24 @@ List getExistContainerWithPipelinesInBatch( * @return DBCheckpoint from SCM. */ DBCheckpoint getSCMDBSnapshot(); + + /** + * Get the list of containers from SCM. This is a RPC call. 
+ * + * @param startContainerID the start container id + * @param count the number of containers to return + * @param state the containers in given state to be returned + * @return the list of containers from SCM in a given state + * @throws IOException + */ + List getListOfContainers(long startContainerID, + int count, + HddsProtos.LifeCycleState state) + throws IOException; + + /** + * Requests SCM for container count for a given state. + * @return Total number of containers in SCM. + */ + long getContainerCount(HddsProtos.LifeCycleState state) throws IOException; } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/StorageContainerServiceProviderImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/StorageContainerServiceProviderImpl.java index 1550a89cd422..bb94c6d29842 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/StorageContainerServiceProviderImpl.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/StorageContainerServiceProviderImpl.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.ha.InterSCMGrpcClient; import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; @@ -155,6 +156,12 @@ public long getContainerCount() throws IOException { return scmClient.getContainerCount(); } + @Override + public long getContainerCount(HddsProtos.LifeCycleState state) + throws IOException { + return scmClient.getContainerCount(state); + } + public String getScmDBSnapshotUrl() { return scmDBSnapshotUrl; } @@ -194,7 +201,8 @@ connectionFactory, getScmDBSnapshotUrl(), try (SCMSnapshotDownloader downloadClient = new InterSCMGrpcClient( hostAddress, 
grpcPort, configuration, new ReconCertificateClient(new SecurityConfig(configuration), - reconStorage.getReconCertSerialId()))) { + reconStorage.getReconCertSerialId(), + reconStorage.getClusterID(), reconStorage.getReconId()))) { downloadClient.download(targetFile.toPath()).get(); } catch (ExecutionException | InterruptedException e) { LOG.error("Rocks DB checkpoint downloading failed", e); @@ -215,4 +223,12 @@ connectionFactory, getScmDBSnapshotUrl(), } return null; } + + @Override + public List getListOfContainers( + long startContainerID, int count, HddsProtos.LifeCycleState state) + throws IOException { + return scmClient.getListOfContainers(startContainerID, count, state); + } + } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ReconTaskConfig.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ReconTaskConfig.java index 9788bf67e8a6..6afffeb81209 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ReconTaskConfig.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ReconTaskConfig.java @@ -66,4 +66,21 @@ public void setMissingContainerTaskInterval(Duration interval) { this.missingContainerTaskInterval = interval.toMillis(); } + @Config(key = "safemode.wait.threshold", + type = ConfigType.TIME, + defaultValue = "300s", + tags = { ConfigTag.RECON, ConfigTag.OZONE }, + description = "The time interval to wait for starting container " + + "health task and pipeline sync task before recon " + + "exits out of safe or warmup mode. 
" + ) + private long safeModeWaitThreshold = Duration.ofMinutes(5).toMillis(); + + public Duration getSafeModeWaitThreshold() { + return Duration.ofMillis(safeModeWaitThreshold); + } + + public void setSafeModeWaitThreshold(Duration safeModeWaitThreshold) { + this.safeModeWaitThreshold = safeModeWaitThreshold.toMillis(); + } } diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/NOTICE b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/NOTICE deleted file mode 100644 index 03b67504685c..000000000000 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Apache Ozone Recon -Copyright 2019 and onwards The Apache Software Foundation. - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json index 8fe3baf976e3..8e5b3ccfbd33 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/api/db.json @@ -10,6 +10,7 @@ }, "containers": 3230, "missingContainers": 1002, + "openContainers": 5, "volumes": 5, "buckets": 156, "keys": 253000 @@ -18,7 +19,7 @@ "totalCount": 12, "datanodes": [ { - "hostname": "localhost1.storage.enterprise.com", + "hostname": "localhost2.storage.enterprise.com", "uuid": "b590734e-a5f2-11ea-bb37-0242ac130002", "state": "HEALTHY", "opState": "IN_SERVICE", @@ -51,7 +52,7 @@ "buildDate": "2020-07-20T15:45Z" }, { - "hostname": "localhost2.storage.enterprise.com", + "hostname": "localhost1.storage.enterprise.com", "uuid": "b5907812-a5f2-11ea-bb37-0242ac130002", "state": "HEALTHY", "opState": "DECOMMISSIONING", @@ -66,7 +67,7 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, 
- "leaderNode": "localhost1.storage.enterprise.com" + "leaderNode": "localhost2.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", @@ -84,7 +85,7 @@ "buildDate": "2020-07-20T15:45Z" }, { - "hostname": "localhost3.storage.enterprise.com", + "hostname": "localhost4.storage.enterprise.com", "uuid": "b5907812-a5f2-11ea-bb37-0242ac130002", "state": "HEALTHY", "opState": "DECOMMISSIONED", @@ -99,13 +100,13 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost1.storage.enterprise.com" + "leaderNode": "localhost3.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost2.storage.enterprise.com" + "leaderNode": "localhost3.storage.enterprise.com" } ], "containers": 8192, @@ -117,7 +118,7 @@ "buildDate": "2020-07-20T15:45Z" }, { - "hostname": "localhost4.storage.enterprise.com", + "hostname": "localhost3.storage.enterprise.com", "uuid": "b5907812-a5f2-11ea-bb37-0242ac130002", "state": "HEALTHY", "opState": "ENTERING_MAINTENANCE", @@ -132,13 +133,13 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost1.storage.enterprise.com" + "leaderNode": "localhost4.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost2.storage.enterprise.com" + "leaderNode": "localhost4.storage.enterprise.com" } ], "containers": 8192, @@ -150,7 +151,7 @@ "buildDate": "2020-07-20T15:45Z" }, { - "hostname": "localhost5.storage.enterprise.com", + "hostname": "localhost6.storage.enterprise.com", "uuid": "b5907812-a5f2-11ea-bb37-0242ac130002", "state": "HEALTHY", "opState": "IN_MAINTENANCE", @@ -165,13 +166,13 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", 
"replicationFactor": 3, - "leaderNode": "localhost1.storage.enterprise.com" + "leaderNode": "localhost5.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost2.storage.enterprise.com" + "leaderNode": "localhost5.storage.enterprise.com" } ], "containers": 8192, @@ -183,7 +184,7 @@ "buildDate": "2020-07-20T15:45Z" }, { - "hostname": "localhost6.storage.enterprise.com", + "hostname": "localhost5.storage.enterprise.com", "uuid": "b5907934-a5f2-11ea-bb37-0242ac130002", "state": "STALE", "opState": "IN_SERVICE", @@ -198,19 +199,19 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost1.storage.enterprise.com" + "leaderNode": "localhost6.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost3.storage.enterprise.com" + "leaderNode": "localhost6.storage.enterprise.com" }, { "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "STAND_ALONE", "replicationFactor": 1, - "leaderNode": "localhost3.storage.enterprise.com" + "leaderNode": "localhost6.storage.enterprise.com" } ], "containers": 43, @@ -237,19 +238,19 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost1.storage.enterprise.com" + "leaderNode": "localhost7.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost3.storage.enterprise.com" + "leaderNode": "localhost7.storage.enterprise.com" }, { "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "STAND_ALONE", "replicationFactor": 1, - "leaderNode": "localhost3.storage.enterprise.com" + "leaderNode": "localhost7.storage.enterprise.com" } ], "containers": 
43, @@ -276,19 +277,19 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost1.storage.enterprise.com" + "leaderNode": "localhost8.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost3.storage.enterprise.com" + "leaderNode": "localhost8.storage.enterprise.com" }, { "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "STAND_ALONE", "replicationFactor": 1, - "leaderNode": "localhost3.storage.enterprise.com" + "leaderNode": "localhost8.storage.enterprise.com" } ], "containers": 43, @@ -315,19 +316,19 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost1.storage.enterprise.com" + "leaderNode": "localhost9.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost3.storage.enterprise.com" + "leaderNode": "localhost9.storage.enterprise.com" }, { "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "STAND_ALONE", "replicationFactor": 1, - "leaderNode": "localhost3.storage.enterprise.com" + "leaderNode": "localhost9.storage.enterprise.com" } ], "containers": 43, @@ -354,19 +355,19 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost1.storage.enterprise.com" + "leaderNode": "localhost10.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost3.storage.enterprise.com" + "leaderNode": "localhost10.storage.enterprise.com" }, { "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "STAND_ALONE", "replicationFactor": 1, - "leaderNode": 
"localhost3.storage.enterprise.com" + "leaderNode": "localhost10.storage.enterprise.com" } ], "containers": 43, @@ -413,13 +414,13 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost5.storage.enterprise.com" + "leaderNode": "localhost12.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost5.storage.enterprise.com" + "leaderNode": "localhost12.storage.enterprise.com" } ], "containers": 643, @@ -446,13 +447,13 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost5.storage.enterprise.com" + "leaderNode": "localhost13.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost6.storage.enterprise.com" + "leaderNode": "localhost13.storage.enterprise.com" } ], "containers": 5, @@ -479,19 +480,19 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost5.storage.enterprise.com" + "leaderNode": "localhost14.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost7.storage.enterprise.com" + "leaderNode": "localhost14.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "STAND_ALONE", "replicationFactor": 1, - "leaderNode": "localhost7.storage.enterprise.com" + "leaderNode": "localhost14.storage.enterprise.com" } ], "containers": 64, @@ -518,13 +519,13 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost5.storage.enterprise.com" + "leaderNode": "localhost15.storage.enterprise.com" }, { 
"pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost8.storage.enterprise.com" + "leaderNode": "localhost15.storage.enterprise.com" } ], "containers": 21, @@ -551,13 +552,13 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost11.storage.enterprise.com" + "leaderNode": "localhost16.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost9.storage.enterprise.com" + "leaderNode": "localhost16.storage.enterprise.com" } ], "containers": 897, @@ -584,19 +585,19 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost11.storage.enterprise.com" + "leaderNode": "localhost17.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost10.storage.enterprise.com" + "leaderNode": "localhost17.storage.enterprise.com" }, { "pipelineID": "01f2e105-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "STAND_ALONE", "replicationFactor": 1, - "leaderNode": "localhost10.storage.enterprise.com" + "leaderNode": "localhost17.storage.enterprise.com" } ], "containers": 6754, @@ -623,13 +624,13 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", "replicationFactor": 3, - "leaderNode": "localhost11.storage.enterprise.com" + "leaderNode": "localhost18.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost11.storage.enterprise.com" + "leaderNode": "localhost18.storage.enterprise.com" } ], "containers": 78, @@ -656,13 +657,13 @@ "pipelineID": "02e3d908-ff01-4ce6-ad75-f3ec79bcc71a", "replicationType": "RATIS", 
"replicationFactor": 3, - "leaderNode": "localhost11.storage.enterprise.com" + "leaderNode": "localhost19.storage.enterprise.com" }, { "pipelineID": "05e3d908-ff01-4ce6-ad75-f3ec79bcc7982", "replicationType": "RATIS", "replicationFactor": 1, - "leaderNode": "localhost12.storage.enterprise.com" + "leaderNode": "localhost19.storage.enterprise.com" } ], "containers": 543, diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/themeIcons.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/themeIcons.tsx index 986863861e07..07240c10336f 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/themeIcons.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/themeIcons.tsx @@ -34,12 +34,12 @@ export class FilledIcon extends React.Component { } interface IRatisIconProps { - replicationFactor: number; + replicationFactor: string; isLeader: boolean; } interface IReplicationIconProps { - replicationFactor: number; + replicationFactor: string; replicationType: string; leaderNode: string; isLeader: boolean; @@ -49,7 +49,7 @@ export class RatisIcon extends React.PureComponent { render() { const {replicationFactor, isLeader} = this.props; const threeFactorClass = isLeader ? 'icon-text-three-dots-leader' : 'icon-text-three-dots'; - const textClass = replicationFactor >= 3 ? threeFactorClass : 'icon-text-one-dot'; + const textClass = replicationFactor === "THREE" ? threeFactorClass : 'icon-text-one-dot'; return (

R
diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/datanodes/datanodes.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/datanodes/datanodes.tsx index 02296ce6210d..a6eb9a79e912 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/datanodes/datanodes.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/datanodes/datanodes.tsx @@ -126,7 +126,7 @@ const COLUMNS = [ key: 'hostname', isVisible: true, isSearchable: true, - sorter: (a: IDatanode, b: IDatanode) => a.hostname.localeCompare(b.hostname), + sorter: (a: IDatanode, b: IDatanode) => a.hostname.localeCompare(b.hostname, undefined, {numeric: true}), defaultSortOrder: 'ascend' as const, fixed: 'left' }, @@ -137,7 +137,7 @@ const COLUMNS = [ isVisible: true, isSearchable: true, filterMultiple: true, - filters: DatanodeStateList.map(state => ({text: state, value: state})), + filters: DatanodeStateList && DatanodeStateList.map(state => ({text: state, value: state})), onFilter: (value: DatanodeState, record: IDatanode) => record.state === value, render: (text: DatanodeState) => renderDatanodeState(text), sorter: (a: IDatanode, b: IDatanode) => a.state.localeCompare(b.state) @@ -149,7 +149,7 @@ const COLUMNS = [ isVisible: true, isSearchable: true, filterMultiple: true, - filters: DatanodeOpStateList.map(state => ({text: state, value: state})), + filters: DatanodeOpStateList && DatanodeOpStateList.map(state => ({text: state, value: state})), onFilter: (value: DatanodeOpState, record: IDatanode) => record.opState === value, render: (text: DatanodeOpState) => renderDatanodeOpState(text), sorter: (a: IDatanode, b: IDatanode) => a.opState.localeCompare(b.opState) @@ -194,7 +194,7 @@ const COLUMNS = [ return (
{ - pipelines.map((pipeline, index) => ( + pipelines && pipelines.map((pipeline, index) => (
, IDatanode const datanodesResponse: IDatanodesResponse = response.data; const totalCount = datanodesResponse.totalCount; const datanodes: IDatanodeResponse[] = datanodesResponse.datanodes; - const dataSource: IDatanode[] = datanodes.map(datanode => { + const dataSource: IDatanode[] = datanodes && datanodes.map(datanode => { return { hostname: datanode.hostname, uuid: datanode.uuid, diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/overview/overview.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/overview/overview.tsx index 5b737185ec6c..cc007ae79cc8 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/overview/overview.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/views/overview/overview.tsx @@ -40,6 +40,7 @@ interface IClusterStateResponse { volumes: number; buckets: number; keys: number; + openContainers: number; } interface IOverviewState { @@ -56,6 +57,7 @@ interface IOverviewState { lastUpdatedOMDBDelta: number; lastUpdatedOMDBFull: number; omStatus: string; + openContainers: number; } export class Overview extends React.Component, IOverviewState> { @@ -82,6 +84,7 @@ export class Overview extends React.Component, IOverviewS lastUpdatedOMDBDelta: 0, lastUpdatedOMDBFull: 0, omStatus: '', + openContainers: 0 }; this.autoReload = new AutoReloadHelper(this._loadData); } @@ -111,6 +114,7 @@ export class Overview extends React.Component, IOverviewS buckets: clusterState.buckets, keys: clusterState.keys, missingContainersCount, + openContainers: clusterState.openContainers, lastRefreshed: Number(moment()), lastUpdatedOMDBDelta: omDBDeltaObject && omDBDeltaObject.lastUpdatedTimestamp, lastUpdatedOMDBFull: omDBFullObject && omDBFullObject.lastUpdatedTimestamp @@ -154,7 +158,7 @@ export class Overview extends React.Component, IOverviewS render() { const {loading, datanodes, pipelines, storageReport, containers, volumes, 
buckets, - keys, missingContainersCount, lastRefreshed, lastUpdatedOMDBDelta, lastUpdatedOMDBFull, omStatus} = this.state; + keys, missingContainersCount, lastRefreshed, lastUpdatedOMDBDelta, lastUpdatedOMDBFull, omStatus, openContainers } = this.state; const datanodesElement = ( @@ -172,7 +176,12 @@ export class Overview extends React.Component, IOverviewS {containers - missingContainersCount}/{containers} ) : - containers.toString(); +
+ {containers.toString()} + + ({openContainers}) + +
const clusterCapacity = `${size(storageReport.capacity - storageReport.remaining)}/${size(storageReport.capacity)}`; return (
diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/OMMetadataManagerTestUtils.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/OMMetadataManagerTestUtils.java index ee51c318b8b1..b80924d29103 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/OMMetadataManagerTestUtils.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/OMMetadataManagerTestUtils.java @@ -233,7 +233,8 @@ public static void writeKeyToOm(OMMetadataManager omMetadataManager, long bucketObjectId, long volumeObjectId, List locationVersions, - BucketLayout bucketLayout) + BucketLayout bucketLayout, + long dataSize) throws IOException { String omKey; @@ -248,6 +249,7 @@ public static void writeKeyToOm(OMMetadataManager omMetadataManager, .setBucketName(bucketName) .setVolumeName(volName) .setKeyName(keyName) + .setDataSize(dataSize) .setOmKeyLocationInfos(locationVersions) .setReplicationConfig( StandaloneReplicationConfig.getInstance(ONE)) diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithFSO.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithFSO.java index cdfb3ab28fda..be85ca85b76a 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithFSO.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithFSO.java @@ -18,7 +18,11 @@ package org.apache.hadoop.ozone.recon.api; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; +import static org.apache.hadoop.ozone.om.helpers.QuotaUtil.getReplicatedSize; + import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; @@ -26,6 +30,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.om.OMMetadataManager; @@ -39,13 +44,13 @@ import org.apache.hadoop.ozone.recon.ReconTestInjector; import org.apache.hadoop.ozone.recon.api.handlers.BucketHandler; import org.apache.hadoop.ozone.recon.api.handlers.EntityHandler; -import org.apache.hadoop.ozone.recon.api.types.NamespaceSummaryResponse; import org.apache.hadoop.ozone.recon.api.types.DUResponse; -import org.apache.hadoop.ozone.recon.api.types.EntityType; import org.apache.hadoop.ozone.recon.api.types.FileSizeDistributionResponse; import org.apache.hadoop.ozone.recon.api.types.ResponseStatus; import org.apache.hadoop.ozone.recon.api.types.QuotaUsageResponse; +import org.apache.hadoop.ozone.recon.common.CommonUtils; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; +import org.apache.hadoop.ozone.recon.scm.ReconNodeManager; import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager; import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; @@ -109,6 +114,7 @@ public class TestNSSummaryEndpointWithFSO { private ReconOMMetadataManager reconOMMetadataManager; private NSSummaryEndpoint nsSummaryEndpoint; private OzoneConfiguration ozoneConfiguration; + private CommonUtils commonUtils; private static final String TEST_PATH_UTILITY = "/vol1/buck1/a/b/c/d/e/file1.txt"; @@ -211,46 +217,47 @@ public class TestNSSummaryEndpointWithFSO { private static final 
long KEY_FOUR_SIZE = 2 * OzoneConsts.KB + 1; // bin 2 private static final long KEY_FIVE_SIZE = 100L; // bin 0 private static final long KEY_SIX_SIZE = 2 * OzoneConsts.KB + 1; // bin 2 + private static final long KEY_SEVEN_SIZE = 4 * OzoneConsts.KB + 1; private static final long KEY_EIGHT_SIZE = OzoneConsts.KB + 1; // bin 1 private static final long KEY_NINE_SIZE = 2 * OzoneConsts.KB + 1; // bin 2 private static final long KEY_TEN_SIZE = 2 * OzoneConsts.KB + 1; // bin 2 private static final long KEY_ELEVEN_SIZE = OzoneConsts.KB + 1; // bin 1 - private static final long LOCATION_INFO_GROUP_ONE_SIZE - = CONTAINER_ONE_REPLICA_COUNT * BLOCK_ONE_LENGTH - + CONTAINER_TWO_REPLICA_COUNT * BLOCK_TWO_LENGTH - + CONTAINER_THREE_REPLICA_COUNT * BLOCK_THREE_LENGTH; - - private static final long MULTI_BLOCK_KEY_SIZE_WITH_REPLICA - = LOCATION_INFO_GROUP_ONE_SIZE; - - private static final long LOCATION_INFO_GROUP_TWO_SIZE - = CONTAINER_FOUR_REPLICA_COUNT * BLOCK_FOUR_LENGTH - + CONTAINER_FIVE_REPLICA_COUNT * BLOCK_FIVE_LENGTH - + CONTAINER_SIX_REPLICA_COUNT * BLOCK_SIX_LENGTH; private static final long FILE1_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; + getReplicatedSize(KEY_ONE_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE2_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_TWO_SIZE; + getReplicatedSize(KEY_TWO_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE3_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; + getReplicatedSize(KEY_THREE_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE4_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_TWO_SIZE; + getReplicatedSize(KEY_FOUR_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE5_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; + getReplicatedSize(KEY_FIVE_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE6_SIZE_WITH_REPLICA = - 
LOCATION_INFO_GROUP_TWO_SIZE; + getReplicatedSize(KEY_SIX_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE7_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; + getReplicatedSize(KEY_SEVEN_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE8_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_TWO_SIZE; + getReplicatedSize(KEY_EIGHT_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE9_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; + getReplicatedSize(KEY_NINE_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE10_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_TWO_SIZE; + getReplicatedSize(KEY_TEN_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE11_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; - + getReplicatedSize(KEY_ELEVEN_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); + private static final long MULTI_BLOCK_KEY_SIZE_WITH_REPLICA + = FILE7_SIZE_WITH_REPLICA; private static final long MULTI_BLOCK_TOTAL_SIZE_WITH_REPLICA_UNDER_ROOT = FILE1_SIZE_WITH_REPLICA @@ -375,6 +382,7 @@ public void setUp() throws Exception { new NSSummaryTaskWithFSO(reconNamespaceSummaryManager, reconOMMetadataManager, ozoneConfiguration); nSSummaryTaskWithFso.reprocessWithFSO(reconOMMetadataManager); + commonUtils = new CommonUtils(); } @Test @@ -390,80 +398,48 @@ public void testUtility() { @Test public void testGetBasicInfoRoot() throws Exception { // Test root basics - Response rootResponse = nsSummaryEndpoint.getBasicInfo(ROOT_PATH); - NamespaceSummaryResponse rootResponseObj = - (NamespaceSummaryResponse) rootResponse.getEntity(); - Assert.assertEquals(EntityType.ROOT, rootResponseObj.getEntityType()); - Assert.assertEquals(2, rootResponseObj.getNumVolume()); - Assert.assertEquals(4, rootResponseObj.getNumBucket()); - Assert.assertEquals(5, rootResponseObj.getNumTotalDir()); - Assert.assertEquals(10, 
rootResponseObj.getNumTotalKey()); + commonUtils.testNSSummaryBasicInfoRoot( + nsSummaryEndpoint, reconOMMetadataManager); } @Test public void testGetBasicInfoVol() throws Exception { // Test volume basics - Response volResponse = nsSummaryEndpoint.getBasicInfo(VOL_PATH); - NamespaceSummaryResponse volResponseObj = - (NamespaceSummaryResponse) volResponse.getEntity(); - Assert.assertEquals(EntityType.VOLUME, volResponseObj.getEntityType()); - Assert.assertEquals(2, volResponseObj.getNumBucket()); - Assert.assertEquals(4, volResponseObj.getNumTotalDir()); - Assert.assertEquals(6, volResponseObj.getNumTotalKey()); + commonUtils.testNSSummaryBasicInfoVolume(nsSummaryEndpoint); } @Test public void testGetBasicInfoBucketOne() throws Exception { // Test bucket 1's basics - Response bucketOneResponse = - nsSummaryEndpoint.getBasicInfo(BUCKET_ONE_PATH); - NamespaceSummaryResponse bucketOneObj = - (NamespaceSummaryResponse) bucketOneResponse.getEntity(); - Assert.assertEquals(EntityType.BUCKET, bucketOneObj.getEntityType()); - Assert.assertEquals(4, bucketOneObj.getNumTotalDir()); - Assert.assertEquals(4, bucketOneObj.getNumTotalKey()); + commonUtils.testNSSummaryBasicInfoBucketOne( + BucketLayout.FILE_SYSTEM_OPTIMIZED, + nsSummaryEndpoint); } @Test public void testGetBasicInfoBucketTwo() throws Exception { // Test bucket 2's basics - Response bucketTwoResponse = - nsSummaryEndpoint.getBasicInfo(BUCKET_TWO_PATH); - NamespaceSummaryResponse bucketTwoObj = - (NamespaceSummaryResponse) bucketTwoResponse.getEntity(); - Assert.assertEquals(EntityType.BUCKET, bucketTwoObj.getEntityType()); - Assert.assertEquals(0, bucketTwoObj.getNumTotalDir()); - Assert.assertEquals(2, bucketTwoObj.getNumTotalKey()); + commonUtils.testNSSummaryBasicInfoBucketTwo( + BucketLayout.FILE_SYSTEM_OPTIMIZED, + nsSummaryEndpoint); } @Test public void testGetBasicInfoDir() throws Exception { // Test intermediate directory basics - Response dirOneResponse = nsSummaryEndpoint.getBasicInfo(DIR_ONE_PATH); - 
NamespaceSummaryResponse dirOneObj = - (NamespaceSummaryResponse) dirOneResponse.getEntity(); - Assert.assertEquals(EntityType.DIRECTORY, dirOneObj.getEntityType()); - Assert.assertEquals(3, dirOneObj.getNumTotalDir()); - Assert.assertEquals(3, dirOneObj.getNumTotalKey()); + commonUtils.testNSSummaryBasicInfoDir(nsSummaryEndpoint); } @Test public void testGetBasicInfoNoPath() throws Exception { // Test invalid path - Response invalidResponse = nsSummaryEndpoint.getBasicInfo(INVALID_PATH); - NamespaceSummaryResponse invalidObj = - (NamespaceSummaryResponse) invalidResponse.getEntity(); - Assert.assertEquals(ResponseStatus.PATH_NOT_FOUND, - invalidObj.getStatus()); + commonUtils.testNSSummaryBasicInfoNoPath(nsSummaryEndpoint); } @Test public void testGetBasicInfoKey() throws Exception { // Test key - Response keyResponse = nsSummaryEndpoint.getBasicInfo(KEY_PATH); - NamespaceSummaryResponse keyResObj = - (NamespaceSummaryResponse) keyResponse.getEntity(); - Assert.assertEquals(EntityType.KEY, keyResObj.getEntityType()); + commonUtils.testNSSummaryBasicInfoKey(nsSummaryEndpoint); } @Test @@ -948,7 +924,8 @@ private void setUpMultiBlockKey() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup), - getBucketLayout()); + getBucketLayout(), + KEY_SEVEN_SIZE); } private OmKeyLocationInfoGroup getLocationInfoGroup1() { @@ -1041,7 +1018,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_ONE_SIZE); //vol/bucket1/dir1/dir2/file2 writeKeyToOm(reconOMMetadataManager, @@ -1054,7 +1032,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup2), - getBucketLayout()); + getBucketLayout(), + KEY_TWO_SIZE); //vol/bucket1/dir1/dir3/file3 writeKeyToOm(reconOMMetadataManager, @@ -1067,7 +1046,8 
@@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_THREE_SIZE); //vol/bucket2/file4 writeKeyToOm(reconOMMetadataManager, @@ -1080,7 +1060,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_TWO_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup2), - getBucketLayout()); + getBucketLayout(), + KEY_FOUR_SIZE); //vol/bucket2/file5 writeKeyToOm(reconOMMetadataManager, @@ -1093,7 +1074,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_TWO_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_FIVE_SIZE); //vol/bucket1/dir1/dir4/file6 writeKeyToOm(reconOMMetadataManager, @@ -1106,7 +1088,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup2), - getBucketLayout()); + getBucketLayout(), + KEY_SIX_SIZE); //vol/bucket1/dir1/file7 writeKeyToOm(reconOMMetadataManager, @@ -1119,7 +1102,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_SEVEN_SIZE); //vol2/bucket3/file8 writeKeyToOm(reconOMMetadataManager, @@ -1132,7 +1116,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_THREE_OBJECT_ID, VOL_TWO_OBJECT_ID, Collections.singletonList(locationInfoGroup2), - getBucketLayout()); + getBucketLayout(), + KEY_EIGHT_SIZE); //vol2/bucket3/dir5/file9 writeKeyToOm(reconOMMetadataManager, @@ -1145,7 +1130,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_THREE_OBJECT_ID, VOL_TWO_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_NINE_SIZE); 
//vol2/bucket3/dir5/file10 writeKeyToOm(reconOMMetadataManager, @@ -1158,7 +1144,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_THREE_OBJECT_ID, VOL_TWO_OBJECT_ID, Collections.singletonList(locationInfoGroup2), - getBucketLayout()); + getBucketLayout(), + KEY_TEN_SIZE); //vol2/bucket4/file11 writeKeyToOm(reconOMMetadataManager, @@ -1171,7 +1158,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_FOUR_OBJECT_ID, VOL_TWO_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_ELEVEN_SIZE); } /** @@ -1245,10 +1233,18 @@ private static ReconStorageContainerManagerFacade getMockReconSCM() .thenReturn(containerReplicas6); when(reconSCM.getContainerManager()).thenReturn(containerManager); + ReconNodeManager mockReconNodeManager = mock(ReconNodeManager.class); + when(mockReconNodeManager.getStats()).thenReturn(getMockSCMRootStat()); + when(reconSCM.getScmNodeManager()).thenReturn(mockReconNodeManager); return reconSCM; } private static BucketLayout getBucketLayout() { return BucketLayout.FILE_SYSTEM_OPTIMIZED; } + + private static SCMNodeStat getMockSCMRootStat() { + return new SCMNodeStat(ROOT_QUOTA, ROOT_DATA_SIZE, + ROOT_QUOTA - ROOT_DATA_SIZE); + } } \ No newline at end of file diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithLegacy.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithLegacy.java index ccbdd3619577..31f1bed67402 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithLegacy.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestNSSummaryEndpointWithLegacy.java @@ -18,7 +18,11 @@ package org.apache.hadoop.ozone.recon.api; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; +import static 
org.apache.hadoop.ozone.om.helpers.QuotaUtil.getReplicatedSize; + import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; @@ -26,6 +30,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.om.OMConfigKeys; @@ -40,13 +45,13 @@ import org.apache.hadoop.ozone.recon.ReconTestInjector; import org.apache.hadoop.ozone.recon.api.handlers.BucketHandler; import org.apache.hadoop.ozone.recon.api.handlers.EntityHandler; -import org.apache.hadoop.ozone.recon.api.types.NamespaceSummaryResponse; import org.apache.hadoop.ozone.recon.api.types.DUResponse; -import org.apache.hadoop.ozone.recon.api.types.EntityType; import org.apache.hadoop.ozone.recon.api.types.FileSizeDistributionResponse; import org.apache.hadoop.ozone.recon.api.types.ResponseStatus; import org.apache.hadoop.ozone.recon.api.types.QuotaUsageResponse; +import org.apache.hadoop.ozone.recon.common.CommonUtils; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; +import org.apache.hadoop.ozone.recon.scm.ReconNodeManager; import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager; import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; @@ -110,6 +115,7 @@ public class TestNSSummaryEndpointWithLegacy { private ReconOMMetadataManager reconOMMetadataManager; 
private NSSummaryEndpoint nsSummaryEndpoint; private OzoneConfiguration conf; + private CommonUtils commonUtils; private static final String TEST_PATH_UTILITY = "/vol1/buck1/a/b/c/d/e/file1.txt"; @@ -213,46 +219,48 @@ public class TestNSSummaryEndpointWithLegacy { private static final long KEY_FOUR_SIZE = 2 * OzoneConsts.KB + 1; // bin 2 private static final long KEY_FIVE_SIZE = 100L; // bin 0 private static final long KEY_SIX_SIZE = 2 * OzoneConsts.KB + 1; // bin 2 + private static final long KEY_SEVEN_SIZE = 4 * OzoneConsts.KB + 1; private static final long KEY_EIGHT_SIZE = OzoneConsts.KB + 1; // bin 1 private static final long KEY_NINE_SIZE = 2 * OzoneConsts.KB + 1; // bin 2 private static final long KEY_TEN_SIZE = 2 * OzoneConsts.KB + 1; // bin 2 private static final long KEY_ELEVEN_SIZE = OzoneConsts.KB + 1; // bin 1 - private static final long LOCATION_INFO_GROUP_ONE_SIZE - = CONTAINER_ONE_REPLICA_COUNT * BLOCK_ONE_LENGTH - + CONTAINER_TWO_REPLICA_COUNT * BLOCK_TWO_LENGTH - + CONTAINER_THREE_REPLICA_COUNT * BLOCK_THREE_LENGTH; - - private static final long MULTI_BLOCK_KEY_SIZE_WITH_REPLICA - = LOCATION_INFO_GROUP_ONE_SIZE; - - private static final long LOCATION_INFO_GROUP_TWO_SIZE - = CONTAINER_FOUR_REPLICA_COUNT * BLOCK_FOUR_LENGTH - + CONTAINER_FIVE_REPLICA_COUNT * BLOCK_FIVE_LENGTH - + CONTAINER_SIX_REPLICA_COUNT * BLOCK_SIX_LENGTH; private static final long FILE1_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; + getReplicatedSize(KEY_ONE_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE2_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_TWO_SIZE; + getReplicatedSize(KEY_TWO_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE3_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; + getReplicatedSize(KEY_THREE_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE4_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_TWO_SIZE; + getReplicatedSize(KEY_FOUR_SIZE, + 
StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE5_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; + getReplicatedSize(KEY_FIVE_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE6_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_TWO_SIZE; + getReplicatedSize(KEY_SIX_SIZE, + StandaloneReplicationConfig.getInstance(ONE));; private static final long FILE7_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; + getReplicatedSize(KEY_SEVEN_SIZE, + StandaloneReplicationConfig.getInstance(ONE));; private static final long FILE8_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_TWO_SIZE; + getReplicatedSize(KEY_EIGHT_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE9_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; + getReplicatedSize(KEY_NINE_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE10_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_TWO_SIZE; + getReplicatedSize(KEY_TEN_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); private static final long FILE11_SIZE_WITH_REPLICA = - LOCATION_INFO_GROUP_ONE_SIZE; + getReplicatedSize(KEY_ELEVEN_SIZE, + StandaloneReplicationConfig.getInstance(ONE)); + private static final long MULTI_BLOCK_KEY_SIZE_WITH_REPLICA + = FILE7_SIZE_WITH_REPLICA; private static final long MULTI_BLOCK_TOTAL_SIZE_WITH_REPLICA_UNDER_ROOT = FILE1_SIZE_WITH_REPLICA @@ -375,6 +383,7 @@ public void setUp() throws Exception { new NSSummaryTaskWithLegacy(reconNamespaceSummaryManager, reconOMMetadataManager, conf); nsSummaryTaskWithLegacy.reprocessWithLegacy(reconOMMetadataManager); + commonUtils = new CommonUtils(); } @Test @@ -389,81 +398,48 @@ public void testUtility() { @Test public void testGetBasicInfoRoot() throws Exception { - // Test root basics - Response rootResponse = nsSummaryEndpoint.getBasicInfo(ROOT_PATH); - NamespaceSummaryResponse rootResponseObj = - (NamespaceSummaryResponse) rootResponse.getEntity(); - 
Assert.assertEquals(EntityType.ROOT, rootResponseObj.getEntityType()); - Assert.assertEquals(2, rootResponseObj.getNumVolume()); - Assert.assertEquals(4, rootResponseObj.getNumBucket()); - Assert.assertEquals(5, rootResponseObj.getNumTotalDir()); - Assert.assertEquals(10, rootResponseObj.getNumTotalKey()); + commonUtils.testNSSummaryBasicInfoRoot( + nsSummaryEndpoint, reconOMMetadataManager); } @Test public void testGetBasicInfoVol() throws Exception { // Test volume basics - Response volResponse = nsSummaryEndpoint.getBasicInfo(VOL_PATH); - NamespaceSummaryResponse volResponseObj = - (NamespaceSummaryResponse) volResponse.getEntity(); - Assert.assertEquals(EntityType.VOLUME, volResponseObj.getEntityType()); - Assert.assertEquals(2, volResponseObj.getNumBucket()); - Assert.assertEquals(4, volResponseObj.getNumTotalDir()); - Assert.assertEquals(6, volResponseObj.getNumTotalKey()); + commonUtils.testNSSummaryBasicInfoVolume(nsSummaryEndpoint); } @Test public void testGetBasicInfoBucketOne() throws Exception { // Test bucket 1's basics - Response bucketOneResponse = - nsSummaryEndpoint.getBasicInfo(BUCKET_ONE_PATH); - NamespaceSummaryResponse bucketOneObj = - (NamespaceSummaryResponse) bucketOneResponse.getEntity(); - Assert.assertEquals(EntityType.BUCKET, bucketOneObj.getEntityType()); - Assert.assertEquals(4, bucketOneObj.getNumTotalDir()); - Assert.assertEquals(4, bucketOneObj.getNumTotalKey()); + commonUtils.testNSSummaryBasicInfoBucketOne( + BucketLayout.LEGACY, + nsSummaryEndpoint); } @Test public void testGetBasicInfoBucketTwo() throws Exception { // Test bucket 2's basics - Response bucketTwoResponse = - nsSummaryEndpoint.getBasicInfo(BUCKET_TWO_PATH); - NamespaceSummaryResponse bucketTwoObj = - (NamespaceSummaryResponse) bucketTwoResponse.getEntity(); - Assert.assertEquals(EntityType.BUCKET, bucketTwoObj.getEntityType()); - Assert.assertEquals(0, bucketTwoObj.getNumTotalDir()); - Assert.assertEquals(2, bucketTwoObj.getNumTotalKey()); + 
commonUtils.testNSSummaryBasicInfoBucketTwo( + BucketLayout.LEGACY, + nsSummaryEndpoint); } @Test public void testGetBasicInfoDir() throws Exception { // Test intermediate directory basics - Response dirOneResponse = nsSummaryEndpoint.getBasicInfo(DIR_ONE_PATH); - NamespaceSummaryResponse dirOneObj = - (NamespaceSummaryResponse) dirOneResponse.getEntity(); - Assert.assertEquals(EntityType.DIRECTORY, dirOneObj.getEntityType()); - Assert.assertEquals(3, dirOneObj.getNumTotalDir()); - Assert.assertEquals(3, dirOneObj.getNumTotalKey()); + commonUtils.testNSSummaryBasicInfoDir(nsSummaryEndpoint); } @Test public void testGetBasicInfoNoPath() throws Exception { // Test invalid path - Response invalidResponse = nsSummaryEndpoint.getBasicInfo(INVALID_PATH); - NamespaceSummaryResponse invalidObj = - (NamespaceSummaryResponse) invalidResponse.getEntity(); - Assert.assertEquals(ResponseStatus.PATH_NOT_FOUND, - invalidObj.getStatus()); + commonUtils.testNSSummaryBasicInfoNoPath(nsSummaryEndpoint); } @Test public void testGetBasicInfoKey() throws Exception { // Test key - Response keyResponse = nsSummaryEndpoint.getBasicInfo(KEY_PATH); - NamespaceSummaryResponse keyResObj = - (NamespaceSummaryResponse) keyResponse.getEntity(); - Assert.assertEquals(EntityType.KEY, keyResObj.getEntityType()); + commonUtils.testNSSummaryBasicInfoKey(nsSummaryEndpoint); } @Test @@ -986,7 +962,8 @@ private void setUpMultiBlockKey() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup), - getBucketLayout()); + getBucketLayout(), + KEY_SEVEN_SIZE); } private OmKeyLocationInfoGroup getLocationInfoGroup1() { @@ -1079,7 +1056,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_ONE_SIZE); //vol/bucket1/dir1/dir2/file2 writeKeyToOm(reconOMMetadataManager, @@ -1092,7 +1070,8 @@ private void 
setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup2), - getBucketLayout()); + getBucketLayout(), + KEY_TWO_SIZE); //vol/bucket1/dir1/dir3/file3 writeKeyToOm(reconOMMetadataManager, @@ -1105,7 +1084,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_THREE_SIZE); //vol/bucket2/file4 writeKeyToOm(reconOMMetadataManager, @@ -1118,7 +1098,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_TWO_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup2), - getBucketLayout()); + getBucketLayout(), + KEY_FOUR_SIZE); //vol/bucket2/file5 writeKeyToOm(reconOMMetadataManager, @@ -1131,7 +1112,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_TWO_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_FIVE_SIZE); //vol/bucket1/dir1/dir4/file6 writeKeyToOm(reconOMMetadataManager, @@ -1144,7 +1126,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup2), - getBucketLayout()); + getBucketLayout(), + KEY_SIX_SIZE); //vol/bucket1/dir1/file7 writeKeyToOm(reconOMMetadataManager, @@ -1157,7 +1140,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_ONE_OBJECT_ID, VOL_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_SEVEN_SIZE); //vol2/bucket3/file8 writeKeyToOm(reconOMMetadataManager, @@ -1170,7 +1154,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_THREE_OBJECT_ID, VOL_TWO_OBJECT_ID, Collections.singletonList(locationInfoGroup2), - getBucketLayout()); + getBucketLayout(), + KEY_EIGHT_SIZE); //vol2/bucket3/dir5/file9 
writeKeyToOm(reconOMMetadataManager, @@ -1183,7 +1168,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_THREE_OBJECT_ID, VOL_TWO_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_NINE_SIZE); //vol2/bucket3/dir5/file10 writeKeyToOm(reconOMMetadataManager, @@ -1196,7 +1182,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_THREE_OBJECT_ID, VOL_TWO_OBJECT_ID, Collections.singletonList(locationInfoGroup2), - getBucketLayout()); + getBucketLayout(), + KEY_TEN_SIZE); //vol2/bucket4/file11 writeKeyToOm(reconOMMetadataManager, @@ -1209,7 +1196,8 @@ private void setUpMultiBlockReplicatedKeys() throws IOException { BUCKET_FOUR_OBJECT_ID, VOL_TWO_OBJECT_ID, Collections.singletonList(locationInfoGroup1), - getBucketLayout()); + getBucketLayout(), + KEY_ELEVEN_SIZE); } /** @@ -1283,10 +1271,18 @@ private static ReconStorageContainerManagerFacade getMockReconSCM() .thenReturn(containerReplicas6); when(reconSCM.getContainerManager()).thenReturn(containerManager); + ReconNodeManager mockReconNodeManager = mock(ReconNodeManager.class); + when(mockReconNodeManager.getStats()).thenReturn(getMockSCMRootStat()); + when(reconSCM.getScmNodeManager()).thenReturn(mockReconNodeManager); return reconSCM; } private static BucketLayout getBucketLayout() { return BucketLayout.LEGACY; } + + private static SCMNodeStat getMockSCMRootStat() { + return new SCMNodeStat(ROOT_QUOTA, ROOT_DATA_SIZE, + ROOT_QUOTA - ROOT_DATA_SIZE); + } } \ No newline at end of file diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestTotalOpenContainerCount.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestTotalOpenContainerCount.java new file mode 100644 index 000000000000..04eb53bb6e10 --- /dev/null +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestTotalOpenContainerCount.java @@ -0,0 +1,396 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.api; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos + .ExtendedDatanodeDetailsProto; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.PipelineID; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.LayoutVersionProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMHeartbeatRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageTypeProto; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DatanodeDetailsProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto; +import 
org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; +import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; +import org.apache.hadoop.hdfs.web.URLConnectionFactory; +import org.apache.hadoop.ozone.recon.ReconTestInjector; +import org.apache.hadoop.ozone.recon.ReconUtils; +import org.apache.hadoop.ozone.recon.api.types.ClusterStateResponse; +import org.apache.hadoop.ozone.recon.api.types.DatanodesResponse; +import org.apache.hadoop.ozone.recon.persistence.AbstractReconSqlDBTest; +import org.apache.hadoop.ozone.recon.persistence.ContainerHealthSchemaManager; +import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; +import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; +import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; +import org.apache.hadoop.ozone.recon.spi.impl.OzoneManagerServiceProviderImpl; +import org.apache.hadoop.ozone.recon.spi.impl.StorageContainerServiceProviderImpl; +import org.apache.ozone.test.GenericTestUtils; +import org.apache.ozone.test.LambdaTestUtils; +import org.hadoop.ozone.recon.schema.tables.daos.GlobalStatsDao; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails; +import static org.apache.hadoop.ozone.container.upgrade.UpgradeUtils.defaultLayoutVersionProto; +import static org.apache.hadoop.ozone.recon.OMMetadataManagerTestUtils.getRandomPipeline; +import static org.apache.hadoop.ozone.recon.OMMetadataManagerTestUtils.getTestReconOmMetadataManager; +import static org.apache.hadoop.ozone.recon.OMMetadataManagerTestUtils.initializeNewOmMetadataManager; +import static org.mockito.ArgumentMatchers.any; 
+import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import javax.servlet.http.HttpServletResponse; +import javax.ws.rs.core.Response; +import java.net.HttpURLConnection; +import java.util.LinkedList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Test for Recon API endpoints. + */ +public class TestTotalOpenContainerCount extends AbstractReconSqlDBTest { + private NodeEndpoint nodeEndpoint; + private ClusterStateEndpoint clusterStateEndpoint; + private ReconOMMetadataManager reconOMMetadataManager; + private ReconStorageContainerManagerFacade reconScm; + private boolean isSetupDone = false; + private String pipelineId, pipelineId2; + private DatanodeDetails datanodeDetails; + private DatanodeDetails datanodeDetails2; + private ContainerReportsProto containerReportsProto; + private ExtendedDatanodeDetailsProto extendedDatanodeDetailsProto; + private Pipeline pipeline, pipeline2; + private static final String HOST1 = "host1.datanode"; + private static final String HOST2 = "host2.datanode"; + private static final String IP1 = "1.1.1.1"; + private static final String IP2 = "2.2.2.2"; + private ReconUtils reconUtilsMock; + private ContainerHealthSchemaManager containerHealthSchemaManager; + private List containerIDs; + private List cpw; + private StorageContainerServiceProvider mockScmServiceProvider; + private ContainerReportsProto.Builder builder; + + private void initializeInjector() throws Exception { + reconOMMetadataManager = getTestReconOmMetadataManager( + initializeNewOmMetadataManager(temporaryFolder.newFolder()), + temporaryFolder.newFolder()); + datanodeDetails = randomDatanodeDetails(); + datanodeDetails2 = randomDatanodeDetails(); + datanodeDetails.setHostName(HOST1); + datanodeDetails.setIpAddress(IP1); + 
datanodeDetails2.setHostName(HOST2); + datanodeDetails2.setIpAddress(IP2); + pipeline = getRandomPipeline(datanodeDetails); + pipelineId = pipeline.getId().getId().toString(); + pipeline2 = getRandomPipeline(datanodeDetails2); + pipelineId2 = pipeline2.getId().getId().toString(); + + StorageContainerLocationProtocol mockScmClient = mock( + StorageContainerLocationProtocol.class); + mockScmServiceProvider = mock( + StorageContainerServiceProviderImpl.class); + + when(mockScmServiceProvider.getPipeline( + pipeline.getId().getProtobuf())).thenReturn(pipeline); + when(mockScmServiceProvider.getPipeline( + pipeline2.getId().getProtobuf())).thenReturn(pipeline2); + + // Open 5 containers on pipeline 1 + containerIDs = new LinkedList<>(); + cpw = new LinkedList<>(); + for (long i = 1L; i <= 5L; ++i) { + ContainerInfo containerInfo = new ContainerInfo.Builder() + .setContainerID(i) + .setReplicationConfig( + RatisReplicationConfig.getInstance(ReplicationFactor.ONE)) + .setState(LifeCycleState.OPEN) + .setOwner("test") + .setPipelineID(pipeline.getId()) + .build(); + ContainerWithPipeline containerWithPipeline = + new ContainerWithPipeline(containerInfo, pipeline); + when(mockScmServiceProvider.getContainerWithPipeline(i)) + .thenReturn(containerWithPipeline); + containerIDs.add(i); + cpw.add(containerWithPipeline); + } + + // Open 5 containers on pipeline 2 + for (long i = 6L; i <= 10L; ++i) { + ContainerInfo containerInfo = new ContainerInfo.Builder() + .setContainerID(i) + .setReplicationConfig( + RatisReplicationConfig.getInstance(ReplicationFactor.ONE)) + .setState(LifeCycleState.OPEN) + .setOwner("test") + .setPipelineID(pipeline2.getId()) + .build(); + ContainerWithPipeline containerWithPipeline = + new ContainerWithPipeline(containerInfo, pipeline2); + when(mockScmServiceProvider.getContainerWithPipeline(i)) + .thenReturn(containerWithPipeline); + containerIDs.add(i); + cpw.add(containerWithPipeline); + } + + when(mockScmServiceProvider + 
.getExistContainerWithPipelinesInBatch(containerIDs)) + .thenReturn(cpw); + + reconUtilsMock = mock(ReconUtils.class); + HttpURLConnection urlConnectionMock = mock(HttpURLConnection.class); + when(urlConnectionMock.getResponseCode()) + .thenReturn(HttpServletResponse.SC_OK); + when(reconUtilsMock.makeHttpCall(any(URLConnectionFactory.class), + anyString(), anyBoolean())).thenReturn(urlConnectionMock); + when(reconUtilsMock.getReconDbDir(any(OzoneConfiguration.class), + anyString())).thenReturn(GenericTestUtils.getRandomizedTestDir()); + + ReconTestInjector reconTestInjector = + new ReconTestInjector.Builder(temporaryFolder) + .withReconSqlDb() + .withReconOm(reconOMMetadataManager) + .withOmServiceProvider(mock(OzoneManagerServiceProviderImpl.class)) + .addBinding(StorageContainerServiceProvider.class, + mockScmServiceProvider) + .addBinding(OzoneStorageContainerManager.class, + ReconStorageContainerManagerFacade.class) + .withContainerDB() + .addBinding(ClusterStateEndpoint.class) + .addBinding(NodeEndpoint.class) + .addBinding(ContainerHealthSchemaManager.class) + .addBinding(ReconUtils.class, reconUtilsMock) + .addBinding(StorageContainerLocationProtocol.class, mockScmClient) + .build(); + + nodeEndpoint = reconTestInjector.getInstance(NodeEndpoint.class); + GlobalStatsDao globalStatsDao = getDao(GlobalStatsDao.class); + reconScm = (ReconStorageContainerManagerFacade) + reconTestInjector.getInstance(OzoneStorageContainerManager.class); + containerHealthSchemaManager = + reconTestInjector.getInstance(ContainerHealthSchemaManager.class); + clusterStateEndpoint = + new ClusterStateEndpoint(reconScm, globalStatsDao, + containerHealthSchemaManager); + } + + @BeforeEach + public void setUp() throws Exception { + // The following setup runs only once + if (!isSetupDone) { + initializeInjector(); + isSetupDone = true; + } + String datanodeId = datanodeDetails.getUuid().toString(); + String datanodeId2 = datanodeDetails2.getUuid().toString(); + + // initialize container 
report + builder = ContainerReportsProto.newBuilder(); + for (long i = 1L; i <= 10L; i++) { + if (i >= 1L && i < 6L) { + builder.addReports( + ContainerReplicaProto.newBuilder() + .setContainerID(i) + .setState(ContainerReplicaProto.State.OPEN) + .setOriginNodeId(datanodeId) + .build() + ); + } else { + builder.addReports( + ContainerReplicaProto.newBuilder() + .setContainerID(i) + .setState(ContainerReplicaProto.State.OPEN) + .setOriginNodeId(datanodeId2) + .build() + ); + } + } + containerReportsProto = builder.build(); + + UUID pipelineUuid = UUID.fromString(pipelineId); + HddsProtos.UUID uuid128 = HddsProtos.UUID.newBuilder() + .setMostSigBits(pipelineUuid.getMostSignificantBits()) + .setLeastSigBits(pipelineUuid.getLeastSignificantBits()) + .build(); + + PipelineReport pipelineReport = PipelineReport.newBuilder() + .setPipelineID( + PipelineID.newBuilder().setId(pipelineId).setUuid128(uuid128) + .build()) + .setIsLeader(true) + .build(); + DatanodeDetailsProto datanodeDetailsProto = + DatanodeDetailsProto.newBuilder() + .setHostName(HOST1) + .setUuid(datanodeId) + .setIpAddress(IP1) + .build(); + extendedDatanodeDetailsProto = + ExtendedDatanodeDetailsProto.newBuilder() + .setDatanodeDetails(datanodeDetailsProto) + .setVersion("0.6.0") + .setSetupTime(1596347628802L) + .setBuildDate("2020-08-01T08:50Z") + .setRevision("3346f493fa1690358add7bb9f3e5b52545993f36") + .build(); + StorageReportProto storageReportProto1 = + StorageReportProto.newBuilder().setStorageType(StorageTypeProto.DISK) + .setStorageLocation("/disk1").setScmUsed(10000).setRemaining(5400) + .setCapacity(25000) + .setStorageUuid(UUID.randomUUID().toString()) + .setFailed(false).build(); + StorageReportProto storageReportProto2 = + StorageReportProto.newBuilder().setStorageType(StorageTypeProto.DISK) + .setStorageLocation("/disk2").setScmUsed(25000).setRemaining(10000) + .setCapacity(50000) + .setStorageUuid(UUID.randomUUID().toString()) + .setFailed(false).build(); + NodeReportProto 
nodeReportProto = + NodeReportProto.newBuilder() + .addStorageReport(storageReportProto1) + .addStorageReport(storageReportProto2).build(); + + UUID pipelineUuid2 = UUID.fromString(pipelineId2); + uuid128 = HddsProtos.UUID.newBuilder() + .setMostSigBits(pipelineUuid2.getMostSignificantBits()) + .setLeastSigBits(pipelineUuid2.getLeastSignificantBits()) + .build(); + PipelineReport pipelineReport2 = PipelineReport.newBuilder() + .setPipelineID( + PipelineID.newBuilder().setId(pipelineId2).setUuid128(uuid128) + .build()).setIsLeader(false).build(); + PipelineReportsProto pipelineReportsProto = + PipelineReportsProto.newBuilder() + .addPipelineReport(pipelineReport) + .addPipelineReport(pipelineReport2) + .build(); + DatanodeDetailsProto datanodeDetailsProto2 = + DatanodeDetailsProto.newBuilder() + .setHostName(HOST2) + .setUuid(datanodeId2) + .setIpAddress(IP2) + .build(); + ExtendedDatanodeDetailsProto extendedDatanodeDetailsProto2 = + ExtendedDatanodeDetailsProto.newBuilder() + .setDatanodeDetails(datanodeDetailsProto2) + .setVersion("0.6.0") + .setSetupTime(1596347636802L) + .setBuildDate("2020-08-01T08:50Z") + .setRevision("3346f493fa1690358add7bb9f3e5b52545993f36") + .build(); + StorageReportProto storageReportProto3 = + StorageReportProto.newBuilder().setStorageType(StorageTypeProto.DISK) + .setStorageLocation("/disk1").setScmUsed(20000).setRemaining(7800) + .setCapacity(50000) + .setStorageUuid(UUID.randomUUID().toString()) + .setFailed(false).build(); + StorageReportProto storageReportProto4 = + StorageReportProto.newBuilder().setStorageType(StorageTypeProto.DISK) + .setStorageLocation("/disk2").setScmUsed(60000).setRemaining(10000) + .setCapacity(80000) + .setStorageUuid(UUID.randomUUID().toString()) + .setFailed(false).build(); + NodeReportProto nodeReportProto2 = + NodeReportProto.newBuilder() + .addStorageReport(storageReportProto3) + .addStorageReport(storageReportProto4).build(); + LayoutVersionProto layoutInfo = defaultLayoutVersionProto(); + + try { + 
reconScm.getDatanodeProtocolServer() + .register(extendedDatanodeDetailsProto, nodeReportProto, + containerReportsProto, pipelineReportsProto, layoutInfo); + reconScm.getDatanodeProtocolServer() + .register(extendedDatanodeDetailsProto2, + nodeReportProto2, containerReportsProto, pipelineReportsProto, + layoutInfo); + // Process all events in the event queue + reconScm.getEventQueue().processAll(1000); + } catch (Exception ex) { + Assertions.fail(ex.getMessage()); + } + } + + @Test + public void testOpenContainerCount() throws Exception { + + waitAndCheckConditionAfterHeartbeat(() -> { + Response response1 = clusterStateEndpoint.getClusterState(); + ClusterStateResponse clusterStateResponse1 = + (ClusterStateResponse) response1.getEntity(); + return (clusterStateResponse1.getContainers() == 10); + }); + + Response response = clusterStateEndpoint.getClusterState(); + response = nodeEndpoint.getDatanodes(); + DatanodesResponse datanodesResponse = + (DatanodesResponse) response.getEntity(); + Assertions.assertEquals(2, datanodesResponse.getTotalCount()); + AtomicInteger expectedCount = new AtomicInteger(); + + Response response1 = clusterStateEndpoint.getClusterState(); + ClusterStateResponse clusterStateResponse1 = + (ClusterStateResponse) response1.getEntity(); + // Get the total count of Open containers across all DataNodes + datanodesResponse.getDatanodes().forEach(datanodeMetadata -> { + expectedCount.set( + expectedCount.get() + + datanodeMetadata.getOpenContainers()); + }); + + Assertions.assertEquals(expectedCount.intValue(), + clusterStateResponse1.getOpenContainers()); + } + + private void waitAndCheckConditionAfterHeartbeat(Callable check) + throws Exception { + // if container report is processed first, and pipeline does not exist + // then container is not added until the next container report is processed + SCMHeartbeatRequestProto heartbeatRequestProto = + SCMHeartbeatRequestProto.newBuilder() + .setContainerReport(containerReportsProto) + 
.setDatanodeDetails(extendedDatanodeDetailsProto + .getDatanodeDetails()) + .setDataNodeLayoutVersion(defaultLayoutVersionProto()) + .build(); + reconScm.getDatanodeProtocolServer().sendHeartbeat(heartbeatRequestProto); + LambdaTestUtils.await(30000, 1000, check); + } +} diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/common/CommonUtils.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/common/CommonUtils.java new file mode 100644 index 000000000000..fe1b29c94f18 --- /dev/null +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/common/CommonUtils.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.common; + +import org.apache.hadoop.hdds.protocol.StorageType; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.ozone.OzoneAcl; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmPrefixInfo; +import org.apache.hadoop.ozone.recon.api.NSSummaryEndpoint; +import org.apache.hadoop.ozone.recon.api.types.BucketObjectDBInfo; +import org.apache.hadoop.ozone.recon.api.types.EntityType; +import org.apache.hadoop.ozone.recon.api.types.KeyObjectDBInfo; +import org.apache.hadoop.ozone.recon.api.types.NamespaceSummaryResponse; +import org.apache.hadoop.ozone.recon.api.types.ResponseStatus; +import org.apache.hadoop.ozone.recon.api.types.VolumeObjectDBInfo; +import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; +import org.apache.hadoop.ozone.security.acl.IAccessAuthorizer; +import org.junit.Assert; + +import javax.ws.rs.core.Response; + +import java.util.Collections; +import java.util.HashMap; + +import static org.apache.hadoop.ozone.OzoneAcl.AclScope.ACCESS; + +/** + * This is a utility class for common code for test cases. 
+ */ +public class CommonUtils { + private static final String ROOT_PATH = "/"; + private static final String VOL_PATH = "/vol"; + private static final String BUCKET_ONE_PATH = "/vol/bucket1"; + private static final String BUCKET_TWO_PATH = "/vol/bucket2"; + private static final String DIR_ONE_PATH = "/vol/bucket1/dir1"; + private static final String INVALID_PATH = "/vol/path/not/found"; + private static final String KEY_PATH = "/vol/bucket2/file4"; + + private OmPrefixInfo getOmPrefixInfoForTest( + String path, + IAccessAuthorizer.ACLIdentityType identityType, + String identityString, + IAccessAuthorizer.ACLType aclType, + OzoneAcl.AclScope scope) { + return new OmPrefixInfo(path, + Collections.singletonList(new OzoneAcl( + identityType, identityString, + aclType, scope)), new HashMap<>(), 10, 100); + } + + public void testNSSummaryBasicInfoRoot( + NSSummaryEndpoint nsSummaryEndpoint, + ReconOMMetadataManager reconOMMetadataManager) throws Exception { + String username = "myuser"; + OmPrefixInfo omPrefixInfo = getOmPrefixInfoForTest(ROOT_PATH, + IAccessAuthorizer.ACLIdentityType.USER, + username, + IAccessAuthorizer.ACLType.WRITE, + ACCESS); + omPrefixInfo.getMetadata().put("key", "value"); + reconOMMetadataManager.getPrefixTable() + .put(OzoneConsts.OM_KEY_PREFIX, omPrefixInfo); + // Test root basics + Response rootResponse = nsSummaryEndpoint.getBasicInfo(ROOT_PATH); + NamespaceSummaryResponse rootResponseObj = + (NamespaceSummaryResponse) rootResponse.getEntity(); + Assert.assertEquals(EntityType.ROOT, rootResponseObj.getEntityType()); + Assert.assertEquals(2, rootResponseObj.getCountStats().getNumVolume()); + Assert.assertEquals(4, rootResponseObj.getCountStats().getNumBucket()); + Assert.assertEquals(5, rootResponseObj.getCountStats().getNumTotalDir()); + Assert.assertEquals(10, rootResponseObj.getCountStats().getNumTotalKey()); + Assert.assertEquals(IAccessAuthorizer.ACLIdentityType.USER, + rootResponseObj.getObjectDBInfo().getAcls().get(0).getType()); + 
Assert.assertEquals(IAccessAuthorizer.ACLType.WRITE.toString(), + rootResponseObj.getObjectDBInfo().getAcls().get(0) + .getAclList().get(0).toString()); + Assert.assertEquals(username, + rootResponseObj.getObjectDBInfo().getAcls().get(0).getName()); + Assert.assertEquals("value", + rootResponseObj.getObjectDBInfo().getMetadata().get("key")); + Assert.assertEquals(ACCESS, + rootResponseObj.getObjectDBInfo().getAcls().get(0).getAclScope()); + } + + public void testNSSummaryBasicInfoVolume( + NSSummaryEndpoint nsSummaryEndpoint) throws Exception { + Response volResponse = nsSummaryEndpoint.getBasicInfo(VOL_PATH); + NamespaceSummaryResponse volResponseObj = + (NamespaceSummaryResponse) volResponse.getEntity(); + Assert.assertEquals(EntityType.VOLUME, + volResponseObj.getEntityType()); + Assert.assertEquals(2, + volResponseObj.getCountStats().getNumBucket()); + Assert.assertEquals(4, + volResponseObj.getCountStats().getNumTotalDir()); + Assert.assertEquals(6, + volResponseObj.getCountStats().getNumTotalKey()); + Assert.assertEquals("TestUser", + ((VolumeObjectDBInfo) volResponseObj. + getObjectDBInfo()).getAdmin()); + Assert.assertEquals("TestUser", + ((VolumeObjectDBInfo) volResponseObj. 
+ getObjectDBInfo()).getOwner()); + Assert.assertEquals("vol", + volResponseObj.getObjectDBInfo().getName()); + Assert.assertEquals(2097152, + volResponseObj.getObjectDBInfo().getQuotaInBytes()); + Assert.assertEquals(-1, + volResponseObj.getObjectDBInfo().getQuotaInNamespace()); + } + + public void testNSSummaryBasicInfoBucketOne(BucketLayout bucketLayout, + NSSummaryEndpoint nsSummaryEndpoint) throws Exception { + Response bucketOneResponse = + nsSummaryEndpoint.getBasicInfo(BUCKET_ONE_PATH); + NamespaceSummaryResponse bucketOneObj = + (NamespaceSummaryResponse) bucketOneResponse.getEntity(); + Assert.assertEquals(EntityType.BUCKET, bucketOneObj.getEntityType()); + Assert.assertEquals(4, bucketOneObj.getCountStats().getNumTotalDir()); + Assert.assertEquals(4, bucketOneObj.getCountStats().getNumTotalKey()); + Assert.assertEquals("vol", + ((BucketObjectDBInfo) bucketOneObj.getObjectDBInfo()).getVolumeName()); + Assert.assertEquals(StorageType.DISK, + ((BucketObjectDBInfo) + bucketOneObj.getObjectDBInfo()).getStorageType()); + Assert.assertEquals(bucketLayout, + ((BucketObjectDBInfo) + bucketOneObj.getObjectDBInfo()).getBucketLayout()); + Assert.assertEquals("bucket1", + ((BucketObjectDBInfo) bucketOneObj.getObjectDBInfo()).getName()); + } + + public void testNSSummaryBasicInfoBucketTwo( + BucketLayout bucketLayout, + NSSummaryEndpoint nsSummaryEndpoint) throws Exception { + Response bucketTwoResponse = + nsSummaryEndpoint.getBasicInfo(BUCKET_TWO_PATH); + NamespaceSummaryResponse bucketTwoObj = + (NamespaceSummaryResponse) bucketTwoResponse.getEntity(); + Assert.assertEquals(EntityType.BUCKET, bucketTwoObj.getEntityType()); + Assert.assertEquals(0, bucketTwoObj.getCountStats().getNumTotalDir()); + Assert.assertEquals(2, bucketTwoObj.getCountStats().getNumTotalKey()); + Assert.assertEquals("vol", + ((BucketObjectDBInfo) bucketTwoObj.getObjectDBInfo()).getVolumeName()); + Assert.assertEquals(StorageType.DISK, + ((BucketObjectDBInfo) + 
bucketTwoObj.getObjectDBInfo()).getStorageType()); + Assert.assertEquals(bucketLayout, + ((BucketObjectDBInfo) + bucketTwoObj.getObjectDBInfo()).getBucketLayout()); + Assert.assertEquals("bucket2", + ((BucketObjectDBInfo) bucketTwoObj.getObjectDBInfo()).getName()); + } + + public void testNSSummaryBasicInfoDir( + NSSummaryEndpoint nsSummaryEndpoint) throws Exception { + Response dirOneResponse = nsSummaryEndpoint.getBasicInfo(DIR_ONE_PATH); + NamespaceSummaryResponse dirOneObj = + (NamespaceSummaryResponse) dirOneResponse.getEntity(); + Assert.assertEquals(EntityType.DIRECTORY, dirOneObj.getEntityType()); + Assert.assertEquals(3, + dirOneObj.getCountStats().getNumTotalDir()); + Assert.assertEquals(3, + dirOneObj.getCountStats().getNumTotalKey()); + Assert.assertEquals("dir1", + dirOneObj.getObjectDBInfo().getName()); + Assert.assertEquals(0, + dirOneObj.getObjectDBInfo().getMetadata().size()); + Assert.assertEquals(0, + dirOneObj.getObjectDBInfo().getQuotaInBytes()); + Assert.assertEquals(0, + dirOneObj.getObjectDBInfo().getQuotaInNamespace()); + Assert.assertEquals(0, + dirOneObj.getObjectDBInfo().getUsedNamespace()); + } + + public void testNSSummaryBasicInfoNoPath( + NSSummaryEndpoint nsSummaryEndpoint) throws Exception { + Response invalidResponse = nsSummaryEndpoint + .getBasicInfo(INVALID_PATH); + NamespaceSummaryResponse invalidObj = + (NamespaceSummaryResponse) invalidResponse.getEntity(); + Assert.assertEquals(ResponseStatus.PATH_NOT_FOUND, + invalidObj.getStatus()); + Assert.assertEquals(null, invalidObj.getCountStats()); + Assert.assertEquals(null, invalidObj.getObjectDBInfo()); + } + + public void testNSSummaryBasicInfoKey( + NSSummaryEndpoint nsSummaryEndpoint) throws Exception { + Response keyResponse = nsSummaryEndpoint.getBasicInfo(KEY_PATH); + NamespaceSummaryResponse keyResObj = + (NamespaceSummaryResponse) keyResponse.getEntity(); + Assert.assertEquals(EntityType.KEY, keyResObj.getEntityType()); + Assert.assertEquals("vol", + ((KeyObjectDBInfo) 
keyResObj.getObjectDBInfo()).getVolumeName()); + Assert.assertEquals("bucket2", + ((KeyObjectDBInfo) keyResObj.getObjectDBInfo()).getBucketName()); + Assert.assertEquals("file4", + ((KeyObjectDBInfo) keyResObj.getObjectDBInfo()).getKeyName()); + Assert.assertEquals(2049, + ((KeyObjectDBInfo) keyResObj.getObjectDBInfo()).getDataSize()); + Assert.assertEquals(HddsProtos.ReplicationType.STAND_ALONE, + ((KeyObjectDBInfo) keyResObj.getObjectDBInfo()). + getReplicationConfig().getReplicationType()); + } +} diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java index d60bbe980162..51e6f4a2052d 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java @@ -30,6 +30,7 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.UUID; @@ -343,7 +344,8 @@ private ContainerInfo getMockDeletedContainer(int containerID) { * of a datanode via setMisRepWhenDnPresent. If a DN with that UUID is passed * to validateContainerPlacement, then it will return an invalid placement. 
*/ - private static class MockPlacementPolicy implements PlacementPolicy { + private static class MockPlacementPolicy implements + PlacementPolicy { private UUID misRepWhenDnPresent = null; @@ -370,6 +372,19 @@ public ContainerPlacementStatus validateContainerPlacement( } } + @Override + public Set replicasToCopyToFixMisreplication( + Map replicas) { + return Collections.emptySet(); + } + + + @Override + public Set replicasToRemoveToFixOverreplication( + Set replicas, int expectedCountPerUniqueReplica) { + return null; + } + private boolean isDnPresent(List dns) { for (DatanodeDetails dn : dns) { if (misRepWhenDnPresent != null diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTaskRecordGenerator.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTaskRecordGenerator.java index c88c2440f955..4e86ca905672 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTaskRecordGenerator.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTaskRecordGenerator.java @@ -76,20 +76,25 @@ public void testMissingRecordRetained() { new ContainerHealthStatus(container, replicas, placementPolicy); // Missing record should be retained assertTrue(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, missingRecord())); + .retainOrUpdateRecord(status, missingRecord() + )); // Under / Over / Mis replicated should not be retained as if a container is // missing then it is not in any other category. 
assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, underReplicatedRecord())); + .retainOrUpdateRecord(status, underReplicatedRecord() + )); assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, overReplicatedRecord())); + .retainOrUpdateRecord(status, overReplicatedRecord() + )); assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, misReplicatedRecord())); + .retainOrUpdateRecord(status, misReplicatedRecord() + )); replicas = generateReplicas(container, CLOSED, CLOSED, CLOSED); status = new ContainerHealthStatus(container, replicas, placementPolicy); assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, missingRecord())); + .retainOrUpdateRecord(status, missingRecord() + )); } @Test @@ -109,11 +114,14 @@ public void testUnderReplicatedRecordRetainedAndUpdated() { // Missing / Over / Mis replicated should not be retained assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, missingRecord())); + .retainOrUpdateRecord(status, missingRecord() + )); assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, overReplicatedRecord())); + .retainOrUpdateRecord(status, overReplicatedRecord() + )); assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, misReplicatedRecord())); + .retainOrUpdateRecord(status, misReplicatedRecord() + )); // Container is now replicated OK - should be removed. 
replicas = generateReplicas(container, CLOSED, CLOSED, CLOSED); @@ -139,11 +147,14 @@ public void testOverReplicatedRecordRetainedAndUpdated() { // Missing / Over / Mis replicated should not be retained assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, missingRecord())); + .retainOrUpdateRecord(status, missingRecord() + )); assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, underReplicatedRecord())); + .retainOrUpdateRecord(status, underReplicatedRecord() + )); assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, misReplicatedRecord())); + .retainOrUpdateRecord(status, misReplicatedRecord() + )); // Container is now replicated OK - should be removed. replicas = generateReplicas(container, CLOSED, CLOSED, CLOSED); @@ -173,11 +184,14 @@ public void testMisReplicatedRecordRetainedAndUpdated() { // Missing / Over / Mis replicated should not be retained assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, missingRecord())); + .retainOrUpdateRecord(status, missingRecord() + )); assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, underReplicatedRecord())); + .retainOrUpdateRecord(status, underReplicatedRecord() + )); assertFalse(ContainerHealthTask.ContainerHealthRecords - .retainOrUpdateRecord(status, overReplicatedRecord())); + .retainOrUpdateRecord(status, overReplicatedRecord() + )); // Container is now placed OK - should be removed. 
when(placementPolicy.validateContainerPlacement( diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/AbstractReconContainerManagerTest.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/AbstractReconContainerManagerTest.java index 1708dea00b35..5baec5c669e2 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/AbstractReconContainerManagerTest.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/AbstractReconContainerManagerTest.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.IOException; +import java.time.Clock; import java.time.ZoneId; import java.util.LinkedList; import java.util.List; @@ -48,7 +49,6 @@ import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.DBStoreBuilder; import org.apache.hadoop.hdds.utils.db.Table; -import org.apache.hadoop.ozone.common.MonotonicClock; import org.apache.hadoop.ozone.recon.ReconUtils; import org.apache.hadoop.ozone.recon.persistence.ContainerHealthSchemaManager; import org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager; @@ -112,7 +112,7 @@ public void setUp(@TempDir File tempDir) throws Exception { scmhaManager, scmContext); ContainerReplicaPendingOps pendingOps = new ContainerReplicaPendingOps( - conf, new MonotonicClock(ZoneId.systemDefault())); + conf, Clock.system(ZoneId.systemDefault())); containerManager = new ReconContainerManager( conf, diff --git a/hadoop-ozone/s3gateway/pom.xml b/hadoop-ozone/s3gateway/pom.xml index 20a0b7c722ea..40021f42fb02 100644 --- a/hadoop-ozone/s3gateway/pom.xml +++ b/hadoop-ozone/s3gateway/pom.xml @@ -19,12 +19,12 @@ org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-s3gateway Apache Ozone S3 Gateway jar - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT UTF-8 true @@ -39,7 +39,6 @@ org.javassist javassist - 3.21.0-GA org.apache.ozone diff --git a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/ClientProtocolStub.java 
b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/ClientProtocolStub.java index 7c1daeca0b30..e8da93b366de 100644 --- a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/ClientProtocolStub.java +++ b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/ClientProtocolStub.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.io.Text; import org.apache.hadoop.ozone.OzoneAcl; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; import org.apache.hadoop.ozone.client.io.OzoneInputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.client.protocol.ClientProtocol; @@ -583,4 +584,27 @@ Map> getKeysEveryReplicas( String volumeName, String bucketName, String keyName) throws IOException { return null; } + + @Override + public OzoneDataStreamOutput createStreamKey( + String volumeName, String bucketName, String keyName, long size, + ReplicationConfig replicationConfig, Map metadata) + throws IOException { + return null; + } + + @Override + public OzoneDataStreamOutput createMultipartStreamKey( + String volumeName, String bucketName, String keyName, long size, + int partNumber, String uploadID) throws IOException { + return null; + } + + @Override + public OzoneDataStreamOutput createStreamFile( + String volumeName, String bucketName, String keyName, long size, + ReplicationConfig replicationConf, boolean overWrite, boolean recursive) + throws IOException { + return null; + } } diff --git a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneBucketStub.java b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneBucketStub.java index f8a48c470062..a150e50ebb58 100644 --- a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneBucketStub.java +++ b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneBucketStub.java @@ -118,7 +118,7 @@ public 
void close() throws IOException { super.close(); } }; - return new OzoneOutputStream(byteArrayOutputStream); + return new OzoneOutputStream(byteArrayOutputStream, null); } @Override @@ -150,7 +150,7 @@ public void close() throws IOException { super.close(); } }; - return new OzoneOutputStream(byteArrayOutputStream); + return new OzoneOutputStream(byteArrayOutputStream, null); } @Override diff --git a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneOutputStreamStub.java b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneOutputStreamStub.java index 83cb90799f1c..326d388b88e9 100644 --- a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneOutputStreamStub.java +++ b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneOutputStreamStub.java @@ -41,7 +41,7 @@ public class OzoneOutputStreamStub extends OzoneOutputStream { * @param name - partName */ public OzoneOutputStreamStub(OutputStream outputStream, String name) { - super(outputStream); + super(outputStream, null); this.partName = name; } diff --git a/hadoop-ozone/tools/pom.xml b/hadoop-ozone/tools/pom.xml index a7a6bd8093ca..5bbde746b067 100644 --- a/hadoop-ozone/tools/pom.xml +++ b/hadoop-ozone/tools/pom.xml @@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ozone-tools - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Tools Apache Ozone Tools jar diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/nssummary/NSSummaryAdmin.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/nssummary/NSSummaryAdmin.java index 727be27670a9..14f38d71e828 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/nssummary/NSSummaryAdmin.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/nssummary/NSSummaryAdmin.java @@ -87,7 +87,8 @@ public Class getParentType() { } public boolean 
isFileSystemOptimizedBucket(String path) throws IOException { - OFSPath ofsPath = new OFSPath(path); + OFSPath ofsPath = new OFSPath(path, + OzoneConfiguration.of(getOzoneConfig())); OzoneClient ozoneClient = OzoneClientFactory.getRpcClient(getOzoneConfig()); ObjectStore objectStore = ozoneClient.getObjectStore(); @@ -111,7 +112,8 @@ public boolean isFileSystemOptimizedBucket(String path) throws IOException { } public boolean isObjectStoreBucket(String path) throws IOException { - OFSPath ofsPath = new OFSPath(path); + OFSPath ofsPath = new OFSPath(path, + OzoneConfiguration.of(getOzoneConfig())); boolean enableFileSystemPaths = getOzoneConfig() .getBoolean(OMConfigKeys.OZONE_OM_ENABLE_FILESYSTEM_PATHS, @@ -147,7 +149,8 @@ public boolean isObjectStoreBucket(String path) throws IOException { * @throws IOException */ public boolean bucketIsPresentInThePath(String path) throws IOException { - OFSPath ofsPath = new OFSPath(path); + OFSPath ofsPath = new OFSPath(path, + OzoneConfiguration.of(getOzoneConfig())); OzoneClient ozoneClient = OzoneClientFactory.getRpcClient(getOzoneConfig()); ObjectStore objectStore = ozoneClient.getObjectStore(); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java index ef1a094599f5..a45648ce2b5a 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hdds.utils.db.DBColumnFamilyDefinition; import org.apache.hadoop.hdds.utils.db.DBDefinition; import org.apache.hadoop.hdds.utils.db.FixedLengthStringUtils; +import org.apache.hadoop.hdds.utils.db.RocksDatabase; import org.apache.hadoop.hdds.utils.db.managed.ManagedReadOptions; import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB; import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksIterator; @@ -37,6 
+38,9 @@ import org.kohsuke.MetaInfServices; import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import picocli.CommandLine; import java.io.FileOutputStream; @@ -63,6 +67,9 @@ @MetaInfServices(SubcommandWithParent.class) public class DBScanner implements Callable, SubcommandWithParent { + public static final Logger LOG = + LoggerFactory.getLogger(DBScanner.class); + @CommandLine.Option(names = {"--column_family"}, required = true, description = "Table name") @@ -97,6 +104,14 @@ public class DBScanner implements Callable, SubcommandWithParent { defaultValue = "-1") private static long containerId; + @CommandLine.Option(names = { "--show-count", + "-count" }, description = "Get estimated key count for a" + + " given column family in the db", + defaultValue = "false", + showDefaultValue = CommandLine.Help.Visibility.ALWAYS) + private static boolean showCount; + + @CommandLine.ParentCommand private RDBParser parent; @@ -222,6 +237,10 @@ public static void setWithKey(boolean withKey) { DBScanner.withKey = withKey; } + public static void setShowCount(boolean showCount) { + DBScanner.showCount = showCount; + } + private static ColumnFamilyHandle getColumnFamilyHandle( byte[] name, List columnFamilyHandles) { return columnFamilyHandles @@ -247,8 +266,7 @@ private void constructColumnFamilyMap(DBDefinition dbDefinition) { DBColumnFamilyDefinition[] columnFamilyDefinitions = dbDefinition .getColumnFamilies(); for (DBColumnFamilyDefinition definition:columnFamilyDefinitions) { - System.out.println("Added definition for table:" + - definition.getTableName()); + LOG.info("Added definition for table: {}", definition.getTableName()); this.columnFamilyMap.put(definition.getTableName(), definition); } } @@ -269,7 +287,8 @@ public Void call() throws Exception { private void printAppropriateTable( List columnFamilyHandleList, - ManagedRocksDB rocksDB, String 
dbPath) throws IOException { + ManagedRocksDB rocksDB, String dbPath) + throws IOException, RocksDBException { if (limit < 1 && limit != -1) { throw new IllegalArgumentException( "List length should be a positive number. Only allowed negative" + @@ -292,6 +311,12 @@ private void printAppropriateTable( if (columnFamilyHandle == null) { throw new IllegalArgumentException("columnFamilyHandle is null"); } + if (showCount) { + long keyCount = rocksDB.get().getLongProperty(columnFamilyHandle, + RocksDatabase.ESTIMATE_NUM_KEYS); + System.out.println(keyCount); + return; + } ManagedRocksIterator iterator; if (containerId > 0 && dnDBSchemaVersion != null && dnDBSchemaVersion.equals("V3")) { diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ExportSubcommand.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ExportSubcommand.java index 0a00959f7ec3..e2f0a255690b 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ExportSubcommand.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ExportSubcommand.java @@ -31,6 +31,7 @@ import java.io.FileOutputStream; import java.util.concurrent.Callable; +import static org.apache.commons.compress.compressors.CompressorStreamFactory.GZIP; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_NOT_FOUND; /** @@ -61,6 +62,7 @@ public class ExportSubcommand implements Callable { description = "Count of containers to export") private long containerCount = 1; + @Override public Void call() throws Exception { parent.loadContainersFromVolumes(); @@ -74,7 +76,7 @@ public Void call() throws Exception { new File(destination, "container-" + containerId + ".tar.gz"); try (FileOutputStream fos = new FileOutputStream(destinationFile)) { try { - replicationSource.copyData(containerId, fos); + replicationSource.copyData(containerId, fos, GZIP); } catch (StorageContainerException e) { if 
(e.getResult() == CONTAINER_NOT_FOUND) { continue; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/ClosedContainerReplicator.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/ClosedContainerReplicator.java index a7d332e7839a..1c0ba2a80c49 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/ClosedContainerReplicator.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/ClosedContainerReplicator.java @@ -37,9 +37,11 @@ import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController; import org.apache.hadoop.ozone.container.replication.ContainerReplicator; import org.apache.hadoop.ozone.container.replication.DownloadAndImportReplicator; +import org.apache.hadoop.ozone.container.replication.ReplicationServer; import org.apache.hadoop.ozone.container.replication.ReplicationSupervisor; import org.apache.hadoop.ozone.container.replication.ReplicationTask; import org.apache.hadoop.ozone.container.replication.SimpleContainerDownloader; +import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; import org.jetbrains.annotations.NotNull; import picocli.CommandLine.Command; import picocli.CommandLine.Option; @@ -48,6 +50,8 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.time.Clock; +import java.time.ZoneId; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -123,8 +127,9 @@ public Void call() throws Exception { //if datanode is specified, replicate only container if it has a //replica. 
if (datanode.isEmpty() || datanodeUUIDs.contains(datanode)) { - replicationTasks.add(new ReplicationTask(container.getContainerID(), - datanodesWithContainer)); + replicationTasks.add(new ReplicationTask( + new ReplicateContainerCommand(container.getContainerID(), + datanodesWithContainer))); } } @@ -198,12 +203,15 @@ private void initializeReplicationSupervisor(ConfigurationSource conf) new ContainerController(containerSet, handlers); ContainerReplicator replicator = - new DownloadAndImportReplicator(containerSet, + new DownloadAndImportReplicator(conf, containerSet, controller, new SimpleContainerDownloader(conf, null), - new TarContainerPacker()); + new TarContainerPacker(), null); - supervisor = new ReplicationSupervisor(containerSet, replicator, 10); + ReplicationServer.ReplicationConfig replicationConfig + = conf.getObject(ReplicationServer.ReplicationConfig.class); + supervisor = new ReplicationSupervisor(containerSet, null, + replicator, replicationConfig, Clock.system(ZoneId.systemDefault())); } private void replicateContainer(long counter) throws Exception { diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/ContentGenerator.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/ContentGenerator.java index 92f7ae4b2ecd..b01c12f6b354 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/ContentGenerator.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/ContentGenerator.java @@ -18,10 +18,12 @@ import java.io.IOException; import java.io.OutputStream; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; /** * Utility class to write random keys from a limited buffer. 
@@ -81,6 +83,22 @@ public void write(OutputStream outputStream) throws IOException { } } + /** + * Write the required bytes to the streaming output stream. + */ + public void write(OzoneDataStreamOutput out) throws IOException { + for (long nrRemaining = keySize; + nrRemaining > 0; nrRemaining -= bufferSize) { + int curSize = (int) Math.min(bufferSize, nrRemaining); + for (int i = 0; i < curSize; i += copyBufferSize) { + ByteBuffer bb = + ByteBuffer.wrap(buffer, i, Math.min(copyBufferSize, curSize - i)); + out.write(bb); + } + } + out.close(); + } + @VisibleForTesting byte[] getBuffer() { return buffer; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/OzoneClientKeyGenerator.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/OzoneClientKeyGenerator.java index 3e07f3b22e48..b119e27ea6c1 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/OzoneClientKeyGenerator.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/OzoneClientKeyGenerator.java @@ -24,10 +24,13 @@ import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; import com.codahale.metrics.Timer; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; import picocli.CommandLine.Command; import picocli.CommandLine.Mixin; import picocli.CommandLine.Option; @@ -74,6 +77,12 @@ public class OzoneClientKeyGenerator extends BaseFreonGenerator @Mixin private FreonReplicationOptions replication; + @Option( + names = {"--enable-streaming", "--stream"}, + description = "Specify whether the write will be through ratis streaming" + ) + private boolean enableRatisStreaming 
= false; + private Timer timer; private OzoneBucket bucket; @@ -101,7 +110,11 @@ public Void call() throws Exception { timer = getMetrics().timer("key-create"); - runTests(this::createKey); + if (enableRatisStreaming) { + runTests(this::createStreamKey); + } else { + runTests(this::createKey); + } } return null; } @@ -118,4 +131,18 @@ private void createKey(long counter) throws Exception { return null; }); } + + private void createStreamKey(long counter) throws Exception { + final ReplicationConfig conf = ReplicationConfig.fromProtoTypeAndFactor( + ReplicationType.RATIS, ReplicationFactor.THREE); + final String key = generateObjectName(counter); + + timer.time(() -> { + try (OzoneDataStreamOutput stream = bucket.createStreamKey( + key, keySize, conf, metadata)) { + contentGenerator.write(stream); + } + return null; + }); + } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/Shell.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/Shell.java index ef7594333262..97e160651bbc 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/Shell.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/Shell.java @@ -29,11 +29,22 @@ */ public abstract class Shell extends GenericCli { - public static final String OZONE_URI_DESCRIPTION = "Ozone URI could start " - + "with o3:// or without prefix. URI may contain the host/serviceId " - + "and port of the OM server. Both are optional. " - + "If they are not specified it will be identified from " - + "the config files."; + public static final String OZONE_URI_DESCRIPTION = + "Ozone URI could either be a full URI or short URI.\n" + + "Full URI should start with o3://, in case of non-HA\nclusters it " + + "should be followed by the host name and\noptionally the port " + + "number. In case of HA clusters\nthe service id should be used. 
" + + "Service id provides a\nlogical name for multiple hosts and it is " + + "defined\nin the property ozone.om.service.ids.\n" + + "Example of a full URI with host name and port number\nfor a key:" + + "\no3://omhostname:9862/vol1/bucket1/key1\n" + + "With a service id for a volume:" + + "\no3://omserviceid/vol1/\n" + + "Short URI should start from the volume." + + "\nExample of a short URI for a bucket:" + + "\nvol1/bucket1\n" + + "Any unspecified information will be identified from\n" + + "the config files.\n"; public Shell() { } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/bucket/ClearQuotaHandler.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/bucket/ClearQuotaHandler.java index 160475e19340..103f13841db9 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/bucket/ClearQuotaHandler.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/bucket/ClearQuotaHandler.java @@ -31,7 +31,8 @@ * clean quota of the bucket. */ @Command(name = "clrquota", - description = "clear quota of the bucket") + description = "clear quota of the bucket. 
At least one of the " + + "quota clear flag is mandatory.") public class ClearQuotaHandler extends BucketHandler { @CommandLine.Mixin @@ -44,12 +45,19 @@ protected void execute(OzoneClient client, OzoneAddress address) String bucketName = address.getBucketName(); OzoneBucket bucket = client.getObjectStore().getVolume(volumeName) .getBucket(bucketName); - + boolean isOptionPresent = false; if (clrSpaceQuota.getClrSpaceQuota()) { bucket.clearSpaceQuota(); + isOptionPresent = true; } if (clrSpaceQuota.getClrNamespaceQuota()) { bucket.clearNamespaceQuota(); + isOptionPresent = true; + } + + if (!isOptionPresent) { + throw new IOException( + "At least one of the quota clear flag is required."); } } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/bucket/SetQuotaHandler.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/bucket/SetQuotaHandler.java index 7df37a327276..68d1bee1784d 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/bucket/SetQuotaHandler.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/bucket/SetQuotaHandler.java @@ -34,7 +34,8 @@ * set quota of the bucket. */ @Command(name = "setquota", - description = "Set quota of the buckets") + description = "Set quota of the buckets. 
At least one of the " + + "quota set flag is mandatory.") public class SetQuotaHandler extends BucketHandler { @CommandLine.Mixin @@ -50,15 +51,22 @@ public void execute(OzoneClient client, OzoneAddress address) .getBucket(bucketName); long spaceQuota = bucket.getQuotaInBytes(); long namespaceQuota = bucket.getQuotaInNamespace(); - + boolean isOptionPresent = false; if (!Strings.isNullOrEmpty(quotaOptions.getQuotaInBytes())) { spaceQuota = OzoneQuota.parseSpaceQuota( quotaOptions.getQuotaInBytes()).getQuotaInBytes(); + isOptionPresent = true; } if (!Strings.isNullOrEmpty(quotaOptions.getQuotaInNamespace())) { namespaceQuota = OzoneQuota.parseNameSpaceQuota( quotaOptions.getQuotaInNamespace()).getQuotaInNamespace(); + isOptionPresent = true; + } + + if (!isOptionPresent) { + throw new IOException( + "At least one of the quota set flag is required."); } if (bucket.getQuotaInNamespace() == OLD_QUOTA_DEFAULT || diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/GetKeyHandler.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/GetKeyHandler.java index 5df236e3304d..501a64238f0d 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/GetKeyHandler.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/GetKeyHandler.java @@ -90,8 +90,8 @@ protected void execute(OzoneClient client, OzoneAddress address) if (isVerbose() && !"/dev/null".equals(dataFile.getAbsolutePath())) { try (InputStream stream = new FileInputStream(dataFile)) { - String hash = DigestUtils.md5Hex(stream); - out().printf("Downloaded file hash : %s%n", hash); + String hash = DigestUtils.sha256Hex(stream); + out().printf("Downloaded file sha256 checksum : %s%n", hash); } } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/PutKeyHandler.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/PutKeyHandler.java index 7d7885d168d9..68beb6922804 100644 --- 
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/PutKeyHandler.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/PutKeyHandler.java @@ -23,10 +23,15 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; import java.util.HashMap; import java.util.Map; +import com.google.common.base.Preconditions; import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ozone.OzoneConsts; @@ -34,6 +39,7 @@ import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.client.io.OzoneDataStreamOutput; import org.apache.hadoop.ozone.shell.OzoneAddress; import org.apache.commons.codec.digest.DigestUtils; @@ -43,6 +49,7 @@ import org.apache.hadoop.ozone.shell.ShellReplicationOptions; import picocli.CommandLine.Command; import picocli.CommandLine.Mixin; +import picocli.CommandLine.Option; import picocli.CommandLine.Parameters; /** @@ -55,6 +62,9 @@ public class PutKeyHandler extends KeyHandler { @Parameters(index = "1", arity = "1..1", description = "File to upload") private String fileName; + @Option(names = "--stream") + private boolean stream; + @Mixin private ShellReplicationOptions replication; @@ -70,8 +80,8 @@ protected void execute(OzoneClient client, OzoneAddress address) if (isVerbose()) { try (InputStream stream = new FileInputStream(dataFile)) { - String hash = DigestUtils.md5Hex(stream); - out().printf("File Hash : %s%n", hash); + String hash = DigestUtils.sha256Hex(stream); + out().printf("File sha256 checksum : %s%n", hash); } } @@ -89,11 +99,60 @@ protected void execute(OzoneClient client, OzoneAddress 
address) int chunkSize = (int) getConf().getStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, OZONE_SCM_CHUNK_SIZE_DEFAULT, StorageUnit.BYTES); + + if (stream) { + stream(dataFile, bucket, keyName, keyMetadata, + replicationConfig, chunkSize); + } else { + async(dataFile, bucket, keyName, keyMetadata, + replicationConfig, chunkSize); + } + } + + void async( + File dataFile, OzoneBucket bucket, + String keyName, Map keyMetadata, + ReplicationConfig replicationConfig, int chunkSize) + throws IOException { + if (isVerbose()) { + out().println("API: async"); + } try (InputStream input = new FileInputStream(dataFile); - OutputStream output = bucket.createKey(keyName, dataFile.length(), - replicationConfig, keyMetadata)) { + OutputStream output = bucket.createKey(keyName, dataFile.length(), + replicationConfig, keyMetadata)) { IOUtils.copyBytes(input, output, chunkSize); } } + void stream( + File dataFile, OzoneBucket bucket, + String keyName, Map keyMetadata, + ReplicationConfig replicationConfig, int chunkSize) + throws IOException { + if (isVerbose()) { + out().println("API: streaming"); + } + // In streaming mode, always resolve replication config at client side, + // because streaming is not compatible for writing EC keys. 
+ replicationConfig = ReplicationConfig.resolve(replicationConfig, + bucket.getReplicationConfig(), getConf()); + Preconditions.checkArgument( + !(replicationConfig instanceof ECReplicationConfig), + "Can not put EC key by streaming"); + + try (RandomAccessFile raf = new RandomAccessFile(dataFile, "r"); + OzoneDataStreamOutput out = bucket.createStreamKey(keyName, + dataFile.length(), replicationConfig, keyMetadata)) { + FileChannel ch = raf.getChannel(); + long len = raf.length(); + long off = 0; + while (len > 0) { + long writeLen = Math.min(len, chunkSize); + ByteBuffer bb = ch.map(FileChannel.MapMode.READ_ONLY, off, writeLen); + out.write(bb); + off += writeLen; + len -= writeLen; + } + } + } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/ClearQuotaHandler.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/ClearQuotaHandler.java index fc5dc9615023..b36e77e90867 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/ClearQuotaHandler.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/ClearQuotaHandler.java @@ -31,7 +31,8 @@ * clear quota of the volume. */ @Command(name = "clrquota", - description = "clear quota of the volume") + description = "clear quota of the volume. 
At least one of the " + + "quota clear flag is mandatory.") public class ClearQuotaHandler extends VolumeHandler { @CommandLine.Mixin @@ -42,12 +43,19 @@ protected void execute(OzoneClient client, OzoneAddress address) throws IOException { String volumeName = address.getVolumeName(); OzoneVolume volume = client.getObjectStore().getVolume(volumeName); - + boolean isOptionPresent = false; if (clrSpaceQuota.getClrSpaceQuota()) { volume.clearSpaceQuota(); + isOptionPresent = true; } if (clrSpaceQuota.getClrNamespaceQuota()) { volume.clearNamespaceQuota(); + isOptionPresent = true; + } + + if (!isOptionPresent) { + throw new IOException( + "At least one of the quota clear flag is required."); } } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/SetQuotaHandler.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/SetQuotaHandler.java index 018a213ecb53..40d4b9f19324 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/SetQuotaHandler.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/SetQuotaHandler.java @@ -35,7 +35,8 @@ * Executes set volume quota calls. */ @Command(name = "setquota", - description = "Set quota of the volumes") + description = "Set quota of the volumes. 
At least one of the " + + "quota set flag is mandatory.") public class SetQuotaHandler extends VolumeHandler { @CommandLine.Mixin @@ -49,14 +50,22 @@ protected void execute(OzoneClient client, OzoneAddress address) long spaceQuota = volume.getQuotaInBytes(); long namespaceQuota = volume.getQuotaInNamespace(); + boolean isOptionPresent = false; if (!Strings.isNullOrEmpty(quotaOptions.getQuotaInBytes())) { spaceQuota = OzoneQuota.parseSpaceQuota( quotaOptions.getQuotaInBytes()).getQuotaInBytes(); + isOptionPresent = true; } if (!Strings.isNullOrEmpty(quotaOptions.getQuotaInNamespace())) { namespaceQuota = OzoneQuota.parseNameSpaceQuota( quotaOptions.getQuotaInNamespace()).getQuotaInNamespace(); + isOptionPresent = true; + } + + if (!isOptionPresent) { + throw new IOException( + "At least one of the quota set flag is required."); } if (volume.getQuotaInNamespace() == OLD_QUOTA_DEFAULT) { diff --git a/pom.xml b/pom.xml index fa13d556e271..35e5bf4f5d31 100644 --- a/pom.xml +++ b/pom.xml @@ -18,7 +18,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 4.0.0 org.apache.ozone ozone-main - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT Apache Ozone Main Apache Ozone Main pom @@ -62,17 +62,18 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs + 2.7.3 3.3.4 ${ozone.version} - 1.3.0-SNAPSHOT - Grand Canyon + 1.4.0-SNAPSHOT + Hot Springs ${hdds.version} ${ozone.version} - 2.4.1 + 2.4.2-8b8bdda-SNAPSHOT 1.0.3 @@ -112,13 +113,14 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 3.2.2 1.21 2.1.1 + 1.5.2-5 1.0.13 2.11.0 3.7 1.2 1.1 3.1.1 - 3.6 + 3.9.0 2.6.0 1.4 1.6 @@ -128,7 +130,26 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs file:///dev/urandom + 1.2 + 3.12.2 + 5.0.4 + 0.8.0.RELEASE 1.67 + 3.2.0 + 10.14.2.0 + 3.0.2 + 3.2.4 + 0.8.5 + 3.21.0-GA + 1.1.1 + 2.3.0 + 2.3.0.1 + 0.1.54 + 1.2 + 3.1.0 + 3.1 + 2.1 + 1.1.1 1.19 @@ -136,6 +157,7 @@ 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 2.13.4.20221013 + 5.4.0 1.6.0 @@ -153,7 +175,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 3.4.2 1.2.22 + 1.0.1 + 1.6.21 + 1.8 + 4.6.1 0.7.0 + 0.9.11 1.1 @@ -168,16 +195,21 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 3.1.12 2.1.7 + 0.19 + 2.2.0 31.1-jre 4.0 2.9.0 + 1.0 2.7.5 1.10.19 2.28.2 1.3 1.6.5 2.0.4 + 1.24 + 4.13.1 5.8.2 1.8.2 @@ -186,9 +218,15 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 3.19.6 1.5.0.Final - 4.1.79.Final + + + 4.1.77.Final 1.48.1 + 7.7.3 + 3.25.2 + 2.4.7.Final + 1.8 @@ -218,12 +256,14 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 1.5 3.0.0-M1 3.0.1 - 2.4 + 3.4.2 0.12 2.8.1 1.9 3.0.2 + 0.29.0 1.3.1 + 1.10 1.0-beta-1 1.0-alpha-8 3.1.2 @@ -257,7 +297,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs info.picocli picocli - 4.6.1 + ${picocli.version} + + + org.apache.derby + derby + ${derby.version} org.apache.hadoop @@ -718,30 +763,35 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs commons-validator ${commons-validator.version} + + com.github.luben + zstd-jni + ${zstd-jni.version} + javax.activation activation - 1.1.1 + ${javax-activation.version} javax.annotation javax.annotation-api - 1.2 + ${annotation-api.version} javax.enterprise cdi-api - 1.2 + ${cdi-api.version} javax.servlet javax.servlet-api - 3.1.0 + ${servlet-api.version} javax.ws.rs jsr311-api - 1.1.1 + ${jsr311-api.version} org.eclipse.jetty @@ -777,12 +827,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs javax.servlet.jsp jsp-api - 2.1 + ${jsp-api.version} org.glassfish javax.servlet - 3.1 + ${glassfish-servlet.version} org.glassfish.hk2 @@ -854,7 +904,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 
org.ow2.asm asm - 5.0.4 + ${asm.version} com.sun.jersey @@ -886,7 +936,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs com.google.errorprone error_prone_annotations - 2.2.0 + ${errorprone-annotations.version} true @@ -913,13 +963,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs com.jolbox bonecp - 0.8.0.RELEASE + ${bonecp.version} cglib cglib - 3.2.0 + ${cglib.version} @@ -1039,6 +1089,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs log4j-core ${log4j2.version} + + com.codahale.metrics + metrics-core + ${codahale-metrics.version} + test + com.lmax disruptor @@ -1057,7 +1113,18 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs junit junit - 4.13.1 + ${junit4.version} + + + org.jacoco + org.jacoco.core + provided + ${jacoco.version} + + + org.javassist + javassist + ${javassist.version} org.junit.jupiter @@ -1144,7 +1211,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs com.fasterxml.woodstox woodstox-core - 5.4.0 + ${woodstox.version} com.fasterxml.jackson @@ -1168,7 +1235,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs org.jmockit jmockit - 1.24 + ${jmockit.version} + test + + + org.mockito + mockito-all + ${mockito1-powermock.version} test @@ -1186,13 +1259,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs com.google.testing.compile compile-testing - 0.19 + ${compile-testing.version} test org.objenesis objenesis - 1.0 + ${objenesis.version} com.google.re2j @@ -1217,19 +1290,19 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs com.jcraft jsch - 0.1.54 + ${jsch.version} org.kohsuke.metainf-services metainf-services - 1.8 + ${metainf-services.version} true io.dropwizard.metrics metrics-core - 3.2.4 + ${dropwizard-metrics.version} io.jaegertracing @@ -1249,7 +1322,7 @@ 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs org.jetbrains.kotlin kotlin-stdlib - 1.6.21 + ${kotlin.version} io.opentracing @@ -1290,12 +1363,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs javax.xml.bind jaxb-api - 2.3.0 + ${jaxb-api.version} org.glassfish.jaxb jaxb-runtime - 2.3.0.1 + ${jaxb-runtime.version} com.sun.jersey @@ -1319,7 +1392,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs org.apache.kerby kerb-simplekdc - 1.0.1 + ${kerby.version} org.yaml @@ -1329,13 +1402,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs org.assertj assertj-core - 3.12.2 + ${assertj.version} test org.jboss.weld.servlet weld-servlet - 2.4.7.Final + ${weld-servlet.version} org.powermock @@ -1355,15 +1428,20 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs ${powermock2.version} test + + org.reflections + reflections + ${reflections.version} + org.rocksdb rocksdbjni - 7.7.3 + ${rocksdb.version} org.xerial sqlite-jdbc - 3.25.2 + ${sqlite.version} @@ -1411,7 +1489,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs org.codehaus.mojo license-maven-plugin - 1.10 + ${license-maven-plugin.version} false ${project.basedir} @@ -1616,12 +1694,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs org.jacoco jacoco-maven-plugin - 0.8.5 + ${jacoco.version} io.fabric8 docker-maven-plugin - 0.29.0 + ${docker-maven-plugin.version}