diff --git a/dev-support/pom.xml b/dev-support/pom.xml index 2da4ab5b8e38..5e47a0ec6105 100644 --- a/dev-support/pom.xml +++ b/dev-support/pom.xml @@ -17,7 +17,7 @@ org.apache.ozone ozone-main - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-dev-support Apache Ozone Dev Support diff --git a/hadoop-hdds/annotations/pom.xml b/hadoop-hdds/annotations/pom.xml index 35d0e63ef3de..49f759b9c662 100644 --- a/hadoop-hdds/annotations/pom.xml +++ b/hadoop-hdds/annotations/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-annotation-processing - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Annotation Processing Apache Ozone annotation processing tools for validating custom diff --git a/hadoop-hdds/client/pom.xml b/hadoop-hdds/client/pom.xml index 9c94e152a9f2..5e50aaabd942 100644 --- a/hadoop-hdds/client/pom.xml +++ b/hadoop-hdds/client/pom.xml @@ -17,12 +17,12 @@ org.apache.ozone hdds-hadoop-dependency-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../hadoop-dependency-client hdds-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HDDS Client Apache Ozone Distributed Data Store Client Library diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/BoundedElasticByteBufferPool.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/BoundedElasticByteBufferPool.java new file mode 100644 index 000000000000..17311ddb5da9 --- /dev/null +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/ozone/client/io/BoundedElasticByteBufferPool.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.client.io; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ComparisonChain; +import java.nio.ByteBuffer; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.commons.lang3.builder.HashCodeBuilder; +import org.apache.hadoop.io.ByteBufferPool; + +/** + * A bounded version of ElasticByteBufferPool that limits the total size + * of buffers that can be cached in the pool. This prevents unbounded memory + * growth in long-lived rpc clients like S3 Gateway. + * + * When the pool reaches its maximum size, newly returned buffers are not + * added back to the pool and will be garbage collected instead. + */ +public class BoundedElasticByteBufferPool implements ByteBufferPool { + private final TreeMap buffers = new TreeMap<>(); + private final TreeMap directBuffers = new TreeMap<>(); + private final long maxPoolSize; + private final AtomicLong currentPoolSize = new AtomicLong(0); + + /** + * A logical timestamp counter used for creating unique Keys in the TreeMap. + * This is used as the insertionTime for the Key instead of System.nanoTime() + * to guarantee uniqueness and avoid a potential spin-wait in putBuffer + * if two buffers of the same capacity are added at the same nanosecond. + */ + private long logicalTimestamp = 0; + + public BoundedElasticByteBufferPool(long maxPoolSize) { + super(); + this.maxPoolSize = maxPoolSize; + } + + private TreeMap getBufferTree(boolean direct) { + return direct ? 
this.directBuffers : this.buffers; + } + + @Override + public synchronized ByteBuffer getBuffer(boolean direct, int length) { + TreeMap tree = this.getBufferTree(direct); + Map.Entry entry = tree.ceilingEntry(new Key(length, 0L)); + if (entry == null) { + // Pool is empty or has no suitable buffer. Allocate a new one. + return direct ? ByteBuffer.allocateDirect(length) : ByteBuffer.allocate(length); + } + tree.remove(entry.getKey()); + ByteBuffer buffer = entry.getValue(); + + // Decrement the size because we are taking a buffer OUT of the pool. + currentPoolSize.addAndGet(-buffer.capacity()); + buffer.clear(); + return buffer; + } + + @Override + public synchronized void putBuffer(ByteBuffer buffer) { + if (buffer == null) { + return; + } + + if (currentPoolSize.get() + buffer.capacity() > maxPoolSize) { + // Pool is full, do not add the buffer back. + // It will be garbage collected by JVM. + return; + } + + buffer.clear(); + TreeMap tree = getBufferTree(buffer.isDirect()); + Key key = new Key(buffer.capacity(), logicalTimestamp++); + + tree.put(key, buffer); + // Increment the size because we have successfully added buffer back to the pool. + currentPoolSize.addAndGet(buffer.capacity()); + } + + /** + * Get the current size of buffers in the pool. + * + * @return Current pool size in bytes + */ + @VisibleForTesting + public synchronized long getCurrentPoolSize() { + return currentPoolSize.get(); + } + + /** + * The Key for the buffer TreeMaps. + * This is copied directly from the original ElasticByteBufferPool. 
+ */ + protected static final class Key implements Comparable { + private final int capacity; + private final long insertionTime; + + Key(int capacity, long insertionTime) { + this.capacity = capacity; + this.insertionTime = insertionTime; + } + + @Override + public int compareTo(Key other) { + return ComparisonChain.start() + .compare(this.capacity, other.capacity) + .compare(this.insertionTime, other.insertionTime) + .result(); + } + + @Override + public boolean equals(Object rhs) { + if (rhs == null) { + return false; + } + try { + Key o = (Key) rhs; + return compareTo(o) == 0; + } catch (ClassCastException e) { + return false; + } + } + + @Override + public int hashCode() { + return new HashCodeBuilder().append(capacity).append(insertionTime) + .toHashCode(); + } + } +} diff --git a/hadoop-hdds/client/src/test/java/org/apache/hadoop/ozone/client/io/TestBoundedElasticByteBufferPool.java b/hadoop-hdds/client/src/test/java/org/apache/hadoop/ozone/client/io/TestBoundedElasticByteBufferPool.java new file mode 100644 index 000000000000..f32b81bfe8cb --- /dev/null +++ b/hadoop-hdds/client/src/test/java/org/apache/hadoop/ozone/client/io/TestBoundedElasticByteBufferPool.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.client.io; + +import java.nio.ByteBuffer; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for BoundedElasticByteBufferPool. + */ +public class TestBoundedElasticByteBufferPool { + + private static final int MB = 1024 * 1024; + private static final long MAX_POOL_SIZE = 3L * MB; // 3MB + + @Test + public void testLogicalTimestampOrdering() { + // Pool with plenty of capacity + BoundedElasticByteBufferPool pool = new BoundedElasticByteBufferPool(MAX_POOL_SIZE); + int bufferSize = 5 * 1024; // 5KB + + // Create and add three distinct buffers of the same size + ByteBuffer buffer1 = ByteBuffer.allocate(bufferSize); + ByteBuffer buffer2 = ByteBuffer.allocate(bufferSize); + ByteBuffer buffer3 = ByteBuffer.allocate(bufferSize); + + // Store their unique identity hash codes + int hash1 = System.identityHashCode(buffer1); + int hash2 = System.identityHashCode(buffer2); + int hash3 = System.identityHashCode(buffer3); + + pool.putBuffer(buffer1); + pool.putBuffer(buffer2); + pool.putBuffer(buffer3); + + // The pool should now contain 15KB data + Assertions.assertEquals(bufferSize * 3L, pool.getCurrentPoolSize()); + + // Get the buffers back. They should come back in the same + // order they were put in (FIFO). 
+ ByteBuffer retrieved1 = pool.getBuffer(false, bufferSize); + ByteBuffer retrieved2 = pool.getBuffer(false, bufferSize); + ByteBuffer retrieved3 = pool.getBuffer(false, bufferSize); + + // Verify we got the exact same buffer instances back in FIFO order + Assertions.assertEquals(hash1, System.identityHashCode(retrieved1)); + Assertions.assertEquals(hash2, System.identityHashCode(retrieved2)); + Assertions.assertEquals(hash3, System.identityHashCode(retrieved3)); + + // The pool should now be empty + Assertions.assertEquals(0, pool.getCurrentPoolSize()); + } + + /** + * Verifies the core feature: the pool stops caching buffers + * once its maximum size is reached. + */ + @Test + public void testPoolBoundingLogic() { + BoundedElasticByteBufferPool pool = new BoundedElasticByteBufferPool(MAX_POOL_SIZE); + + ByteBuffer buffer1 = ByteBuffer.allocate(2 * MB); + ByteBuffer buffer2 = ByteBuffer.allocate(1 * MB); + ByteBuffer buffer3 = ByteBuffer.allocate(3 * MB); + + int hash1 = System.identityHashCode(buffer1); + int hash2 = System.identityHashCode(buffer2); + int hash3 = System.identityHashCode(buffer3); + + // 1. Put buffer 1 (Pool size: 2MB, remaining: 1MB) + pool.putBuffer(buffer1); + Assertions.assertEquals(2 * MB, pool.getCurrentPoolSize()); + + // 2. Put buffer 2 (Pool size: 2MB + 1MB = 3MB, remaining: 0) + // The check is (current(2MB) + new(1MB)) > max(3MB), which is false. + // So, the buffer IS added. + pool.putBuffer(buffer2); + Assertions.assertEquals(3 * MB, pool.getCurrentPoolSize()); + + // 3. Put buffer 3 (Capacity 3MB) + // The check is (current(3MB) + new(3MB)) > max(3MB), which is true. + // This buffer should be REJECTED. + pool.putBuffer(buffer3); + // The pool size should NOT change. + Assertions.assertEquals(3 * MB, pool.getCurrentPoolSize()); + + // 4. 
Get buffers back + ByteBuffer retrieved1 = pool.getBuffer(false, 2 * MB); + ByteBuffer retrieved2 = pool.getBuffer(false, 1 * MB); + + // The pool should now be empty + Assertions.assertEquals(0, pool.getCurrentPoolSize()); + + // 5. Ask for a third buffer. + // Since buffer3 was rejected, this should be a NEWLY allocated buffer. + ByteBuffer retrieved3 = pool.getBuffer(false, 3 * MB); + + // Verify that we got the first two buffers from the pool + Assertions.assertEquals(hash1, System.identityHashCode(retrieved1)); + Assertions.assertEquals(hash2, System.identityHashCode(retrieved2)); + + // Verify that the third buffer is a NEW instance, not buffer3 + Assertions.assertNotEquals(hash3, System.identityHashCode(retrieved3)); + } +} diff --git a/hadoop-hdds/common/pom.xml b/hadoop-hdds/common/pom.xml index 6fdf1dffa45e..64d332bdb0c9 100644 --- a/hadoop-hdds/common/pom.xml +++ b/hadoop-hdds/common/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone hdds-hadoop-dependency-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../hadoop-dependency-client hdds-common - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HDDS Common Apache Ozone Distributed Data Store Common diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/upgrade/HDDSLayoutFeature.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/upgrade/HDDSLayoutFeature.java index f6ac0a4872cc..30a574ddcbad 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/upgrade/HDDSLayoutFeature.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/upgrade/HDDSLayoutFeature.java @@ -42,7 +42,8 @@ public enum HDDSLayoutFeature implements LayoutFeature { "to DatanodeDetails."), HBASE_SUPPORT(8, "Datanode RocksDB Schema Version 3 has an extra table " + "for the last chunk of blocks to support HBase.)"), - WITNESSED_CONTAINER_DB_PROTO_VALUE(9, "ContainerID table schema to use value type as proto"); + WITNESSED_CONTAINER_DB_PROTO_VALUE(9, "ContainerID table schema to use value type as proto"), + 
STORAGE_DATA_DISTRIBUTION(10, "Track container IDs on each volume to support storage data distribution"); ////////////////////////////// ////////////////////////////// diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index db66fed22fe9..ceca7d0c8824 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -690,6 +690,10 @@ public final class OzoneConfigKeys { "ozone.security.crypto.compliance.mode"; public static final String OZONE_SECURITY_CRYPTO_COMPLIANCE_MODE_UNRESTRICTED = "unrestricted"; + public static final String OZONE_CLIENT_ELASTIC_BYTE_BUFFER_POOL_MAX_SIZE = + "ozone.client.elastic.byte.buffer.pool.max.size"; + public static final String OZONE_CLIENT_ELASTIC_BYTE_BUFFER_POOL_MAX_SIZE_DEFAULT = "16GB"; + /** * There is no need to instantiate this class. */ diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java index cb4490c2c1db..aecbdfae615d 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java @@ -221,6 +221,7 @@ public final class OzoneConsts { public static final String OM_SST_FILE_INFO_START_KEY = "startKey"; public static final String OM_SST_FILE_INFO_END_KEY = "endKey"; public static final String OM_SST_FILE_INFO_COL_FAMILY = "columnFamily"; + public static final String OM_SLD_TXN_INFO = "transactionInfo"; // YAML fields for .container files public static final String CONTAINER_ID = "containerID"; diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index a6c8d61fff9e..0bfa98f991b9 100--- 
a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -465,6 +465,18 @@ Socket timeout for Ozone client. Unit could be defined with postfix (ns,ms,s,m,h,d) + + ozone.client.elastic.byte.buffer.pool.max.size + 16GB + OZONE, CLIENT + + The maximum total size of buffers that can be cached in the client-side + ByteBufferPool. This pool is used heavily during EC read and write operations. + Setting a limit prevents unbounded memory growth in long-lived rpc clients + like the S3 Gateway. Once this limit is reached, used buffers are not + put back to the pool and will be garbage collected. + + ozone.key.deleting.limit.per.task 50000 diff --git a/hadoop-hdds/config/pom.xml b/hadoop-hdds/config/pom.xml index 45e32b47db23..44c7d02253c6 100644 --- a/hadoop-hdds/config/pom.xml +++ b/hadoop-hdds/config/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-config - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HDDS Config Apache Ozone Distributed Data Store Config Tools diff --git a/hadoop-hdds/container-service/pom.xml b/hadoop-hdds/container-service/pom.xml index 0c455d269591..ce6c7863b94c 100644 --- a/hadoop-hdds/container-service/pom.xml +++ b/hadoop-hdds/container-service/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-container-service - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HDDS Container Service Apache Ozone Distributed Data Store Container Service diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java index baf6d48a9492..97b958d42e5b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java +++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java @@ -205,6 +205,10 @@ private boolean addContainer(Container container, boolean overwrite) throws recoveringContainerMap.put( clock.millis() + recoveringTimeout, containerId); } + HddsVolume volume = container.getContainerData().getVolume(); + if (volume != null) { + volume.addContainer(containerId); + } return true; } else { LOG.warn("Container already exists with container Id {}", containerId); @@ -299,6 +303,10 @@ private boolean removeContainer(long containerId, boolean markMissing, boolean r "containerMap", containerId); return false; } else { + HddsVolume volume = removed.getContainerData().getVolume(); + if (volume != null) { + volume.removeContainer(containerId); + } LOG.debug("Container with containerId {} is removed from containerMap", containerId); return true; @@ -409,13 +417,19 @@ public Iterator> getRecoveringContainerIterator() { */ public Iterator> getContainerIterator(HddsVolume volume) { Preconditions.checkNotNull(volume); - Preconditions.checkNotNull(volume.getStorageID()); - String volumeUuid = volume.getStorageID(); - return containerMap.values().stream() - .filter(x -> volumeUuid.equals(x.getContainerData().getVolume() - .getStorageID())) - .sorted(ContainerDataScanOrder.INSTANCE) - .iterator(); + Iterator containerIdIterator = volume.getContainerIterator(); + + List> containers = new ArrayList<>(); + while (containerIdIterator.hasNext()) { + Long containerId = containerIdIterator.next(); + Container container = containerMap.get(containerId); + if (container != null) { + containers.add(container); + } + } + containers.sort(ContainerDataScanOrder.INSTANCE); + + return containers.iterator(); } /** @@ -426,11 +440,7 @@ public Iterator> getContainerIterator(HddsVolume volume) { */ public long containerCount(HddsVolume volume) { Preconditions.checkNotNull(volume); - Preconditions.checkNotNull(volume.getStorageID()); - String volumeUuid = 
volume.getStorageID(); - return containerMap.values().stream() - .filter(x -> volumeUuid.equals(x.getContainerData().getVolume() - .getStorageID())).count(); + return volume.getContainerCount(); } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeQueueMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeQueueMetrics.java index d47e0c0936ac..c0ed734da692 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeQueueMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeQueueMetrics.java @@ -25,6 +25,7 @@ import java.util.Map; import org.apache.commons.text.WordUtils; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto; +import org.apache.hadoop.hdfs.util.EnumCounters; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsRecordBuilder; @@ -114,20 +115,20 @@ private void initializeQueues() { public void getMetrics(MetricsCollector collector, boolean b) { MetricsRecordBuilder builder = collector.addRecord(METRICS_SOURCE_NAME); - Map tmpMap = + EnumCounters tmpEnum = datanodeStateMachine.getContext().getCommandQueueSummary(); for (Map.Entry entry: stateContextCommandQueueMap.entrySet()) { builder.addGauge(entry.getValue(), - (long) tmpMap.getOrDefault(entry.getKey(), 0)); + tmpEnum.get(entry.getKey())); } - tmpMap = datanodeStateMachine.getCommandDispatcher() + tmpEnum = datanodeStateMachine.getCommandDispatcher() .getQueuedCommandCount(); for (Map.Entry entry: commandDispatcherQueueMap.entrySet()) { builder.addGauge(entry.getValue(), - (long) tmpMap.getOrDefault(entry.getKey(), 0)); + tmpEnum.get(entry.getKey())); } for (Map.Entry entry: diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java index 1bd888c84a61..3b61050c4af4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java @@ -23,7 +23,6 @@ import java.io.IOException; import java.time.Clock; import java.time.ZoneId; -import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; @@ -48,6 +47,7 @@ import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager; import org.apache.hadoop.hdds.utils.IOUtils; import org.apache.hadoop.hdds.utils.NettyMetrics; +import org.apache.hadoop.hdfs.util.EnumCounters; import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.HddsDatanodeStopService; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; @@ -620,23 +620,21 @@ public void join() throws InterruptedException { * (single) thread, or queues it in the handler where a thread pool executor * will process it. The total commands queued in the datanode is therefore * the sum those in the CommandQueue and the dispatcher queues. - * @return A map containing a count for each known command. + * @return EnumCounters containing a count for each known command. 
*/ - public Map getQueuedCommandCount() { - // This is a "sparse map" - there is not guaranteed to be an entry for - // every command type - Map commandQSummary = + public EnumCounters getQueuedCommandCount() { + // Get command counts from StateContext command queue + EnumCounters commandQSummary = context.getCommandQueueSummary(); - // This map will contain an entry for every command type which is registered + // This EnumCounters will contain an entry for every command type which is registered // with the dispatcher, and that should be all command types the DN knows - // about. Any commands with nothing in the queue will return a count of + // about. Any commands with nothing in the queue will have a count of // zero. - Map dispatcherQSummary = + EnumCounters dispatcherQSummary = commandDispatcher.getQueuedCommandCount(); - // Merge the "sparse" map into the fully populated one returning a count + // Merge the two EnumCounters into the fully populated one having a count // for all known command types. 
- commandQSummary.forEach((k, v) - -> dispatcherQSummary.merge(k, v, Integer::sum)); + dispatcherQSummary.add(commandQSummary); return dispatcherQSummary; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java index 305b7b55a229..a7ea469f0c82 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto; +import org.apache.hadoop.hdfs.util.EnumCounters; import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.ClosePipelineCommandHandler; import org.apache.hadoop.ozone.container.common.states.DatanodeState; import org.apache.hadoop.ozone.container.common.states.datanode.InitDatanodeState; @@ -796,12 +797,12 @@ public void addCommand(SCMCommand command) { this.addCmdStatus(command); } - public Map getCommandQueueSummary() { - Map summary = new HashMap<>(); + public EnumCounters getCommandQueueSummary() { + EnumCounters summary = new EnumCounters<>(SCMCommandProto.Type.class); lock.lock(); try { for (SCMCommand cmd : commandQueue) { - summary.put(cmd.getType(), summary.getOrDefault(cmd.getType(), 0) + 1); + summary.add(cmd.getType(), 1); } } finally { lock.unlock(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java index ece91ffdd1c2..482878e6f58a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; +import org.apache.hadoop.hdfs.util.EnumCounters; import org.apache.hadoop.ozone.container.common.helpers.CommandHandlerMetrics; import org.apache.hadoop.ozone.container.common.statemachine.SCMConnectionManager; import org.apache.hadoop.ozone.container.common.statemachine.StateContext; @@ -115,15 +116,15 @@ public void stop() { /** * For each registered handler, call its getQueuedCount method to retrieve the - * number of queued commands. The returned map will contain an entry for every + * number of queued commands. The returned EnumCounters will contain an entry for every * registered command in the dispatcher, with a value of zero if there are no * queued commands. - * @return A Map of CommandType where the value is the queued command count. + * @return EnumCounters of CommandType with the queued command count. 
*/ - public Map getQueuedCommandCount() { - Map counts = new HashMap<>(); + public EnumCounters getQueuedCommandCount() { + EnumCounters counts = new EnumCounters<>(Type.class); for (Map.Entry entry : handlerMap.entrySet()) { - counts.put(entry.getKey(), entry.getValue().getQueuedCount()); + counts.set(entry.getKey(), entry.getValue().getQueuedCount()); } return counts; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java index 2681cdf90d5e..0959d78bdb20 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java @@ -30,7 +30,6 @@ import java.time.ZonedDateTime; import java.util.LinkedList; import java.util.List; -import java.util.Map; import java.util.concurrent.Callable; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -45,6 +44,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMHeartbeatRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMHeartbeatResponseProto; import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager; +import org.apache.hadoop.hdfs.util.EnumCounters; import org.apache.hadoop.ozone.container.common.helpers.DeletedContainerBlocksSummary; import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine; import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine.EndPointStates; @@ -242,14 +242,16 @@ private void addPipelineActions( */ private void addQueuedCommandCounts( SCMHeartbeatRequestProto.Builder requestBuilder) { - 
Map commandCount = + EnumCounters commandCount = context.getParent().getQueuedCommandCount(); CommandQueueReportProto.Builder reportProto = CommandQueueReportProto.newBuilder(); - for (Map.Entry entry - : commandCount.entrySet()) { - reportProto.addCommand(entry.getKey()) - .addCount(entry.getValue()); + for (SCMCommandProto.Type type : SCMCommandProto.Type.values()) { + long count = commandCount.get(type); + if (count > 0) { + reportProto.addCommand(type) + .addCount((int) count); + } } requestBuilder.setCommandQueueReport(reportProto.build()); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index d6f404dd17ea..0988064e5fe8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -25,9 +25,11 @@ import jakarta.annotation.Nullable; import java.io.File; import java.io.IOException; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Queue; +import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -93,6 +95,8 @@ public class HddsVolume extends StorageVolume { private final AtomicLong committedBytes = new AtomicLong(); // till Open containers become full private Function gatherContainerUsages = (K) -> 0L; + private final ConcurrentSkipListSet containerIds = new ConcurrentSkipListSet<>(); + // Mentions the type of volume private final VolumeType type = VolumeType.DATA_VOLUME; // The dedicated DbVolume that the db instance of this HddsVolume resides. 
@@ -529,6 +533,22 @@ public long getContainers() { return 0; } + public void addContainer(long containerId) { + containerIds.add(containerId); + } + + public void removeContainer(long containerId) { + containerIds.remove(containerId); + } + + public Iterator getContainerIterator() { + return containerIds.iterator(); + } + + public long getContainerCount() { + return containerIds.size(); + } + /** * Pick a DbVolume for HddsVolume and init db instance. * Use the HddsVolume directly if no DbVolume found. diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index df45715d9962..a77eec922776 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -610,11 +610,13 @@ public ContainerSet getContainerSet() { public Long gatherContainerUsages(HddsVolume storageVolume) { AtomicLong usages = new AtomicLong(); - containerSet.getContainerMapIterator().forEachRemaining(e -> { - if (e.getValue().getContainerData().getVolume().getStorageID().equals(storageVolume.getStorageID())) { - usages.addAndGet(e.getValue().getContainerData().getBytesUsed()); + Iterator containerIdIterator = storageVolume.getContainerIterator(); + while (containerIdIterator.hasNext()) { + Container container = containerSet.getContainer(containerIdIterator.next()); + if (container != null) { + usages.addAndGet(container.getContainerData().getBytesUsed()); } - }); + } return usages.get(); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java index 
8c54dd848af4..efb4be86e8dc 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java @@ -28,6 +28,7 @@ import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -40,6 +41,7 @@ import java.util.Optional; import java.util.Random; import java.util.UUID; +import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.LongStream; import org.apache.hadoop.conf.StorageUnit; @@ -69,6 +71,33 @@ private void setLayoutVersion(ContainerLayoutVersion layoutVersion) { this.layoutVersion = layoutVersion; } + /** + * Create a mock {@link HddsVolume} to track container IDs. 
+ */ + private HddsVolume mockHddsVolume(String storageId) { + HddsVolume volume = mock(HddsVolume.class); + when(volume.getStorageID()).thenReturn(storageId); + + ConcurrentSkipListSet containerIds = new ConcurrentSkipListSet<>(); + + doAnswer(inv -> { + Long containerId = inv.getArgument(0); + containerIds.add(containerId); + return null; + }).when(volume).addContainer(any(Long.class)); + + doAnswer(inv -> { + Long containerId = inv.getArgument(0); + containerIds.remove(containerId); + return null; + }).when(volume).removeContainer(any(Long.class)); + + when(volume.getContainerIterator()).thenAnswer(inv -> containerIds.iterator()); + when(volume.getContainerCount()).thenAnswer(inv -> (long) containerIds.size()); + + return volume; + } + @ContainerLayoutTestInfo.ContainerTest public void testAddGetRemoveContainer(ContainerLayoutVersion layout) throws StorageContainerException { @@ -157,10 +186,8 @@ public void testIteratorsAndCount(ContainerLayoutVersion layout) public void testIteratorPerVolume(ContainerLayoutVersion layout) throws StorageContainerException { setLayoutVersion(layout); - HddsVolume vol1 = mock(HddsVolume.class); - when(vol1.getStorageID()).thenReturn("uuid-1"); - HddsVolume vol2 = mock(HddsVolume.class); - when(vol2.getStorageID()).thenReturn("uuid-2"); + HddsVolume vol1 = mockHddsVolume("uuid-1"); + HddsVolume vol2 = mockHddsVolume("uuid-2"); ContainerSet containerSet = newContainerSet(); for (int i = 0; i < 10; i++) { @@ -202,8 +229,7 @@ public void testIteratorPerVolume(ContainerLayoutVersion layout) public void iteratorIsOrderedByScanTime(ContainerLayoutVersion layout) throws StorageContainerException { setLayoutVersion(layout); - HddsVolume vol = mock(HddsVolume.class); - when(vol.getStorageID()).thenReturn("uuid-1"); + HddsVolume vol = mockHddsVolume("uuid-1"); Random random = new Random(); ContainerSet containerSet = newContainerSet(); int containerCount = 50; @@ -375,4 +401,102 @@ private ContainerSet createContainerSet() throws 
StorageContainerException { return containerSet; } + /** + * Test that containerCount per volume returns correct count. + */ + @ContainerLayoutTestInfo.ContainerTest + public void testContainerCountPerVolume(ContainerLayoutVersion layout) + throws StorageContainerException { + setLayoutVersion(layout); + HddsVolume vol1 = mockHddsVolume("uuid-1"); + HddsVolume vol2 = mockHddsVolume("uuid-2"); + HddsVolume vol3 = mockHddsVolume("uuid-3"); + + ContainerSet containerSet = newContainerSet(); + + // Add 100 containers to vol1, 50 to vol2, 0 to vol3 + for (int i = 0; i < 100; i++) { + KeyValueContainerData kvData = new KeyValueContainerData(i, + layout, + (long) StorageUnit.GB.toBytes(5), UUID.randomUUID().toString(), + UUID.randomUUID().toString()); + kvData.setVolume(vol1); + kvData.setState(ContainerProtos.ContainerDataProto.State.CLOSED); + containerSet.addContainer(new KeyValueContainer(kvData, new OzoneConfiguration())); + } + + for (int i = 100; i < 150; i++) { + KeyValueContainerData kvData = new KeyValueContainerData(i, + layout, + (long) StorageUnit.GB.toBytes(5), UUID.randomUUID().toString(), + UUID.randomUUID().toString()); + kvData.setVolume(vol2); + kvData.setState(ContainerProtos.ContainerDataProto.State.CLOSED); + containerSet.addContainer(new KeyValueContainer(kvData, new OzoneConfiguration())); + } + + // Verify counts + assertEquals(100, containerSet.containerCount(vol1)); + assertEquals(50, containerSet.containerCount(vol2)); + assertEquals(0, containerSet.containerCount(vol3)); + + // Remove some containers and verify counts are updated + containerSet.removeContainer(0); + containerSet.removeContainer(1); + containerSet.removeContainer(100); + assertEquals(98, containerSet.containerCount(vol1)); + assertEquals(49, containerSet.containerCount(vol2)); + } + + /** + * Test that per-volume iterator only returns containers from that volume. 
+ */ + @ContainerLayoutTestInfo.ContainerTest + public void testContainerIteratorPerVolume(ContainerLayoutVersion layout) + throws StorageContainerException { + setLayoutVersion(layout); + HddsVolume vol1 = mockHddsVolume("uuid-11"); + HddsVolume vol2 = mockHddsVolume("uuid-12"); + + ContainerSet containerSet = newContainerSet(); + + // Add containers with specific IDs to each volume + List vol1Ids = new ArrayList<>(); + List vol2Ids = new ArrayList<>(); + + for (int i = 0; i < 20; i++) { + KeyValueContainerData kvData = new KeyValueContainerData(i, + layout, + (long) StorageUnit.GB.toBytes(5), UUID.randomUUID().toString(), + UUID.randomUUID().toString()); + if (i % 2 == 0) { + kvData.setVolume(vol1); + vol1Ids.add((long) i); + } else { + kvData.setVolume(vol2); + vol2Ids.add((long) i); + } + kvData.setState(ContainerProtos.ContainerDataProto.State.CLOSED); + containerSet.addContainer(new KeyValueContainer(kvData, new OzoneConfiguration())); + } + + // Verify iterator only returns containers from vol1 + Iterator> iter1 = containerSet.getContainerIterator(vol1); + List foundVol1Ids = new ArrayList<>(); + while (iter1.hasNext()) { + foundVol1Ids.add(iter1.next().getContainerData().getContainerID()); + } + assertEquals(vol1Ids.size(), foundVol1Ids.size()); + assertTrue(foundVol1Ids.containsAll(vol1Ids)); + + // Verify iterator only returns containers from vol2 + Iterator> iter2 = containerSet.getContainerIterator(vol2); + List foundVol2Ids = new ArrayList<>(); + while (iter2.hasNext()) { + foundVol2Ids.add(iter2.next().getContainerData().getContainerID()); + } + assertEquals(vol2Ids.size(), foundVol2Ids.size()); + assertTrue(foundVol2Ids.containsAll(vol2Ids)); + } + } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java index 73e4b8f43686..8d79335591b9 100644 --- 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java @@ -59,6 +59,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; +import org.apache.hadoop.hdfs.util.EnumCounters; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine.DatanodeStates; import org.apache.hadoop.ozone.container.common.states.DatanodeState; @@ -709,15 +710,15 @@ public void testCommandQueueSummary() throws IOException { ctx.addCommand(new CloseContainerCommand(1, PipelineID.randomId())); ctx.addCommand(new ReconcileContainerCommand(4, Collections.emptySet())); - Map summary = ctx.getCommandQueueSummary(); + EnumCounters summary = ctx.getCommandQueueSummary(); assertEquals(3, - summary.get(SCMCommandProto.Type.replicateContainerCommand).intValue()); + summary.get(SCMCommandProto.Type.replicateContainerCommand)); assertEquals(2, - summary.get(SCMCommandProto.Type.closePipelineCommand).intValue()); + summary.get(SCMCommandProto.Type.closePipelineCommand)); assertEquals(1, - summary.get(SCMCommandProto.Type.closeContainerCommand).intValue()); + summary.get(SCMCommandProto.Type.closeContainerCommand)); assertEquals(1, - summary.get(SCMCommandProto.Type.reconcileContainerCommand).intValue()); + summary.get(SCMCommandProto.Type.reconcileContainerCommand)); } @Test diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteBlocksCommandHandler.java 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteBlocksCommandHandler.java index b2e1c72e487a..50b08c7aa2f2 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteBlocksCommandHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteBlocksCommandHandler.java @@ -49,6 +49,7 @@ import java.util.Map; import java.util.UUID; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; @@ -104,13 +105,31 @@ private void prepareTest(ContainerTestVersionInfo versionInfo) setup(); } + /** + * Create a mock {@link HddsVolume} to track container IDs. + */ + private HddsVolume mockHddsVolume(String storageId) { + HddsVolume volume = mock(HddsVolume.class); + when(volume.getStorageID()).thenReturn(storageId); + + ConcurrentSkipListSet containerIds = new ConcurrentSkipListSet<>(); + + doAnswer(inv -> { + Long containerId = inv.getArgument(0); + containerIds.add(containerId); + return null; + }).when(volume).addContainer(any(Long.class)); + + when(volume.getContainerIterator()).thenAnswer(inv -> containerIds.iterator()); + return volume; + } + private void setup() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); ContainerLayoutVersion layout = ContainerLayoutVersion.FILE_PER_BLOCK; OzoneContainer ozoneContainer = mock(OzoneContainer.class); containerSet = newContainerSet(); - volume1 = mock(HddsVolume.class); - when(volume1.getStorageID()).thenReturn("uuid-1"); + volume1 = mockHddsVolume("uuid-1"); for (int i = 0; i <= 10; i++) { KeyValueContainerData data = new KeyValueContainerData(i, diff --git 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/TestHeartbeatEndpointTask.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/TestHeartbeatEndpointTask.java index 11c145ee38ae..c04d2c758842 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/TestHeartbeatEndpointTask.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/TestHeartbeatEndpointTask.java @@ -32,10 +32,8 @@ import com.google.protobuf.Proto2Utils; import java.net.InetSocketAddress; import java.util.ArrayList; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.OptionalLong; import java.util.Set; import java.util.UUID; @@ -53,6 +51,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMHeartbeatRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMHeartbeatResponseProto; import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager; +import org.apache.hadoop.hdfs.util.EnumCounters; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine.DatanodeStates; import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine; @@ -102,13 +101,16 @@ public void handlesReconstructContainerCommand() throws Exception { StateContext context = new StateContext(conf, DatanodeStates.RUNNING, datanodeStateMachine, ""); + when(datanodeStateMachine.getQueuedCommandCount()) + .thenReturn(new EnumCounters<>(SCMCommandProto.Type.class)); + // WHEN HeartbeatEndpointTask task = getHeartbeatEndpointTask(conf, context, scm); task.call(); // THEN assertEquals(1, context.getCommandQueueSummary() - 
.get(reconstructECContainersCommand).intValue()); + .get(reconstructECContainersCommand)); } @Test @@ -138,13 +140,16 @@ public void testHandlesReconcileContainerCommand() throws Exception { StateContext context = new StateContext(conf, DatanodeStates.RUNNING, datanodeStateMachine, ""); + when(datanodeStateMachine.getQueuedCommandCount()) + .thenReturn(new EnumCounters<>(SCMCommandProto.Type.class)); + // WHEN HeartbeatEndpointTask task = getHeartbeatEndpointTask(conf, context, scm); task.call(); // THEN assertEquals(1, context.getCommandQueueSummary() - .get(reconcileContainerCommand).intValue()); + .get(reconcileContainerCommand)); } @Test @@ -165,8 +170,12 @@ public void testheartbeatWithoutReports() throws Exception { .build()); OzoneConfiguration conf = new OzoneConfiguration(); + DatanodeStateMachine datanodeStateMachine = mock(DatanodeStateMachine.class); StateContext context = new StateContext(conf, DatanodeStates.RUNNING, - mock(DatanodeStateMachine.class), ""); + datanodeStateMachine, ""); + + when(datanodeStateMachine.getQueuedCommandCount()) + .thenReturn(new EnumCounters<>(SCMCommandProto.Type.class)); context.setTermOfLeaderSCM(1); HeartbeatEndpointTask endpointTask = getHeartbeatEndpointTask( conf, context, scm); @@ -185,9 +194,12 @@ public void testheartbeatWithoutReports() throws Exception { @Test public void testheartbeatWithNodeReports() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); + DatanodeStateMachine datanodeStateMachine = mock(DatanodeStateMachine.class); StateContext context = new StateContext(conf, DatanodeStates.RUNNING, - mock(DatanodeStateMachine.class), ""); + datanodeStateMachine, ""); + when(datanodeStateMachine.getQueuedCommandCount()) + .thenReturn(new EnumCounters<>(SCMCommandProto.Type.class)); StorageContainerDatanodeProtocolClientSideTranslatorPB scm = mock( StorageContainerDatanodeProtocolClientSideTranslatorPB.class); @@ -217,8 +229,12 @@ public void testheartbeatWithNodeReports() throws Exception { 
@Test public void testheartbeatWithContainerReports() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); + DatanodeStateMachine datanodeStateMachine = mock(DatanodeStateMachine.class); StateContext context = new StateContext(conf, DatanodeStates.RUNNING, - mock(DatanodeStateMachine.class), ""); + datanodeStateMachine, ""); + + when(datanodeStateMachine.getQueuedCommandCount()) + .thenReturn(new EnumCounters<>(SCMCommandProto.Type.class)); StorageContainerDatanodeProtocolClientSideTranslatorPB scm = mock( @@ -249,8 +265,12 @@ public void testheartbeatWithContainerReports() throws Exception { @Test public void testheartbeatWithCommandStatusReports() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); + DatanodeStateMachine datanodeStateMachine = mock(DatanodeStateMachine.class); StateContext context = new StateContext(conf, DatanodeStates.RUNNING, - mock(DatanodeStateMachine.class), ""); + datanodeStateMachine, ""); + + when(datanodeStateMachine.getQueuedCommandCount()) + .thenReturn(new EnumCounters<>(SCMCommandProto.Type.class)); StorageContainerDatanodeProtocolClientSideTranslatorPB scm = mock( @@ -282,8 +302,12 @@ public void testheartbeatWithCommandStatusReports() throws Exception { @Test public void testheartbeatWithContainerActions() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); + DatanodeStateMachine datanodeStateMachine = mock(DatanodeStateMachine.class); StateContext context = new StateContext(conf, DatanodeStates.RUNNING, - mock(DatanodeStateMachine.class), ""); + datanodeStateMachine, ""); + + when(datanodeStateMachine.getQueuedCommandCount()) + .thenReturn(new EnumCounters<>(SCMCommandProto.Type.class)); StorageContainerDatanodeProtocolClientSideTranslatorPB scm = mock( @@ -320,10 +344,10 @@ public void testheartbeatWithAllReports() throws Exception { datanodeStateMachine, ""); // Return a Map of command counts when the heartbeat logic requests it - final Map commands = new HashMap<>(); + 
final EnumCounters commands = new EnumCounters<>(SCMCommandProto.Type.class); int count = 1; for (SCMCommandProto.Type cmd : SCMCommandProto.Type.values()) { - commands.put(cmd, count++); + commands.set(cmd, count++); } when(datanodeStateMachine.getQueuedCommandCount()) .thenReturn(commands); @@ -358,10 +382,16 @@ public void testheartbeatWithAllReports() throws Exception { assertTrue(heartbeat.hasContainerActions()); assertTrue(heartbeat.hasCommandQueueReport()); CommandQueueReportProto queueCount = heartbeat.getCommandQueueReport(); - assertEquals(queueCount.getCommandCount(), commands.size()); - assertEquals(queueCount.getCountCount(), commands.size()); - for (int i = 0; i < commands.size(); i++) { - assertEquals(commands.get(queueCount.getCommand(i)).intValue(), + int commandCount = 0; + for (SCMCommandProto.Type type : SCMCommandProto.Type.values()) { + if (commands.get(type) > 0) { + commandCount++; + } + } + assertEquals(queueCount.getCommandCount(), commandCount); + assertEquals(queueCount.getCountCount(), commandCount); + for (int i = 0; i < commandCount; i++) { + assertEquals(commands.get(queueCount.getCommand(i)), queueCount.getCount(i)); } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java index 4cca1dd21cd0..91c3f8ed58c2 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java @@ -21,6 +21,10 @@ import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static 
org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import com.google.common.base.Preconditions; import java.io.File; @@ -30,9 +34,11 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Random; import java.util.Set; import java.util.UUID; +import java.util.concurrent.ConcurrentSkipListSet; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.hdds.HddsConfigKeys; @@ -110,6 +116,25 @@ public void cleanUp() { } } + /** + * Create a mock {@link HddsVolume} to track container IDs. + */ + private HddsVolume mockHddsVolume(String storageId) { + HddsVolume volume = mock(HddsVolume.class); + when(volume.getStorageID()).thenReturn(storageId); + + ConcurrentSkipListSet containerIds = new ConcurrentSkipListSet<>(); + + doAnswer(inv -> { + Long containerId = inv.getArgument(0); + containerIds.add(containerId); + return null; + }).when(volume).addContainer(any(Long.class)); + + when(volume.getContainerIterator()).thenAnswer(inv -> containerIds.iterator()); + return volume; + } + @ContainerTestVersionInfo.ContainerTest public void testBuildContainerMap(ContainerTestVersionInfo versionInfo) throws Exception { @@ -117,9 +142,14 @@ public void testBuildContainerMap(ContainerTestVersionInfo versionInfo) // Format the volumes List volumes = StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()); + + // Create mock volumes with tracking, mapped by storage ID + Map mockVolumeMap = new HashMap<>(); for (HddsVolume volume : volumes) { volume.format(clusterId); commitSpaceMap.put(getVolumeKey(volume), Long.valueOf(0)); + // Create mock for each real volume + mockVolumeMap.put(volume.getStorageID(), mockHddsVolume(volume.getStorageID())); } List containerDatas = new ArrayList<>(); // Add containers to disk @@ -140,6 +170,12 @@ public void testBuildContainerMap(ContainerTestVersionInfo versionInfo) 
keyValueContainerData, conf); keyValueContainer.create(volumeSet, volumeChoosingPolicy, clusterId); myVolume = keyValueContainer.getContainerData().getVolume(); + + // Track container in mock volume + HddsVolume mockVolume = mockVolumeMap.get(myVolume.getStorageID()); + if (mockVolume != null) { + mockVolume.addContainer(i); + } freeBytes = addBlocks(keyValueContainer, 2, 3, 65536); @@ -158,7 +194,13 @@ public void testBuildContainerMap(ContainerTestVersionInfo versionInfo) assertEquals(numTestContainers, containerset.containerCount()); verifyCommittedSpace(ozoneContainer); // container usage here, nrOfContainer * blocks * chunksPerBlock * datalen - assertEquals(10 * 2 * 3 * 65536, ozoneContainer.gatherContainerUsages(volumes.get(0))); + // Use mock volumes to verify container usage + long totalUsage = 0; + for (HddsVolume volume : volumes) { + HddsVolume mockVolume = mockVolumeMap.get(volume.getStorageID()); + totalUsage += ozoneContainer.gatherContainerUsages(mockVolume); + } + assertEquals(10 * 2 * 3 * 65536, totalUsage); Set missingContainers = new HashSet<>(); for (int i = 0; i < numTestContainers; i++) { if (i % 2 == 0) { diff --git a/hadoop-hdds/crypto-api/pom.xml b/hadoop-hdds/crypto-api/pom.xml index 474359f916b0..801c7b0d036c 100644 --- a/hadoop-hdds/crypto-api/pom.xml +++ b/hadoop-hdds/crypto-api/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-crypto-api - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT Apache Ozone HDDS Crypto Apache Ozone Distributed Data Store cryptographic functions diff --git a/hadoop-hdds/crypto-default/pom.xml b/hadoop-hdds/crypto-default/pom.xml index 7194bb7e6e54..49e7065476ef 100644 --- a/hadoop-hdds/crypto-default/pom.xml +++ b/hadoop-hdds/crypto-default/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-crypto-default - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT Apache Ozone HDDS Crypto - Default Default implementation of Apache Ozone Distributed Data Store's cryptographic 
functions diff --git a/hadoop-hdds/docs/content/feature/Snapshot.md b/hadoop-hdds/docs/content/feature/Snapshot.md index d6dcf7ea8da3..2e013e3a3c41 100644 --- a/hadoop-hdds/docs/content/feature/Snapshot.md +++ b/hadoop-hdds/docs/content/feature/Snapshot.md @@ -100,6 +100,32 @@ Manage snapshots using `ozone sh` or `ozone fs` (Hadoop-compatible) commands: Output prefixes: `+` (add), `-` (delete), `M` (modify), `R` (rename). Use `-p`, `-t` for pagination. Manage diff jobs: `ozone sh snapshot listDiff /vol1/bucket1`, `ozone sh snapshot cancelDiff `. +* **List Snapshot Diff Jobs:** Lists snapshot diff jobs for a bucket. + ```shell + ozone sh snapshot listDiff /vol1/bucket1 + ``` + By default, lists jobs with `in_progress` status. Use `--job-status` to filter by specific status: + ```shell + # List jobs with specific status (queued, in_progress, done, failed, rejected) + ozone sh snapshot listDiff /vol1/bucket1 --job-status done + ``` + Use `--all-status` to list all jobs regardless of status: + ```shell + # List all snapshot diff jobs regardless of status + ozone sh snapshot listDiff /vol1/bucket1 --all-status + ``` + **Note:** The difference between `--all-status` and `-all` (or `-a`): + * `--all-status`: Controls which jobs to show based on status (lists all jobs regardless of status) + * `-all` (or `-a`): Controls the number of results returned (pagination option, removes pagination limit, **not related to snapshot diff job status**) + + For example: + ```shell + # List all jobs regardless of status, with pagination limit removed + ozone sh snapshot listDiff /vol1/bucket1 --all-status -all + # Or limit results to 10 items + ozone sh snapshot listDiff /vol1/bucket1 --all-status -l 10 + ``` + * **Rename Snapshot:** ```shell ozone sh snapshot rename /vol1/bucket1 diff --git a/hadoop-hdds/docs/content/interface/HttpFS.md b/hadoop-hdds/docs/content/interface/HttpFS.md index cebe0d315b02..a4eb7271a115 100644 --- a/hadoop-hdds/docs/content/interface/HttpFS.md +++ 
b/hadoop-hdds/docs/content/interface/HttpFS.md @@ -45,24 +45,102 @@ HttpFS has built-in security supporting Hadoop pseudo authentication and Kerbero HttpFS service itself is a Jetty based web-application that uses the Hadoop FileSystem API to talk to the cluster, it is a separate service which provides access to Ozone via a REST APIs. It should be started in addition to other regular Ozone components. -To try it out, you can start a Docker Compose dev cluster that has an HttpFS gateway. +To try it out, follow the instructions from the link below to start the Ozone cluster with Docker Compose. -Extract the release tarball, go to the `compose/ozone` directory and start the cluster: +https://ozone.apache.org/docs/edge/start/startfromdockerhub.html ```bash -docker-compose up -d --scale datanode=3 +docker compose up -d --scale datanode=3 ``` -You can/should find now the HttpFS gateway in docker with the name `ozone_httpfs`. -HttpFS HTTP web-service API calls are HTTP REST calls that map to an Ozone file system operation. For example, using the `curl` Unix command. +You can/should find now the HttpFS gateway in docker with the name like `ozone_httpfs`, +and it can be accessed through `localhost:14000`. +HttpFS HTTP web-service API calls are HTTP REST calls that map to an Ozone file system operation. -E.g. in the docker cluster you can execute commands like these: +Here's some example usage: -* `curl -i -X PUT "http://httpfs:14000/webhdfs/v1/vol1?op=MKDIRS&user.name=hdfs"` creates a volume called `vol1`. +### Create a volume +```bash +# creates a volume called `volume1`. 
+curl -i -X PUT "http://localhost:14000/webhdfs/v1/volume1?op=MKDIRS&user.name=hdfs" +``` + +Example Output: + +```bash +HTTP/1.1 200 OK +Date: Sat, 18 Oct 2025 07:51:21 GMT +Cache-Control: no-cache +Expires: Sat, 18 Oct 2025 07:51:21 GMT +Pragma: no-cache +Content-Type: application/json +X-Content-Type-Options: nosniff +X-XSS-Protection: 1; mode=block +Set-Cookie: hadoop.auth="u=hdfs&p=hdfs&t=simple-dt&e=1760809881100&s=OCdVOi8eyMguFySkmEJxm5EkRfj6NbAM9agi5Gue1Iw="; Path=/; HttpOnly +Content-Length: 17 + +{"boolean":true} +``` + +### Create a bucket + +```bash +# creates a bucket called `bucket1`. +curl -i -X PUT "http://localhost:14000/webhdfs/v1/volume1/bucket1?op=MKDIRS&user.name=hdfs" +``` + +Example Output: + +```bash +HTTP/1.1 200 OK +Date: Sat, 18 Oct 2025 07:52:06 GMT +Cache-Control: no-cache +Expires: Sat, 18 Oct 2025 07:52:06 GMT +Pragma: no-cache +Content-Type: application/json +X-Content-Type-Options: nosniff +X-XSS-Protection: 1; mode=block +Set-Cookie: hadoop.auth="u=hdfs&p=hdfs&t=simple-dt&e=1760809926682&s=yvOaeaRCVJZ+z+nZQ/rM/Y01pzEmS9Pe2mE9f0b+TWw="; Path=/; HttpOnly +Content-Length: 17 + +{"boolean":true} +``` + +### Upload a file -* `$ curl 'http://httpfs-host:14000/webhdfs/v1/user/foo/README.txt?op=OPEN&user.name=foo'` returns the content of the key `/user/foo/README.txt`. 
+```bash +echo "hello" >> ./README.txt +curl -i -X PUT "http://localhost:14000/webhdfs/v1/volume1/bucket1/user/foo/README.txt?op=CREATE&data=true&user.name=hdfs" -T ./README.txt -H "Content-Type: application/octet-stream" +``` +Example Output: + +```bash +HTTP/1.1 100 Continue + +HTTP/1.1 201 Created +Date: Sat, 18 Oct 2025 08:33:33 GMT +Cache-Control: no-cache +Expires: Sat, 18 Oct 2025 08:33:33 GMT +Pragma: no-cache +X-Content-Type-Options: nosniff +X-XSS-Protection: 1; mode=block +Set-Cookie: hadoop.auth="u=hdfs&p=hdfs&t=simple-dt&e=1760812413286&s=09t7xKu/p/fjCJiQNL3bvW/Q7mTw28IbeNqDGlslZ6w="; Path=/; HttpOnly +Location: http://localhost:14000/webhdfs/v1/volume1/bucket1/user/foo/README.txt +Content-Type: application/json +Content-Length: 84 + +{"Location":"http://localhost:14000/webhdfs/v1/volume1/bucket1/user/foo/README.txt"} +``` + +### Read the file content + +```bash +# returns the content of the key `/user/foo/README.txt`. +curl 'http://localhost:14000/webhdfs/v1/volume1/bucket1/user/foo/README.txt?op=OPEN&user.name=foo' +hello +``` ## Supported operations @@ -110,10 +188,8 @@ Set ACL | not implemented in Ozone FileSystem API Get ACL Status | not implemented in Ozone FileSystem API Check access | not implemented in Ozone FileSystem API - - ## Hadoop user and developer documentation about HttpFS * [HttpFS Server Setup](https://hadoop.apache.org/docs/stable/hadoop-hdfs-httpfs/ServerSetup.html) -* [Using HTTP Tools](https://hadoop.apache.org/docs/stable/hadoop-hdfs-httpfs/ServerSetup.html) \ No newline at end of file +* [Using HTTP Tools](https://hadoop.apache.org/docs/stable/hadoop-hdfs-httpfs/ServerSetup.html) diff --git a/hadoop-hdds/docs/content/security/SecuringS3.md b/hadoop-hdds/docs/content/security/SecuringS3.md index 85c064c407fd..561531d2d8bc 100644 --- a/hadoop-hdds/docs/content/security/SecuringS3.md +++ b/hadoop-hdds/docs/content/security/SecuringS3.md @@ -37,18 +37,32 @@ The user needs to `kinit` first and once they have authenticated via 
kerberos ## Obtain Secrets -* S3 clients can get the secret access id and user secret from OzoneManager. +S3 clients can get the secret access id and user secret from OzoneManager. +### Using the command line + +For a regular user to get their own secret: ```bash ozone s3 getsecret ``` -* Or by sending request to /secret S3 REST endpoint. +An Ozone administrator can get a secret for a specific user by using the `-u` flag: +```bash +ozone s3 getsecret -u +``` + +### Using the REST API +A user can get their own secret by making a `PUT` request to the `/secret` endpoint: ```bash curl -X PUT --negotiate -u : https://localhost:9879/secret ``` +An Ozone administrator can get a secret for a specific user by appending the username to the path: +```bash +curl -X PUT --negotiate -u : https://localhost:9879/secret/ +``` + This command will talk to ozone, validate the user via Kerberos and generate the AWS credentials. The values will be printed out on the screen. You can set these values up in your _.aws_ file for automatic access while working @@ -114,3 +128,112 @@ curl -X DELETE --negotiate -u : -v "http://localhost:9879/secret?username=testus For a working example of these operations, refer to the [Secret Revoke Robot Test](https://raw.githubusercontent.com/apache/ozone/refs/heads/master/hadoop-ozone/dist/src/main/smoketest/s3/secretrevoke.robot). This test demonstrates both the default secret revocation and the revocation by username. > **Note:** Ensure your Kerberos authentication is correctly configured, as secret revocation is a privileged operation. + +## External S3 Secret Storage with HashiCorp Vault + +By default, S3 secrets are stored in the Ozone Manager's RocksDB. For enhanced security, Ozone can be configured to use HashiCorp Vault as an external secret storage backend. 
+ +### Configuration + +To enable Vault integration, you need to configure the following properties in `ozone-site.xml`: + +| Property | Description | +| -------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | +| `ozone.secret.s3.store.provider` | The S3 secret storage provider to use. Set this to `org.apache.hadoop.ozone.s3.remote.vault.VaultS3SecretStorageProvider` to enable Vault. | +| `ozone.secret.s3.store.remote.vault.address` | The address of the Vault server (e.g., `http://vault:8200`). | +| `ozone.secret.s3.store.remote.vault.namespace` | The Vault namespace to use. | +| `ozone.secret.s3.store.remote.vault.enginever` | The version of the Vault secrets engine (e.g., `2`). | +| `ozone.secret.s3.store.remote.vault.secretpath` | The path where the secrets are stored in Vault. | +| `ozone.secret.s3.store.remote.vault.auth` | The authentication method to use with Vault. Supported values are `TOKEN` and `APPROLE`. | +| `ozone.secret.s3.store.remote.vault.auth.token` | The Vault authentication token. Required if `ozone.secret.s3.store.remote.vault.auth` is set to `TOKEN`. | +| `ozone.secret.s3.store.remote.vault.auth.approle.id` | The AppRole RoleID. Required if `ozone.secret.s3.store.remote.vault.auth` is set to `APPROLE`. | +| `ozone.secret.s3.store.remote.vault.auth.approle.secret` | The AppRole SecretID. Required if `ozone.secret.s3.store.remote.vault.auth` is set to `APPROLE`. | +| `ozone.secret.s3.store.remote.vault.auth.approle.path` | The AppRole path. Required if `ozone.secret.s3.store.remote.vault.auth` is set to `APPROLE`. | +| `ozone.secret.s3.store.remote.vault.trust.store.type` | The type of the trust store (e.g., `JKS`). | +| `ozone.secret.s3.store.remote.vault.trust.store.path` | The path to the trust store file. 
| +| `ozone.secret.s3.store.remote.vault.trust.store.password` | The password for the trust store. | +| `ozone.secret.s3.store.remote.vault.key.store.type` | The type of the key store (e.g., `JKS`). | +| `ozone.secret.s3.store.remote.vault.key.store.path` | The path to the key store file. | +| `ozone.secret.s3.store.remote.vault.key.store.password` | The password for the key store. | + +### Example + +Here is an example of how to configure Ozone to use Vault for S3 secret storage with token authentication: + +```xml
+<property>
+  <name>ozone.secret.s3.store.provider</name>
+  <value>org.apache.hadoop.ozone.s3.remote.vault.VaultS3SecretStorageProvider</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.address</name>
+  <value>http://localhost:8200</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.enginever</name>
+  <value>2</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.secretpath</name>
+  <value>secret</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.auth</name>
+  <value>TOKEN</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.auth.token</name>
+  <value>your-vault-token</value>
+</property>
+```
+
+### Example with SSL
+
+Here is an example of how to configure Ozone to use Vault for S3 secret storage with SSL:
+
+```xml
+<property>
+  <name>ozone.secret.s3.store.provider</name>
+  <value>org.apache.hadoop.ozone.s3.remote.vault.VaultS3SecretStorageProvider</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.address</name>
+  <value>https://localhost:8200</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.enginever</name>
+  <value>2</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.secretpath</name>
+  <value>secret</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.auth</name>
+  <value>TOKEN</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.auth.token</name>
+  <value>your-vault-token</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.trust.store.path</name>
+  <value>/path/to/truststore.jks</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.trust.store.password</name>
+  <value>truststore-password</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.key.store.path</name>
+  <value>/path/to/keystore.jks</value>
+</property>
+<property>
+  <name>ozone.secret.s3.store.remote.vault.key.store.password</name>
+  <value>keystore-password</value>
+</property>
+```
+
+### References
+
+* [HashiCorp Vault Documentation](https://developer.hashicorp.com/vault/docs) diff --git a/hadoop-hdds/docs/content/tools/Admin.md
b/hadoop-hdds/docs/content/tools/Admin.md index e89331230fbd..c2f6093180a0 100644 --- a/hadoop-hdds/docs/content/tools/Admin.md +++ b/hadoop-hdds/docs/content/tools/Admin.md @@ -172,3 +172,49 @@ $ ozone admin om lof --service-id=om-service-test1 --length=3 --prefix=/volumelo ``` Note in JSON output mode, field `contToken` won't show up at all in the result if there are no more entries after the batch (i.e. when `hasMore` is `false`). + + +## Snapshot Defragmentation Trigger + +The snapshot defrag command triggers the Snapshot Defragmentation Service to run immediately on a specific Ozone Manager node. +This command manually initiates the snapshot defragmentation process which compacts snapshot data and removes fragmentation to improve storage efficiency. + +This command only works on Ozone Manager HA clusters. + +```bash +$ ozone admin om snapshot defrag --help +Usage: ozone admin om snapshot defrag [-hV] [--no-wait] [--node-id=<nodeId>] + [-id=<serviceId>] +Triggers the Snapshot Defragmentation Service to run immediately. This command +manually initiates the snapshot defragmentation process which compacts +snapshot data and removes fragmentation to improve storage efficiency. This +command works only on OzoneManager HA cluster. + -h, --help Show this help message and exit. + --no-wait Do not wait for the defragmentation task to + complete. The command will return immediately + after triggering the task. + --node-id=<nodeId> NodeID of the OM to trigger snapshot + defragmentation on. + -id, --service-id=<serviceId> + Ozone Manager Service ID + -V, --version Print version information and exit. +``` + +### Example usages + +- Trigger snapshot defragmentation on OM node `om3` in service `omservice` and wait for completion: + +```bash +$ ozone admin om snapshot defrag --service-id=omservice --node-id=om3 +Triggering Snapshot Defrag Service ... +Snapshot defragmentation completed successfully.
+``` + +- Trigger snapshot defragmentation without waiting for completion: + +```bash +$ ozone admin om snapshot defrag --service-id=omservice --node-id=om3 --no-wait +Triggering Snapshot Defrag Service ... +Snapshot defragmentation task has been triggered successfully and is running in the background. +``` + diff --git a/hadoop-hdds/docs/pom.xml b/hadoop-hdds/docs/pom.xml index 8bb357e19744..5215ecd635bf 100644 --- a/hadoop-hdds/docs/pom.xml +++ b/hadoop-hdds/docs/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-docs - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Documentation Apache Ozone Documentation diff --git a/hadoop-hdds/erasurecode/pom.xml b/hadoop-hdds/erasurecode/pom.xml index 53578449df23..c74e7c3f5524 100644 --- a/hadoop-hdds/erasurecode/pom.xml +++ b/hadoop-hdds/erasurecode/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone hdds-hadoop-dependency-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../hadoop-dependency-client hdds-erasurecode - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HDDS Erasurecode Apache Ozone Distributed Data Store Earsurecode utils diff --git a/hadoop-hdds/framework/pom.xml b/hadoop-hdds/framework/pom.xml index 5c368525010a..c30cde6de28f 100644 --- a/hadoop-hdds/framework/pom.xml +++ b/hadoop-hdds/framework/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-server-framework - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HDDS Server Framework Apache Ozone Distributed Data Store Server Framework diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index 7677ed58707f..5f91345d98f5 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -29,6 +29,7 @@ import 
org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionSummary; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; @@ -125,7 +126,7 @@ void deleteContainer(long containerId, Pipeline pipeline, boolean force) * @throws IOException */ ContainerListResult listContainer(long startContainerID, - int count) throws IOException; + int count) throws IOException; /** * Lists a range of containers and get their info. @@ -139,9 +140,9 @@ ContainerListResult listContainer(long startContainerID, * @throws IOException */ ContainerListResult listContainer(long startContainerID, int count, - HddsProtos.LifeCycleState state, - HddsProtos.ReplicationType replicationType, - ReplicationConfig replicationConfig) + HddsProtos.LifeCycleState state, + HddsProtos.ReplicationType replicationType, + ReplicationConfig replicationConfig) throws IOException; /** @@ -180,8 +181,8 @@ ContainerDataProto readContainer(long containerID) */ @Deprecated ContainerWithPipeline createContainer(HddsProtos.ReplicationType type, - HddsProtos.ReplicationFactor replicationFactor, - String owner) throws IOException; + HddsProtos.ReplicationFactor replicationFactor, + String owner) throws IOException; ContainerWithPipeline createContainer(ReplicationConfig replicationConfig, String owner) throws IOException; @@ -206,8 +207,8 @@ ContainerWithPipeline createContainer(HddsProtos.ReplicationType type, * @throws IOException */ List queryNode(HddsProtos.NodeOperationalState opState, - 
HddsProtos.NodeState nodeState, HddsProtos.QueryScope queryScope, - String poolName) throws IOException; + HddsProtos.NodeState nodeState, HddsProtos.QueryScope queryScope, + String poolName) throws IOException; /** * Returns a node with the given UUID. @@ -256,7 +257,7 @@ List recommissionNodes(List hosts) * @throws IOException */ List startMaintenanceNodes(List hosts, - int endHours, boolean force) throws IOException; + int endHours, boolean force) throws IOException; /** * Creates a specified replication pipeline. @@ -266,7 +267,7 @@ List startMaintenanceNodes(List hosts, * @throws IOException */ Pipeline createReplicationPipeline(HddsProtos.ReplicationType type, - HddsProtos.ReplicationFactor factor, HddsProtos.NodePool nodePool) + HddsProtos.ReplicationFactor factor, HddsProtos.NodePool nodePool) throws IOException; /** @@ -411,6 +412,12 @@ StartContainerBalancerResponseProto startContainerBalancer( */ void transferLeadership(String newLeaderId) throws IOException; + /** + * Get deleted block summary. + * @throws IOException + */ + DeletedBlocksTransactionSummary getDeletedBlockSummary() throws IOException; + /** * Get usage information of datanode by address or uuid. 
* diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index 56411453fc8e..92ddfa7eb8dc 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -17,6 +17,7 @@ package org.apache.hadoop.hdds.scm.protocol; +import jakarta.annotation.Nullable; import java.io.Closeable; import java.io.IOException; import java.util.Collections; @@ -31,6 +32,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionSummary; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; @@ -361,6 +363,14 @@ List getFailedDeletedBlockTxn(int count, @Deprecated int resetDeletedBlockRetryCount(List txIDs) throws IOException; + + /** + * Get deleted block summary. + * @throws IOException + */ + @Nullable + DeletedBlocksTransactionSummary getDeletedBlockSummary() throws IOException; + /** * Check if SCM is in safe mode. 
* diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 2a85e6e40071..502d9a4fe98f 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -24,6 +24,7 @@ import com.google.common.base.Preconditions; import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; +import jakarta.annotation.Nullable; import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; @@ -42,6 +43,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionSummary; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.GetScmInfoResponseProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipRequestProto; @@ -78,6 +80,8 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetDeletedBlocksTxnSummaryRequestProto; +import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetDeletedBlocksTxnSummaryResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetExistContainerWithPipelinesInBatchRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetMetricsRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetMetricsResponseProto; @@ -801,6 +805,18 @@ public int resetDeletedBlockRetryCount(List txIDs) return 0; } + @Nullable + @Override + public DeletedBlocksTransactionSummary getDeletedBlockSummary() throws IOException { + GetDeletedBlocksTxnSummaryRequestProto request = + GetDeletedBlocksTxnSummaryRequestProto.newBuilder().build(); + ScmContainerLocationResponse scmContainerLocationResponse = submitRequest(Type.GetDeletedBlocksTransactionSummary, + builder -> builder.setGetDeletedBlocksTxnSummaryRequest(request)); + GetDeletedBlocksTxnSummaryResponseProto response = + scmContainerLocationResponse.getGetDeletedBlocksTxnSummaryResponse(); + return response.hasSummary() ? response.getSummary() : null; + } + /** * Check if SCM is in safe mode. 
* diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java index 9eeb69ece3d8..b93626060c80 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java @@ -159,8 +159,9 @@ static RocksDatabase open(File dbFile, ManagedDBOptions dbOptions, List descriptors = null; ManagedRocksDB db = null; final Map columnFamilies = new HashMap<>(); + List extra = null; try { - final List extra = getExtraColumnFamilies(dbFile, families); + extra = getExtraColumnFamilies(dbFile, families); descriptors = Stream.concat(families.stream(), extra.stream()) .map(TableConfig::getDescriptor) .collect(Collectors.toList()); @@ -178,6 +179,10 @@ static RocksDatabase open(File dbFile, ManagedDBOptions dbOptions, } catch (RocksDBException e) { close(columnFamilies, db, descriptors, writeOptions, dbOptions); throw toRocksDatabaseException(RocksDatabase.class, "open " + dbFile, e); + } finally { + if (extra != null) { + extra.forEach(TableConfig::close); + } } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/common/BlockGroup.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/common/BlockGroup.java index 5e8b0e1724be..4ef34193aa2b 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/common/BlockGroup.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/common/BlockGroup.java @@ -20,7 +20,6 @@ import java.util.ArrayList; import java.util.List; import org.apache.hadoop.hdds.client.BlockID; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.KeyBlocks; /** @@ -29,15 +28,16 @@ public final class BlockGroup { private String groupID; - private List blockIDs; + private List 
deletedBlocks; + public static final long SIZE_NOT_AVAILABLE = -1; - private BlockGroup(String groupID, List blockIDs) { + private BlockGroup(String groupID, List deletedBlocks) { this.groupID = groupID; - this.blockIDs = blockIDs; + this.deletedBlocks = deletedBlocks; } - public List getBlockIDList() { - return blockIDs; + public List getDeletedBlocks() { + return deletedBlocks; } public String getGroupID() { @@ -46,8 +46,10 @@ public String getGroupID() { public KeyBlocks getProto() { KeyBlocks.Builder kbb = KeyBlocks.newBuilder(); - for (BlockID block : blockIDs) { - kbb.addBlocks(block.getProtobuf()); + for (DeletedBlock deletedBlock : deletedBlocks) { + kbb.addBlocks(deletedBlock.getBlockID().getProtobuf()); + kbb.addSize(deletedBlock.getSize()); + kbb.addReplicatedSize(deletedBlock.getReplicatedSize()); } return kbb.setKey(groupID).build(); } @@ -58,13 +60,23 @@ public KeyBlocks getProto() { * @return a group of blocks. */ public static BlockGroup getFromProto(KeyBlocks proto) { - List blockIDs = new ArrayList<>(); - for (HddsProtos.BlockID block : proto.getBlocksList()) { - blockIDs.add(new BlockID(block.getContainerBlockID().getContainerID(), - block.getContainerBlockID().getLocalID())); + List deletedBlocksList = new ArrayList<>(); + for (int i = 0; i < proto.getBlocksCount(); i++) { + long repSize = SIZE_NOT_AVAILABLE; + long size = SIZE_NOT_AVAILABLE; + if (proto.getSizeCount() > i) { + size = proto.getSize(i); + } + if (proto.getReplicatedSizeCount() > i) { + repSize = proto.getReplicatedSize(i); + } + BlockID block = new BlockID(proto.getBlocks(i).getContainerBlockID().getContainerID(), + proto.getBlocks(i).getContainerBlockID().getLocalID()); + deletedBlocksList.add(new DeletedBlock(block, size, repSize)); } return BlockGroup.newBuilder().setKeyName(proto.getKey()) - .addAllBlockIDs(blockIDs).build(); + .addAllDeletedBlocks(deletedBlocksList) + .build(); } public static Builder newBuilder() { @@ -75,7 +87,7 @@ public static Builder newBuilder() { 
public String toString() { return "BlockGroup[" + "groupID='" + groupID + '\'' + - ", blockIDs=" + blockIDs + + ", deletedBlocks=" + deletedBlocks + ']'; } @@ -85,21 +97,20 @@ public String toString() { public static class Builder { private String groupID; - private List blockIDs; + private List deletedBlocks; public Builder setKeyName(String blockGroupID) { this.groupID = blockGroupID; return this; } - public Builder addAllBlockIDs(List keyBlocks) { - this.blockIDs = keyBlocks; + public Builder addAllDeletedBlocks(List deletedBlockList) { + this.deletedBlocks = deletedBlockList; return this; } public BlockGroup build() { - return new BlockGroup(groupID, blockIDs); + return new BlockGroup(groupID, deletedBlocks); } } - } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/common/DeletedBlock.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/common/DeletedBlock.java new file mode 100644 index 000000000000..b611541578ea --- /dev/null +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/ozone/common/DeletedBlock.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.common; + +import org.apache.hadoop.hdds.client.BlockID; + +/** + * DeletedBlock of Ozone (BlockID + usedBytes). + */ +public class DeletedBlock { + + private BlockID blockID; + private long size; + private long replicatedSize; + + public DeletedBlock(BlockID blockID, long size, long replicatedSize) { + this.blockID = blockID; + this.size = size; + this.replicatedSize = replicatedSize; + } + + public BlockID getBlockID() { + return this.blockID; + } + + public long getSize() { + return this.size; + } + + public long getReplicatedSize() { + return this.replicatedSize; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(64); + sb.append(" localID: ").append(blockID.getContainerBlockID().getLocalID()); + sb.append(" containerID: ").append(blockID.getContainerBlockID().getContainerID()); + sb.append(" size: ").append(size); + sb.append(" replicatedSize: ").append(replicatedSize); + return sb.toString(); + } +} diff --git a/hadoop-hdds/hadoop-dependency-client/pom.xml b/hadoop-hdds/hadoop-dependency-client/pom.xml index 747518f7960e..980d02531ad1 100644 --- a/hadoop-hdds/hadoop-dependency-client/pom.xml +++ b/hadoop-hdds/hadoop-dependency-client/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-hadoop-dependency-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT pom Apache Ozone HDDS Hadoop Client dependencies Apache Ozone Distributed Data Store Hadoop client dependencies diff --git a/hadoop-hdds/interface-admin/pom.xml b/hadoop-hdds/interface-admin/pom.xml index 0d0dedb35c5b..e6887b955da4 100644 --- a/hadoop-hdds/interface-admin/pom.xml +++ b/hadoop-hdds/interface-admin/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-interface-admin - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HDDS Admin Interface Apache Ozone Distributed Data Store Admin interface diff --git 
a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index 3dfdea4c7324..f80a50a3be97 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -86,6 +86,7 @@ message ScmContainerLocationRequest { optional GetMetricsRequestProto getMetricsRequest = 47; optional ContainerBalancerStatusInfoRequestProto containerBalancerStatusInfoRequest = 48; optional ReconcileContainerRequestProto reconcileContainerRequest = 49; + optional GetDeletedBlocksTxnSummaryRequestProto getDeletedBlocksTxnSummaryRequest = 50; } message ScmContainerLocationResponse { @@ -143,6 +144,7 @@ message ScmContainerLocationResponse { optional GetMetricsResponseProto getMetricsResponse = 47; optional ContainerBalancerStatusInfoResponseProto containerBalancerStatusInfoResponse = 48; optional ReconcileContainerResponseProto reconcileContainerResponse = 49; + optional GetDeletedBlocksTxnSummaryResponseProto getDeletedBlocksTxnSummaryResponse = 50; enum Status { OK = 1; @@ -199,6 +201,7 @@ enum Type { GetMetrics = 43; GetContainerBalancerStatusInfo = 44; ReconcileContainer = 45; + GetDeletedBlocksTransactionSummary = 46; } /** @@ -545,6 +548,13 @@ message ResetDeletedBlockRetryCountResponseProto { required int32 resetCount = 1; } +message GetDeletedBlocksTxnSummaryRequestProto { +} + +message GetDeletedBlocksTxnSummaryResponseProto { + optional DeletedBlocksTransactionSummary summary = 1; +} + message FinalizeScmUpgradeRequestProto { required string upgradeClientId = 1; } diff --git a/hadoop-hdds/interface-client/pom.xml b/hadoop-hdds/interface-client/pom.xml index 18d02e29225a..d6e7c00fe0ed 100644 --- a/hadoop-hdds/interface-client/pom.xml +++ b/hadoop-hdds/interface-client/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-interface-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone 
HDDS Client Interface Apache Ozone Distributed Data Store Client interface diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index 504f1a7ebdf1..284124696d6b 100644 --- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -514,6 +514,14 @@ message DeletedBlocksTransactionInfo { optional int32 count = 4; } +message DeletedBlocksTransactionSummary { + optional int64 firstTxID = 1; // starting ID of transaction to be counted into summary + optional uint64 totalTransactionCount = 2; + optional uint64 totalBlockCount = 3; + optional uint64 totalBlockSize = 4; + optional uint64 totalBlockReplicatedSize = 5; +} + message CompactionFileInfoProto { optional string fileName = 1; optional string startKey = 2; diff --git a/hadoop-hdds/interface-server/pom.xml b/hadoop-hdds/interface-server/pom.xml index 90c462658d8e..d6a6353d9b2f 100644 --- a/hadoop-hdds/interface-server/pom.xml +++ b/hadoop-hdds/interface-server/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-interface-server - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HDDS Server Interface Apache Ozone Distributed Data Store Server interface diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto index e48ed4d1c595..e4b3a2de56fd 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto @@ -366,9 +366,9 @@ message DeletedBlocksTransaction { required int64 txID = 1; required int64 containerID = 2; repeated int64 localID = 3; - // the retry time of sending deleting command to datanode. - // We don't have to store the retry count in DB. 
- optional int32 count = 4 [deprecated=true]; + optional int32 count = 4; + optional uint64 totalBlockSize = 5; + optional uint64 totalBlockReplicatedSize = 6; } // ACK message datanode sent to SCM, contains the result of diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto index fc24d2562f9c..4c794fe7dc18 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto @@ -181,6 +181,8 @@ message DeleteScmKeyBlocksRequestProto { message KeyBlocks { required string key = 1; repeated BlockID blocks = 2; + repeated uint64 size = 3; + repeated uint64 replicatedSize = 4; } /** diff --git a/hadoop-hdds/managed-rocksdb/pom.xml b/hadoop-hdds/managed-rocksdb/pom.xml index c1c4685df40f..5e6976500f96 100644 --- a/hadoop-hdds/managed-rocksdb/pom.xml +++ b/hadoop-hdds/managed-rocksdb/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-managed-rocksdb - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HDDS Managed RocksDB Apache Ozone Managed RocksDB library diff --git a/hadoop-hdds/pom.xml b/hadoop-hdds/pom.xml index 220d1fe40917..ba60486c1463 100644 --- a/hadoop-hdds/pom.xml +++ b/hadoop-hdds/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone ozone-main - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT pom Apache Ozone HDDS Apache Ozone Distributed Data Store Project diff --git a/hadoop-hdds/rocks-native/pom.xml b/hadoop-hdds/rocks-native/pom.xml index 0c7e8fa7e2da..74fdb749d252 100644 --- a/hadoop-hdds/rocks-native/pom.xml +++ b/hadoop-hdds/rocks-native/pom.xml @@ -17,7 +17,7 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-rocks-native Apache Ozone HDDS RocksDB Tools diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/pom.xml b/hadoop-hdds/rocksdb-checkpoint-differ/pom.xml index 345be1b9fa82..e991b8702990 100644 --- 
a/hadoop-hdds/rocksdb-checkpoint-differ/pom.xml +++ b/hadoop-hdds/rocksdb-checkpoint-differ/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT rocksdb-checkpoint-differ - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Checkpoint Differ for RocksDB Apache Ozone Checkpoint Differ for RocksDB diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/compaction/log/CompactionFileInfo.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/compaction/log/CompactionFileInfo.java index e44c2e8522e1..535bf115ea8e 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/compaction/log/CompactionFileInfo.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/compaction/log/CompactionFileInfo.java @@ -22,6 +22,7 @@ import java.util.Objects; import org.apache.hadoop.hdds.StringUtils; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.ozone.rocksdb.util.SstFileInfo; import org.rocksdb.LiveFileMetaData; /** diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/compaction/log/SstFileInfo.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdb/util/SstFileInfo.java similarity index 93% rename from hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/compaction/log/SstFileInfo.java rename to hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdb/util/SstFileInfo.java index b1887ec3d1e0..50f8c4c54d06 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/compaction/log/SstFileInfo.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdb/util/SstFileInfo.java @@ -15,7 +15,9 @@ * limitations under the License. 
*/ -package org.apache.ozone.compaction.log; +package org.apache.ozone.rocksdb.util; + +import static org.apache.commons.io.FilenameUtils.getBaseName; import java.util.Objects; import org.apache.hadoop.hdds.StringUtils; @@ -39,7 +41,7 @@ public SstFileInfo(String fileName, String startRange, String endRange, String c } public SstFileInfo(LiveFileMetaData fileMetaData) { - this(fileMetaData.fileName(), StringUtils.bytes2String(fileMetaData.smallestKey()), + this(getBaseName(fileMetaData.fileName()), StringUtils.bytes2String(fileMetaData.smallestKey()), StringUtils.bytes2String(fileMetaData.largestKey()), StringUtils.bytes2String(fileMetaData.columnFamilyName())); } diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/CompactionNode.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/CompactionNode.java index 7dddb6a3b77b..969c0e0b00ed 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/CompactionNode.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/CompactionNode.java @@ -17,20 +17,17 @@ package org.apache.ozone.rocksdiff; +import java.util.Objects; import org.apache.ozone.compaction.log.CompactionFileInfo; +import org.apache.ozone.rocksdb.util.SstFileInfo; /** * Node in the compaction DAG that represents an SST file. */ -public class CompactionNode { - // Name of the SST file - private final String fileName; +public class CompactionNode extends SstFileInfo { private final long snapshotGeneration; private final long totalNumberOfKeys; private long cumulativeKeysReverseTraversal; - private final String startKey; - private final String endKey; - private final String columnFamily; /** * CompactionNode constructor. 
@@ -38,13 +35,10 @@ public class CompactionNode { * @param seqNum Snapshot generation (sequence number) */ public CompactionNode(String file, long seqNum, String startKey, String endKey, String columnFamily) { - fileName = file; + super(file, startKey, endKey, columnFamily); totalNumberOfKeys = 0L; snapshotGeneration = seqNum; cumulativeKeysReverseTraversal = 0L; - this.startKey = startKey; - this.endKey = endKey; - this.columnFamily = columnFamily; } public CompactionNode(CompactionFileInfo compactionFileInfo) { @@ -54,11 +48,7 @@ public CompactionNode(CompactionFileInfo compactionFileInfo) { @Override public String toString() { - return String.format("Node{%s}", fileName); - } - - public String getFileName() { - return fileName; + return String.format("Node{%s}", getFileName()); } public long getSnapshotGeneration() { @@ -73,18 +63,6 @@ public long getCumulativeKeysReverseTraversal() { return cumulativeKeysReverseTraversal; } - public String getStartKey() { - return startKey; - } - - public String getEndKey() { - return endKey; - } - - public String getColumnFamily() { - return columnFamily; - } - public void setCumulativeKeysReverseTraversal( long cumulativeKeysReverseTraversal) { this.cumulativeKeysReverseTraversal = cumulativeKeysReverseTraversal; @@ -93,4 +71,16 @@ public void setCumulativeKeysReverseTraversal( public void addCumulativeKeysReverseTraversal(long diff) { this.cumulativeKeysReverseTraversal += diff; } + + // Not changing previous behaviour. + @Override + public final boolean equals(Object o) { + return this == o; + } + + // Having hashcode only on the basis of the filename. 
+ @Override + public int hashCode() { + return Objects.hash(getFileName()); + } } diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDiffUtils.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDiffUtils.java index 5bc14b1d9497..86577147b62b 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDiffUtils.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDiffUtils.java @@ -30,6 +30,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB; import org.apache.ozone.compaction.log.CompactionFileInfo; +import org.apache.ozone.rocksdb.util.SstFileInfo; import org.rocksdb.LiveFileMetaData; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -106,7 +107,7 @@ public static void filterRelevantSstFiles(Set inputFiles, } @VisibleForTesting - static boolean shouldSkipNode(CompactionNode node, + static boolean shouldSkipNode(SstFileInfo node, Map columnFamilyToPrefixMap) { // This is for backward compatibility. Before the compaction log table // migration, startKey, endKey and columnFamily information is not persisted diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/compaction/log/TestSstFileInfo.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/compaction/log/TestSstFileInfo.java new file mode 100644 index 000000000000..660e3e75a1d7 --- /dev/null +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/compaction/log/TestSstFileInfo.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ozone.compaction.log; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import org.apache.hadoop.hdds.StringUtils; +import org.apache.ozone.rocksdb.util.SstFileInfo; +import org.junit.jupiter.api.Test; +import org.rocksdb.LiveFileMetaData; + +/** + * Test class for Base SstFileInfo class. 
+ */ +public class TestSstFileInfo { + + @Test + public void testSstFileInfo() { + String smallestKey = "/smallestKey/1"; + String largestKey = "/largestKey/2"; + String columnFamily = "columnFamily/123"; + LiveFileMetaData lfm = mock(LiveFileMetaData.class); + when(lfm.fileName()).thenReturn("/1.sst"); + when(lfm.columnFamilyName()).thenReturn(StringUtils.string2Bytes(columnFamily)); + when(lfm.smallestKey()).thenReturn(StringUtils.string2Bytes(smallestKey)); + when(lfm.largestKey()).thenReturn(StringUtils.string2Bytes(largestKey)); + SstFileInfo expectedSstFileInfo = new SstFileInfo("1", smallestKey, largestKey, columnFamily); + assertEquals(expectedSstFileInfo, new SstFileInfo(lfm)); + } +} diff --git a/hadoop-hdds/server-scm/pom.xml b/hadoop-hdds/server-scm/pom.xml index 811d439f7dd8..68c17ecdf3ab 100644 --- a/hadoop-hdds/server-scm/pom.xml +++ b/hadoop-hdds/server-scm/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-server-scm - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HDDS SCM Server Apache Ozone Distributed Data Store Storage Container Manager Server diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java index 9b46968424cc..6b0136abf664 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.metrics2.util.MBeans; import org.apache.hadoop.ozone.common.BlockGroup; +import org.apache.hadoop.ozone.common.DeletedBlock; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -92,13 +93,13 @@ public BlockManagerImpl(final ConfigurationSource conf, this.writableContainerFactory = 
scm.getWritableContainerFactory(); mxBean = MBeans.register("BlockManager", "BlockManagerImpl", this); - metrics = ScmBlockDeletingServiceMetrics.create(); + metrics = ScmBlockDeletingServiceMetrics.create(this); // SCM block deleting transaction log and deleting service. deletedBlockLog = new DeletedBlockLogImpl(conf, scm, scm.getContainerManager(), - scm.getScmHAManager().getDBTransactionBuffer(), + scm.getScmHAManager().asSCMHADBTransactionBuffer(), metrics); @@ -219,21 +220,20 @@ public void deleteBlocks(List keyBlocksInfoList) throw new SCMException("SafeModePrecheck failed for deleteBlocks", SCMException.ResultCodes.SAFE_MODE_EXCEPTION); } - Map> containerBlocks = new HashMap<>(); - // TODO: track the block size info so that we can reclaim the container - // TODO: used space when the block is deleted. + Map> containerBlocks = new HashMap<>(); for (BlockGroup bg : keyBlocksInfoList) { if (LOG.isDebugEnabled()) { LOG.debug("Deleting blocks {}", - StringUtils.join(",", bg.getBlockIDList())); + StringUtils.join(",", bg.getDeletedBlocks())); } - for (BlockID block : bg.getBlockIDList()) { + for (DeletedBlock deletedBlock : bg.getDeletedBlocks()) { + BlockID block = deletedBlock.getBlockID(); long containerID = block.getContainerID(); if (containerBlocks.containsKey(containerID)) { - containerBlocks.get(containerID).add(block.getLocalID()); + containerBlocks.get(containerID).add(deletedBlock); } else { - List item = new ArrayList<>(); - item.add(block.getLocalID()); + List item = new ArrayList<>(); + item.add(deletedBlock); containerBlocks.put(containerID, item); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java index b1283ef773c9..63ab44de346a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java +++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java @@ -17,6 +17,8 @@ package org.apache.hadoop.hdds.scm.block; +import com.google.protobuf.ByteString; +import jakarta.annotation.Nullable; import java.io.Closeable; import java.io.IOException; import java.util.List; @@ -24,8 +26,10 @@ import java.util.Set; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.DatanodeID; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionSummary; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.ozone.common.DeletedBlock; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; /** @@ -100,7 +104,7 @@ void recordTransactionCreated( * @param containerBlocksMap a map of containerBlocks. * @throws IOException */ - void addTransactions(Map> containerBlocksMap) + void addTransactions(Map> containerBlocksMap) throws IOException; /** @@ -115,8 +119,13 @@ void addTransactions(Map> containerBlocksMap) /** * Reinitialize the delete log from the db. 
* @param deletedBlocksTXTable delete transaction table + * @param statefulConfigTable stateful service config table */ - void reinitialize(Table deletedBlocksTXTable); + void reinitialize(Table deletedBlocksTXTable, + Table statefulConfigTable) throws IOException; int getTransactionToDNsCommitMapSize(); + + @Nullable + DeletedBlocksTransactionSummary getTransactionSummary(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java index 9418a8be024d..21fdd9cd0f49 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java @@ -23,6 +23,7 @@ import static org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator.DEL_TXN_ID; import com.google.common.annotations.VisibleForTesting; +import com.google.protobuf.ByteString; import java.io.IOException; import java.time.Duration; import java.util.ArrayList; @@ -36,6 +37,7 @@ import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.DatanodeID; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionSummary; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.CommandStatus; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerBlocksDeletionACKProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; @@ -49,12 +51,15 @@ import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult; import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager; import org.apache.hadoop.hdds.scm.ha.SCMContext; +import org.apache.hadoop.hdds.scm.ha.SCMHADBTransactionBuffer; import 
org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator; -import org.apache.hadoop.hdds.scm.metadata.DBTransactionBuffer; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.server.events.EventHandler; import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.ozone.common.DeletedBlock; +import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -83,8 +88,7 @@ public class DeletedBlockLogImpl private final SCMContext scmContext; private final SequenceIdGenerator sequenceIdGen; private final ScmBlockDeletingServiceMetrics metrics; - private final SCMDeletedBlockTransactionStatusManager - transactionStatusManager; + private SCMDeletedBlockTransactionStatusManager transactionStatusManager; private long scmCommandTimeoutMs = Duration.ofSeconds(300).toMillis(); private long lastProcessedTransactionId = -1; @@ -94,8 +98,8 @@ public class DeletedBlockLogImpl public DeletedBlockLogImpl(ConfigurationSource conf, StorageContainerManager scm, ContainerManager containerManager, - DBTransactionBuffer dbTxBuffer, - ScmBlockDeletingServiceMetrics metrics) { + SCMHADBTransactionBuffer dbTxBuffer, + ScmBlockDeletingServiceMetrics metrics) throws IOException { this.containerManager = containerManager; this.lock = new ReentrantLock(); @@ -106,12 +110,14 @@ public DeletedBlockLogImpl(ConfigurationSource conf, .setContainerManager(containerManager) .setRatisServer(scm.getScmHAManager().getRatisServer()) .setSCMDBTransactionBuffer(dbTxBuffer) + .setStatefulConfigTable(scm.getScmMetadataStore().getStatefulServiceConfigTable()) .build(); this.scmContext = scm.getScmContext(); this.sequenceIdGen = scm.getSequenceIdGen(); this.metrics = metrics; this.transactionStatusManager = new 
SCMDeletedBlockTransactionStatusManager(deletedBlockLogStateManager, + scm.getScmMetadataStore().getStatefulServiceConfigTable(), containerManager, metrics, scmCommandTimeoutMs); int limit = (int) conf.getStorageSize( ScmConfigKeys.OZONE_SCM_HA_RAFT_LOG_APPENDER_QUEUE_BYTE_LIMIT, @@ -125,7 +131,7 @@ public DeletedBlockLogImpl(ConfigurationSource conf, } @VisibleForTesting - void setDeletedBlockLogStateManager(DeletedBlockLogStateManager manager) { + public void setDeletedBlockLogStateManager(DeletedBlockLogStateManager manager) { this.deletedBlockLogStateManager = manager; } @@ -133,6 +139,10 @@ void setDeletedBlockLogStateManager(DeletedBlockLogStateManager manager) { void setDeleteBlocksFactorPerDatanode(int deleteBlocksFactorPerDatanode) { this.deletionFactorPerDatanode = deleteBlocksFactorPerDatanode; } + + public DeletedBlockLogStateManager getDeletedBlockLogStateManager() { + return deletedBlockLogStateManager; + } /** * {@inheritDoc} @@ -147,13 +157,23 @@ public void incrementCount(List txIDs) } private DeletedBlocksTransaction constructNewTransaction( - long txID, long containerID, List blocks) { - return DeletedBlocksTransaction.newBuilder() + long txID, long containerID, List blocks) { + List localIdList = blocks.stream().map(b -> b.getBlockID().getLocalID()).collect(Collectors.toList()); + DeletedBlocksTransaction.Builder builder = DeletedBlocksTransaction.newBuilder() .setTxID(txID) .setContainerID(containerID) - .addAllLocalID(blocks) - .setCount(0) - .build(); + .addAllLocalID(localIdList) + .setCount(0); + + if (VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.STORAGE_DATA_DISTRIBUTION)) { + long replicatedSize = blocks.stream().mapToLong(DeletedBlock::getReplicatedSize).sum(); + // even when HDDSLayoutFeature.DATA_DISTRIBUTION is finalized, old OM can still call the old API + if (replicatedSize >= 0) { + builder.setTotalBlockReplicatedSize(replicatedSize); + builder.setTotalBlockSize(blocks.stream().mapToLong(DeletedBlock::getSize).sum()); + 
} + } + return builder.build(); } @Override @@ -176,11 +196,13 @@ public int getNumOfValidTransactions() throws IOException { @Override public void reinitialize( - Table deletedTable) { + Table deletedTable, Table statefulConfigTable) + throws IOException { // we don't need to handle SCMDeletedBlockTransactionStatusManager and // deletedBlockLogStateManager, since they will be cleared // when becoming leader. - deletedBlockLogStateManager.reinitialize(deletedTable); + deletedBlockLogStateManager.reinitialize(deletedTable, statefulConfigTable); + transactionStatusManager.reinitialize(statefulConfigTable); } /** @@ -206,13 +228,13 @@ public void onFlush() { * @throws IOException */ @Override - public void addTransactions(Map> containerBlocksMap) + public void addTransactions(Map> containerBlocksMap) throws IOException { lock.lock(); try { ArrayList txsToBeAdded = new ArrayList<>(); long currentBatchSizeBytes = 0; - for (Map.Entry< Long, List< Long > > entry : + for (Map.Entry> entry : containerBlocksMap.entrySet()) { long nextTXID = sequenceIdGen.getNextId(DEL_TXN_ID); DeletedBlocksTransaction tx = constructNewTransaction(nextTXID, @@ -222,14 +244,14 @@ public void addTransactions(Map> containerBlocksMap) currentBatchSizeBytes += txSize; if (currentBatchSizeBytes >= logAppenderQueueByteLimit) { - deletedBlockLogStateManager.addTransactionsToDB(txsToBeAdded); + transactionStatusManager.addTransactions(txsToBeAdded); metrics.incrBlockDeletionTransactionCreated(txsToBeAdded.size()); txsToBeAdded.clear(); currentBatchSizeBytes = 0; } } if (!txsToBeAdded.isEmpty()) { - deletedBlockLogStateManager.addTransactionsToDB(txsToBeAdded); + transactionStatusManager.addTransactions(txsToBeAdded); metrics.incrBlockDeletionTransactionCreated(txsToBeAdded.size()); } } finally { @@ -367,6 +389,12 @@ public DatanodeDeletedBlockTransactions getTransactions( DeletedBlocksTransaction txn = keyValue.getValue(); final ContainerID id = ContainerID.valueOf(txn.getContainerID()); final 
ContainerInfo container = containerManager.getContainer(id); + if (VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.STORAGE_DATA_DISTRIBUTION) && + txn.hasTotalBlockReplicatedSize()) { + transactionStatusManager.getTxSizeMap().put(txn.getTxID(), + new SCMDeletedBlockTransactionStatusManager.TxBlockInfo(txn.getLocalIDCount(), + txn.getTotalBlockSize(), txn.getTotalBlockReplicatedSize())); + } try { // HDDS-7126. When container is under replicated, it is possible // that container is deleted, but transactions are not deleted. @@ -411,6 +439,7 @@ public DatanodeDeletedBlockTransactions getTransactions( deletedBlockLogStateManager.removeTransactionsFromDB(txIDs); getSCMDeletedBlockTransactionStatusManager().removeTransactionFromDNsCommitMap(txIDs); getSCMDeletedBlockTransactionStatusManager().removeTransactionFromDNsRetryCountMap(txIDs); + transactionStatusManager.removeTransactions(txIDs); metrics.incrBlockDeletionTransactionCompleted(txIDs.size()); } } @@ -430,6 +459,11 @@ public void setScmCommandTimeoutMs(long scmCommandTimeoutMs) { return transactionStatusManager; } + @VisibleForTesting + public void setSCMDeletedBlockTransactionStatusManager(SCMDeletedBlockTransactionStatusManager manager) { + this.transactionStatusManager = manager; + } + @Override public void recordTransactionCreated(DatanodeID dnId, long scmCmdId, Set dnTxSet) { @@ -442,6 +476,11 @@ public int getTransactionToDNsCommitMapSize() { return getSCMDeletedBlockTransactionStatusManager().getTransactionToDNsCommitMapSize(); } + @Override + public DeletedBlocksTransactionSummary getTransactionSummary() { + return transactionStatusManager.getTransactionSummary(); + } + @Override public void onDatanodeDead(DatanodeID dnId) { getSCMDeletedBlockTransactionStatusManager().onDatanodeDead(dnId); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java index 060b07bbdf93..f22718ce9ef2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java @@ -17,8 +17,10 @@ package org.apache.hadoop.hdds.scm.block; +import com.google.protobuf.ByteString; import java.io.IOException; import java.util.ArrayList; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionSummary; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; import org.apache.hadoop.hdds.scm.metadata.Replicate; import org.apache.hadoop.hdds.utils.db.Table; @@ -29,7 +31,14 @@ */ public interface DeletedBlockLogStateManager { @Replicate - void addTransactionsToDB(ArrayList txs) + void addTransactionsToDB(ArrayList txs, + DeletedBlocksTransactionSummary summary) throws IOException; + + @Replicate + void addTransactionsToDB(ArrayList txs) throws IOException; + + @Replicate + void removeTransactionsFromDB(ArrayList txIDs, DeletedBlocksTransactionSummary summary) throws IOException; @Replicate @@ -49,7 +58,10 @@ int resetRetryCountOfTransactionInDB(ArrayList txIDs) Table.KeyValueIterator getReadOnlyIterator() throws IOException; + ArrayList getTransactionsFromDB(ArrayList txIDs) throws IOException; + void onFlush(); - void reinitialize(Table deletedBlocksTXTable); + void reinitialize(Table deletedBlocksTXTable, + Table statefulConfigTable); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java index b6976c3c3f38..533bd59ba2d4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java 
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java @@ -18,20 +18,23 @@ package org.apache.hadoop.hdds.scm.block; import com.google.common.base.Preconditions; +import com.google.protobuf.ByteString; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Map; import java.util.NoSuchElementException; +import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionSummary; import org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.RequestType; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerManager; +import org.apache.hadoop.hdds.scm.ha.SCMHADBTransactionBuffer; import org.apache.hadoop.hdds.scm.ha.SCMRatisServer; -import org.apache.hadoop.hdds.scm.metadata.DBTransactionBuffer; import org.apache.hadoop.hdds.utils.db.CodecException; import org.apache.hadoop.hdds.utils.db.RocksDatabaseException; import org.apache.hadoop.hdds.utils.db.Table; @@ -50,17 +53,23 @@ public class DeletedBlockLogStateManagerImpl LoggerFactory.getLogger(DeletedBlockLogStateManagerImpl.class); private Table deletedTable; + private Table statefulConfigTable; private ContainerManager containerManager; - private final DBTransactionBuffer transactionBuffer; + private final SCMHADBTransactionBuffer transactionBuffer; private final Set deletingTxIDs; + private final Set skippingRetryTxIDs; + public static final String SERVICE_NAME = DeletedBlockLogStateManager.class.getSimpleName(); public DeletedBlockLogStateManagerImpl(ConfigurationSource conf, Table deletedTable, - ContainerManager containerManager, DBTransactionBuffer txBuffer) { + Table 
statefulServiceConfigTable, + ContainerManager containerManager, SCMHADBTransactionBuffer txBuffer) { this.deletedTable = deletedTable; this.containerManager = containerManager; this.transactionBuffer = txBuffer; this.deletingTxIDs = ConcurrentHashMap.newKeySet(); + this.skippingRetryTxIDs = ConcurrentHashMap.newKeySet(); + this.statefulConfigTable = statefulServiceConfigTable; } @Override @@ -139,8 +148,26 @@ public void removeFromDB() { } @Override - public void addTransactionsToDB(ArrayList txs) - throws IOException { + public void addTransactionsToDB(ArrayList txs, + DeletedBlocksTransactionSummary summary) throws IOException { + Map containerIdToTxnIdMap = new HashMap<>(); + transactionBuffer.pauseAutoFlush(); + try { + for (DeletedBlocksTransaction tx : txs) { + long tid = tx.getTxID(); + containerIdToTxnIdMap.compute(ContainerID.valueOf(tx.getContainerID()), + (k, v) -> v != null && v > tid ? v : tid); + transactionBuffer.addToBuffer(deletedTable, tx.getTxID(), tx); + } + transactionBuffer.addToBuffer(statefulConfigTable, SERVICE_NAME, summary.toByteString()); + } finally { + transactionBuffer.resumeAutoFlush(); + } + containerManager.updateDeleteTransactionId(containerIdToTxnIdMap); + } + + @Override + public void addTransactionsToDB(ArrayList txs) throws IOException { Map containerIdToTxnIdMap = new HashMap<>(); for (DeletedBlocksTransaction tx : txs) { long tid = tx.getTxID(); @@ -152,11 +179,27 @@ public void addTransactionsToDB(ArrayList txs) } @Override - public void removeTransactionsFromDB(ArrayList txIDs) + public void removeTransactionsFromDB(ArrayList txIDs, DeletedBlocksTransactionSummary summary) throws IOException { if (deletingTxIDs != null) { deletingTxIDs.addAll(txIDs); } + transactionBuffer.pauseAutoFlush(); + try { + for (Long txID : txIDs) { + transactionBuffer.removeFromBuffer(deletedTable, txID); + } + transactionBuffer.addToBuffer(statefulConfigTable, SERVICE_NAME, summary.toByteString()); + } finally { + 
transactionBuffer.resumeAutoFlush(); + } + } + + @Override + public void removeTransactionsFromDB(ArrayList txIDs) throws IOException { + if (deletingTxIDs != null) { + deletingTxIDs.addAll(txIDs); + } for (Long txID : txIDs) { transactionBuffer.removeFromBuffer(deletedTable, txID); } @@ -181,6 +224,27 @@ public int resetRetryCountOfTransactionInDB(ArrayList txIDs) return 0; } + @Override + public ArrayList getTransactionsFromDB(ArrayList txIDs) throws IOException { + Objects.requireNonNull(txIDs, "txIds cannot be null."); + ArrayList transactions = new ArrayList<>(); + for (long txId: txIDs) { + try { + DeletedBlocksTransaction transaction = deletedTable.get(txId); + if (transaction == null) { + LOG.debug("txId {} is not found in deletedTable.", txId); + continue; + } + transactions.add(transaction); + } catch (IOException ex) { + LOG.error("Could not get deleted block transaction {}.", txId, ex); + throw ex; + } + } + LOG.debug("Get {} DeletedBlocksTransactions for {} input txIDs", transactions.size(), txIDs.size()); + return transactions; + } + @Override public void onFlush() { // onFlush() can be invoked only when ratis is enabled. @@ -190,7 +254,7 @@ public void onFlush() { @Override public void reinitialize( - Table deletedBlocksTXTable) { + Table deletedBlocksTXTable, Table configTable) { // Before Reinitialization, flush will be called from Ratis StateMachine. // Just the DeletedDb will be loaded here. @@ -199,6 +263,7 @@ public void reinitialize( // before reinitialization. Just update deletedTable here. 
Preconditions.checkArgument(deletingTxIDs.isEmpty()); this.deletedTable = deletedBlocksTXTable; + this.statefulConfigTable = configTable; } public static Builder newBuilder() { @@ -211,9 +276,10 @@ public static Builder newBuilder() { public static class Builder { private ConfigurationSource conf; private SCMRatisServer scmRatisServer; - private Table table; - private DBTransactionBuffer transactionBuffer; + private Table deletedBlocksTransactionTable; + private SCMHADBTransactionBuffer transactionBuffer; private ContainerManager containerManager; + private Table statefulServiceConfigTable; public Builder setConfiguration(final ConfigurationSource config) { conf = config; @@ -227,11 +293,11 @@ public Builder setRatisServer(final SCMRatisServer ratisServer) { public Builder setDeletedBlocksTable( final Table deletedBlocksTable) { - table = deletedBlocksTable; + deletedBlocksTransactionTable = deletedBlocksTable; return this; } - public Builder setSCMDBTransactionBuffer(DBTransactionBuffer buffer) { + public Builder setSCMDBTransactionBuffer(SCMHADBTransactionBuffer buffer) { this.transactionBuffer = buffer; return this; } @@ -241,12 +307,17 @@ public Builder setContainerManager(ContainerManager contManager) { return this; } - public DeletedBlockLogStateManager build() { + public Builder setStatefulConfigTable(final Table table) { + this.statefulServiceConfigTable = table; + return this; + } + + public DeletedBlockLogStateManager build() throws IOException { Preconditions.checkNotNull(conf); - Preconditions.checkNotNull(table); + Preconditions.checkNotNull(deletedBlocksTransactionTable); final DeletedBlockLogStateManager impl = new DeletedBlockLogStateManagerImpl( - conf, table, containerManager, transactionBuffer); + conf, deletedBlocksTransactionTable, statefulServiceConfigTable, containerManager, transactionBuffer); return scmRatisServer.getProxyHandler(RequestType.BLOCK, DeletedBlockLogStateManager.class, impl); diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java index f6136322f630..09d20afb4a96 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java @@ -18,11 +18,14 @@ package org.apache.hadoop.hdds.scm.block; import static java.lang.Math.min; +import static org.apache.hadoop.hdds.scm.block.DeletedBlockLogStateManagerImpl.SERVICE_NAME; import static org.apache.hadoop.hdds.scm.block.SCMDeletedBlockTransactionStatusManager.SCMDeleteBlocksCommandStatusManager.CmdStatus; import static org.apache.hadoop.hdds.scm.block.SCMDeletedBlockTransactionStatusManager.SCMDeleteBlocksCommandStatusManager.CmdStatus.SENT; import static org.apache.hadoop.hdds.scm.block.SCMDeletedBlockTransactionStatusManager.SCMDeleteBlocksCommandStatusManager.CmdStatus.TO_BE_SENT; import com.google.common.annotations.VisibleForTesting; +import com.google.protobuf.ByteString; +import jakarta.annotation.Nullable; import java.io.IOException; import java.time.Duration; import java.time.Instant; @@ -36,15 +39,22 @@ import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.DatanodeID; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionSummary; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.CommandStatus; import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerBlocksDeletionACKProto.DeleteBlockTransactionResult; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,6 +71,9 @@ public class SCMDeletedBlockTransactionStatusManager { private final Map> transactionToDNsCommitMap; // Maps txId to its retry counts; private final Map transactionToRetryCountMap; + // an in memory map to cache the size of each transaction sending to DN. + private Map txSizeMap; + // The access to DeletedBlocksTXTable is protected by // DeletedBlockLogStateManager. 
private final DeletedBlockLogStateManager deletedBlockLogStateManager; @@ -68,6 +81,23 @@ public class SCMDeletedBlockTransactionStatusManager { private final ScmBlockDeletingServiceMetrics metrics; private final long scmCommandTimeoutMs; + private Table statefulConfigTable; + public static final HddsProtos.DeletedBlocksTransactionSummary EMPTY_SUMMARY = + HddsProtos.DeletedBlocksTransactionSummary.newBuilder() + .setFirstTxID(Long.MAX_VALUE) + .setTotalTransactionCount(0) + .setTotalBlockCount(0) + .setTotalBlockSize(0) + .setTotalBlockReplicatedSize(0) + .build(); + private final AtomicLong totalTxCount = new AtomicLong(0); + private final AtomicLong totalBlockCount = new AtomicLong(0); + private final AtomicLong totalBlocksSize = new AtomicLong(0); + private final AtomicLong totalReplicatedBlocksSize = new AtomicLong(0); + private long firstTxIdForDataDistribution = Long.MAX_VALUE; + private boolean isFirstTxIdForDataDistributionSet = false; + private static boolean disableDataDistributionForTest; + /** * Before the DeletedBlockTransaction is executed on DN and reported to * SCM, it is managed by this {@link SCMDeleteBlocksCommandStatusManager}. @@ -80,17 +110,21 @@ public class SCMDeletedBlockTransactionStatusManager { public SCMDeletedBlockTransactionStatusManager( DeletedBlockLogStateManager deletedBlockLogStateManager, + Table statefulServiceConfigTable, ContainerManager containerManager, - ScmBlockDeletingServiceMetrics metrics, long scmCommandTimeoutMs) { + ScmBlockDeletingServiceMetrics metrics, long scmCommandTimeoutMs) throws IOException { // maps transaction to dns which have committed it. 
this.deletedBlockLogStateManager = deletedBlockLogStateManager; + this.statefulConfigTable = statefulServiceConfigTable; this.metrics = metrics; this.containerManager = containerManager; this.scmCommandTimeoutMs = scmCommandTimeoutMs; this.transactionToDNsCommitMap = new ConcurrentHashMap<>(); this.transactionToRetryCountMap = new ConcurrentHashMap<>(); + this.txSizeMap = new ConcurrentHashMap<>(); this.scmDeleteBlocksCommandStatusManager = new SCMDeleteBlocksCommandStatusManager(metrics); + this.initDataDistributionData(); } /** @@ -392,6 +426,7 @@ public void clear() { transactionToRetryCountMap.clear(); scmDeleteBlocksCommandStatusManager.clear(); transactionToDNsCommitMap.clear(); + txSizeMap.clear(); } public void cleanAllTimeoutSCMCommand(long timeoutMs) { @@ -415,6 +450,76 @@ private boolean alreadyExecuted(DatanodeID dnId, long txId) { .contains(dnId); } + @VisibleForTesting + public void addTransactions(ArrayList txList) throws IOException { + if (txList.isEmpty()) { + return; + } + if (VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.STORAGE_DATA_DISTRIBUTION) && + !disableDataDistributionForTest) { + for (DeletedBlocksTransaction tx: txList) { + if (tx.hasTotalBlockSize()) { + if (!isFirstTxIdForDataDistributionSet) { + // set the first transaction ID for data distribution + isFirstTxIdForDataDistributionSet = true; + firstTxIdForDataDistribution = tx.getTxID(); + } + incrDeletedBlocksSummary(tx); + } + } + deletedBlockLogStateManager.addTransactionsToDB(txList, getSummary()); + return; + } + deletedBlockLogStateManager.addTransactionsToDB(txList); + } + + private void incrDeletedBlocksSummary(DeletedBlocksTransaction tx) { + totalTxCount.addAndGet(1); + totalBlockCount.addAndGet(tx.getLocalIDCount()); + totalBlocksSize.addAndGet(tx.getTotalBlockSize()); + totalReplicatedBlocksSize.addAndGet(tx.getTotalBlockReplicatedSize()); + } + + @VisibleForTesting + public void removeTransactions(ArrayList txIDs) throws IOException { + if (txIDs.isEmpty()) { 
+ return; + } + if (VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.STORAGE_DATA_DISTRIBUTION) && + !disableDataDistributionForTest) { + ArrayList txToQueryList = new ArrayList<>(); + for (Long txID: txIDs) { + if (txID >= firstTxIdForDataDistribution) { + TxBlockInfo txBlockInfo = txSizeMap.remove(txID); + if (txBlockInfo != null) { + // txBlockInfosToBeDeleted.add(txBlockInfo); + descDeletedBlocksSummary(txBlockInfo); + metrics.incrBlockDeletionTransactionSizeFromCache(); + } else { + // Fetch the transaction from DB to get the size. This happens during + // 1. SCM leader transfer, deletion command send by one SCM, + // while the deletion ack received by a different SCM + // 2. SCM restarts, txBlockInfoMap is empty, while receiving the deletion ack from DN + txToQueryList.add(txID); + metrics.incrBlockDeletionTransactionSizeFromDB(); + } + } + } + if (!txToQueryList.isEmpty()) { + ArrayList txList = + deletedBlockLogStateManager.getTransactionsFromDB(txToQueryList); + if (txList.size() != txToQueryList.size()) { + LOG.info("Failed to get all transactions from DB: " + txToQueryList.size() + ", got: " + txList.size()); + } + txList.stream().filter(t -> t.hasTotalBlockSize()).forEach(t -> descDeletedBlocksSummary(t)); + } + deletedBlockLogStateManager.removeTransactionsFromDB(txIDs, getSummary()); + return; + } + + deletedBlockLogStateManager.removeTransactionsFromDB(txIDs); + } + /** * Commits a transaction means to delete all footprints of a transaction * from the log. 
This method doesn't guarantee all transactions can be @@ -483,7 +588,7 @@ public void commitTransactions(List transactionRes } } try { - deletedBlockLogStateManager.removeTransactionsFromDB(txIDsToBeDeleted); + removeTransactions(txIDsToBeDeleted); metrics.incrBlockDeletionTransactionCompleted(txIDsToBeDeleted.size()); } catch (IOException e) { LOG.warn("Could not commit delete block transactions: " @@ -491,6 +596,30 @@ public void commitTransactions(List transactionRes } } + public DeletedBlocksTransactionSummary getSummary() { + return DeletedBlocksTransactionSummary.newBuilder() + .setFirstTxID(firstTxIdForDataDistribution) + .setTotalTransactionCount(totalTxCount.get()) + .setTotalBlockCount(totalBlockCount.get()) + .setTotalBlockSize(totalBlocksSize.get()) + .setTotalBlockReplicatedSize(totalReplicatedBlocksSize.get()) + .build(); + } + + private void descDeletedBlocksSummary(TxBlockInfo txBlockInfo) { + totalTxCount.addAndGet(-1); + totalBlockCount.addAndGet(-txBlockInfo.getTotalBlockCount()); + totalBlocksSize.addAndGet(-txBlockInfo.getTotalBlockSize()); + totalReplicatedBlocksSize.addAndGet(-txBlockInfo.getTotalReplicatedBlockSize()); + } + + private void descDeletedBlocksSummary(DeletedBlocksTransaction tx) { + totalTxCount.addAndGet(-1); + totalBlockCount.addAndGet(-tx.getLocalIDCount()); + totalBlocksSize.addAndGet(-tx.getTotalBlockSize()); + totalReplicatedBlocksSize.addAndGet(-tx.getTotalBlockReplicatedSize()); + } + @VisibleForTesting void commitSCMCommandStatus(List deleteBlockStatus, DatanodeID dnId) { processSCMCommandStatus(deleteBlockStatus, dnId); @@ -545,4 +674,96 @@ public void removeTransactionFromDNsCommitMap(List txIds) { public void removeTransactionFromDNsRetryCountMap(List txIds) { txIds.forEach(transactionToRetryCountMap::remove); } + + public void reinitialize(Table configTable) throws IOException { + // DB onFlush() will be called before reinitialization. 
+ this.statefulConfigTable = configTable; + this.initDataDistributionData(); + } + + @VisibleForTesting + public Map getTxSizeMap() { + return txSizeMap; + } + + @VisibleForTesting + public static void setDisableDataDistributionForTest(boolean disabled) { + disableDataDistributionForTest = disabled; + } + + @Nullable + public DeletedBlocksTransactionSummary getTransactionSummary() { + if (!VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.STORAGE_DATA_DISTRIBUTION)) { + return null; + } + return DeletedBlocksTransactionSummary.newBuilder() + .setFirstTxID(firstTxIdForDataDistribution) + .setTotalTransactionCount(totalTxCount.get()) + .setTotalBlockCount(totalBlockCount.get()) + .setTotalBlockSize(totalBlocksSize.get()) + .setTotalBlockReplicatedSize(totalReplicatedBlocksSize.get()) + .build(); + } + + private void initDataDistributionData() throws IOException { + if (VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.STORAGE_DATA_DISTRIBUTION)) { + DeletedBlocksTransactionSummary summary = loadDeletedBlocksSummary(); + if (summary != null) { + isFirstTxIdForDataDistributionSet = true; + firstTxIdForDataDistribution = summary.getFirstTxID(); + totalTxCount.set(summary.getTotalTransactionCount()); + totalBlockCount.set(summary.getTotalBlockCount()); + totalBlocksSize.set(summary.getTotalBlockSize()); + totalReplicatedBlocksSize.set(summary.getTotalBlockReplicatedSize()); + LOG.info("Data distribution is enabled with totalBlockCount {} totalBlocksSize {} lastTxIdBeforeUpgrade {}", + totalBlockCount.get(), totalBlocksSize.get(), firstTxIdForDataDistribution); + } + } else { + LOG.info(HDDSLayoutFeature.STORAGE_DATA_DISTRIBUTION + " is not finalized"); + } + } + + private DeletedBlocksTransactionSummary loadDeletedBlocksSummary() throws IOException { + String propertyName = DeletedBlocksTransactionSummary.class.getSimpleName(); + try { + ByteString byteString = statefulConfigTable.get(SERVICE_NAME); + if (byteString == null) { + // for a new Ozone cluster, 
property not found is an expected state. + LOG.info("Property {} for service {} not found. ", propertyName, SERVICE_NAME); + return null; + } + return DeletedBlocksTransactionSummary.parseFrom(byteString); + } catch (IOException e) { + LOG.error("Failed to get property {} for service {}. DataDistribution function will be disabled.", + propertyName, SERVICE_NAME, e); + throw new IOException("Failed to get property " + propertyName, e); + } + } + + /** + * Block size information of a transaction. + */ + public static class TxBlockInfo { + private long totalBlockCount; + private long totalBlockSize; + private long totalReplicatedBlockSize; + + public TxBlockInfo(long blockCount, long blockSize, long replicatedSize) { + this.totalBlockCount = blockCount; + this.totalBlockSize = blockSize; + this.totalReplicatedBlockSize = replicatedSize; + } + + public long getTotalBlockCount() { + return totalBlockCount; + } + + public long getTotalBlockSize() { + return totalBlockSize; + } + + public long getTotalReplicatedBlockSize() { + return totalReplicatedBlockSize; + } + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java index cbfdddda7ca9..154d2915b95d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java @@ -20,6 +20,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.hdds.protocol.DatanodeID; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsRecordBuilder; @@ -45,6 +46,7 @@ public final class ScmBlockDeletingServiceMetrics implements 
MetricsSource { public static final String SOURCE_NAME = SCMBlockDeletingService.class.getSimpleName(); private final MetricsRegistry registry; + private final BlockManager blockManager; /** * Given all commands are finished and no new coming deletes from OM. @@ -100,15 +102,38 @@ public final class ScmBlockDeletingServiceMetrics implements MetricsSource { private final Map numCommandsDatanode = new ConcurrentHashMap<>(); - private ScmBlockDeletingServiceMetrics() { + @Metric(about = "The number of transactions whose totalBlockSize is fetched from in memory cache") + private MutableGaugeLong numBlockDeletionTransactionSizeFromCache; + + @Metric(about = "The number of transactions whose totalBlockSize is fetched from DB") + private MutableGaugeLong numBlockDeletionTransactionSizeFromDB; + + private static final MetricsInfo NUM_BLOCK_DELETION_TRANSACTIONS = Interns.info( + "numBlockDeletionTransactions", + "The number of transactions in DB."); + + private static final MetricsInfo NUM_BLOCK_OF_ALL_DELETION_TRANSACTIONS = Interns.info( + "numBlockOfAllDeletionTransactions", + "The number of blocks in all transactions in DB."); + + private static final MetricsInfo BLOCK_SIZE_OF_ALL_DELETION_TRANSACTIONS = Interns.info( + "blockSizeOfAllDeletionTransactions", + "The size of all blocks in all transactions in DB."); + + private static final MetricsInfo REPLICATED_BLOCK_SIZE_OF_ALL_DELETION_TRANSACTIONS = Interns.info( + "replicatedBlockSizeOfAllDeletionTransactions", + "The replicated size of all blocks in all transactions in DB."); + + private ScmBlockDeletingServiceMetrics(BlockManager blockManager) { this.registry = new MetricsRegistry(SOURCE_NAME); + this.blockManager = blockManager; } - public static synchronized ScmBlockDeletingServiceMetrics create() { + public static synchronized ScmBlockDeletingServiceMetrics create(BlockManager blockManager) { if (instance == null) { MetricsSystem ms = DefaultMetricsSystem.instance(); instance = ms.register(SOURCE_NAME, 
"SCMBlockDeletingService", - new ScmBlockDeletingServiceMetrics()); + new ScmBlockDeletingServiceMetrics(blockManager)); } return instance; @@ -163,6 +188,14 @@ public void incrProcessedTransaction() { this.numProcessedTransactions.incr(); } + public void incrBlockDeletionTransactionSizeFromCache() { + this.numBlockDeletionTransactionSizeFromCache.incr(); + } + + public void incrBlockDeletionTransactionSizeFromDB() { + this.numBlockDeletionTransactionSizeFromDB.incr(); + } + public void setNumBlockDeletionTransactionDataNodes(long dataNodes) { this.numBlockDeletionTransactionDataNodes.set(dataNodes); } @@ -240,6 +273,14 @@ public long getNumBlockDeletionTransactionDataNodes() { return numBlockDeletionTransactionDataNodes.value(); } + public long getNumBlockDeletionTransactionSizeFromCache() { + return numBlockDeletionTransactionSizeFromCache.value(); + } + + public long getNumBlockDeletionTransactionSizeFromDB() { + return numBlockDeletionTransactionSizeFromDB.value(); + } + @Override public void getMetrics(MetricsCollector metricsCollector, boolean all) { MetricsRecordBuilder builder = metricsCollector.addRecord(SOURCE_NAME); @@ -256,6 +297,21 @@ public void getMetrics(MetricsCollector metricsCollector, boolean all) { numBlockDeletionTransactionDataNodes.snapshot(builder, all); numBlockAddedForDeletionToDN.snapshot(builder, all); + // add metrics for deleted block transaction summary + HddsProtos.DeletedBlocksTransactionSummary summary = blockManager.getDeletedBlockLog().getTransactionSummary(); + if (summary != null) { + numBlockDeletionTransactionSizeFromCache.snapshot(builder, all); + numBlockDeletionTransactionSizeFromDB.snapshot(builder, all); + builder = builder.endRecord().addRecord(SOURCE_NAME) + .addGauge(NUM_BLOCK_DELETION_TRANSACTIONS, summary.getTotalTransactionCount()); + builder = builder.endRecord().addRecord(SOURCE_NAME) + .addGauge(NUM_BLOCK_OF_ALL_DELETION_TRANSACTIONS, summary.getTotalBlockCount()); + builder = 
builder.endRecord().addRecord(SOURCE_NAME) + .addGauge(BLOCK_SIZE_OF_ALL_DELETION_TRANSACTIONS, summary.getTotalBlockSize()); + builder = builder.endRecord().addRecord(SOURCE_NAME) + .addGauge(REPLICATED_BLOCK_SIZE_OF_ALL_DELETION_TRANSACTIONS, summary.getTotalBlockReplicatedSize()); + } + MetricsRecordBuilder recordBuilder = builder; for (Map.Entry e : numCommandsDatanode.entrySet()) { recordBuilder = recordBuilder.endRecord().addRecord(SOURCE_NAME) @@ -399,7 +455,10 @@ public String toString() { .append(numBlockDeletionTransactionFailureOnDatanodes.value()).append('\t') .append("numBlockAddedForDeletionToDN = ") .append(numBlockAddedForDeletionToDN.value()).append('\t') - .append("numDeletionCommandsPerDatanode = ").append(numCommandsDatanode); + .append("numDeletionCommandsPerDatanode = ").append(numCommandsDatanode) + .append("numBlockDeletionTransactionSizeReFetch = ") + .append(numBlockDeletionTransactionSizeFromCache.value()).append('\t') + .append(numBlockDeletionTransactionSizeFromDB.value()).append('\t'); return buffer.toString(); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java index a01effa3a20b..5bb6e01b28de 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java @@ -117,5 +117,13 @@ public void updateDeleteKeySuccessBlocks(long keys) { public void updateDeleteKeyFailedBlocks(long keys) { deleteKeyBlocksFailure.incr(keys); } + + public long getDeleteKeySuccessBlocks() { + return deleteKeyBlocksSuccess.value(); + } + + public long getDeleteKeyFailedBlocks() { + return deleteKeyBlocksFailure.value(); + } } diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBuffer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBuffer.java index f404fd03f1d3..579d24ee7a5c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBuffer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBuffer.java @@ -42,6 +42,10 @@ public interface SCMHADBTransactionBuffer AtomicReference getLatestSnapshotRef(); void flush() throws IOException; + + void pauseAutoFlush(); + + void resumeAutoFlush(); boolean shouldFlush(long snapshotWaitTime); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java index 387b1001c2b1..23dd76e50735 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferImpl.java @@ -54,6 +54,7 @@ public class SCMHADBTransactionBufferImpl implements SCMHADBTransactionBuffer { private final AtomicLong txFlushPending = new AtomicLong(0); private long lastSnapshotTimeMs = 0; private final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(); + private boolean autoFlushEnabled = true; public SCMHADBTransactionBufferImpl(StorageContainerManager scm) throws IOException { @@ -121,6 +122,28 @@ public AtomicReference getLatestSnapshotRef() { return latestSnapshot; } + @Override + public void pauseAutoFlush() { + rwLock.writeLock().lock(); + try { + autoFlushEnabled = false; + LOG.debug("Auto flush is paused for SCM HA DB transaction buffer."); + } finally { + rwLock.writeLock().unlock(); + } + } + + @Override + public void resumeAutoFlush() { + rwLock.writeLock().lock(); + try { + autoFlushEnabled = true; + 
LOG.debug("Auto flush is resumed for SCM HA DB transaction buffer."); + } finally { + rwLock.writeLock().unlock(); + } + } + @Override public void flush() throws IOException { rwLock.writeLock().lock(); @@ -179,7 +202,7 @@ public boolean shouldFlush(long snapshotWaitTime) { rwLock.readLock().lock(); try { long timeDiff = scm.getSystemClock().millis() - lastSnapshotTimeMs; - return txFlushPending.get() > 0 && timeDiff > snapshotWaitTime; + return autoFlushEnabled && txFlushPending.get() > 0 && timeDiff > snapshotWaitTime; } finally { rwLock.readLock().unlock(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferStub.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferStub.java index 64a16d335b2a..7dca3718dec2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferStub.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHADBTransactionBufferStub.java @@ -117,6 +117,14 @@ public void flush() throws IOException { } } + @Override + public void pauseAutoFlush() { + } + + @Override + public void resumeAutoFlush() { + } + @Override public boolean shouldFlush(long snapshotWaitTime) { return true; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java index 00915406a4ce..a3f20476dc38 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java @@ -445,7 +445,7 @@ public void startServices() throws IOException { scm.getPipelineManager().reinitialize(metadataStore.getPipelineTable()); scm.getContainerManager().reinitialize(metadataStore.getContainerTable()); scm.getScmBlockManager().getDeletedBlockLog().reinitialize( - 
metadataStore.getDeletedBlocksTXTable()); + metadataStore.getDeletedBlocksTXTable(), metadataStore.getStatefulServiceConfigTable()); scm.getStatefulServiceStateManager().reinitialize( metadataStore.getStatefulServiceConfigTable()); if (OzoneSecurityUtil.isSecurityEnabled(conf)) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index b9d4b9d6aef5..3b061aa10c01 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -83,6 +83,8 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetDeletedBlocksTxnSummaryRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetDeletedBlocksTxnSummaryResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetExistContainerWithPipelinesInBatchRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetExistContainerWithPipelinesInBatchResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetFailedDeletedBlocksTxnRequestProto; @@ -712,6 +714,14 @@ public ScmContainerLocationResponse processRequest( 
getResetDeletedBlockRetryCount( request.getResetDeletedBlockRetryCountRequest())) .build(); + case GetDeletedBlocksTransactionSummary: + return ScmContainerLocationResponse.newBuilder() + .setCmdType(request.getCmdType()) + .setStatus(Status.OK) + .setGetDeletedBlocksTxnSummaryResponse( + getDeletedBlocksTxnSummary( + request.getGetDeletedBlocksTxnSummaryRequest())) + .build(); case TransferLeadership: return ScmContainerLocationResponse.newBuilder() .setCmdType(request.getCmdType()) @@ -1344,6 +1354,18 @@ public GetFailedDeletedBlocksTxnResponseProto getFailedDeletedBlocksTxn( .build(); } + public GetDeletedBlocksTxnSummaryResponseProto getDeletedBlocksTxnSummary( + GetDeletedBlocksTxnSummaryRequestProto request) throws IOException { + HddsProtos.DeletedBlocksTransactionSummary summary = impl.getDeletedBlockSummary(); + if (summary == null) { + return GetDeletedBlocksTxnSummaryResponseProto.newBuilder().build(); + } else { + return GetDeletedBlocksTxnSummaryResponseProto.newBuilder() + .setSummary(summary) + .build(); + } + } + public TransferLeadershipResponseProto transferScmLeadership( TransferLeadershipRequestProto request) throws IOException { String newLeaderId = request.getNewLeaderId(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/OzoneStorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/OzoneStorageContainerManager.java index c4089bbca5c1..31230f071d59 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/OzoneStorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/OzoneStorageContainerManager.java @@ -24,7 +24,10 @@ import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.balancer.ContainerBalancer; import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager; +import org.apache.hadoop.hdds.scm.ha.SCMHAManager; import 
org.apache.hadoop.hdds.scm.ha.SCMNodeDetails; +import org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator; +import org.apache.hadoop.hdds.scm.metadata.SCMMetadataStore; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; @@ -59,4 +62,10 @@ public interface OzoneStorageContainerManager { SCMNodeDetails getScmNodeDetails(); ReconfigurationHandler getReconfigurationHandler(); + + SCMMetadataStore getScmMetadataStore(); + + SCMHAManager getScmHAManager(); + + SequenceIdGenerator getSequenceIdGen(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java index 60c6384ba822..d2b2b6cbe43c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java @@ -45,7 +45,6 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; -import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -80,6 +79,7 @@ import org.apache.hadoop.ozone.audit.SCMAction; import org.apache.hadoop.ozone.common.BlockGroup; import org.apache.hadoop.ozone.common.DeleteBlockGroupResult; +import org.apache.hadoop.ozone.common.DeletedBlock; import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -268,7 +268,7 @@ public List deleteKeyBlocks( List keyBlocksInfoList) throws IOException { long totalBlocks = 0; for (BlockGroup bg : keyBlocksInfoList) { - totalBlocks += bg.getBlockIDList().size(); + totalBlocks += bg.getDeletedBlocks().size(); } if (LOG.isDebugEnabled()) { 
LOG.debug("SCM is informed by OM to delete {} keys. Total blocks to deleted {}.", @@ -312,8 +312,8 @@ public List deleteKeyBlocks( } for (BlockGroup bg : keyBlocksInfoList) { List blockResult = new ArrayList<>(); - for (BlockID b : bg.getBlockIDList()) { - blockResult.add(new DeleteBlockResult(b, resultCode)); + for (DeletedBlock b : bg.getDeletedBlocks()) { + blockResult.add(new DeleteBlockResult(b.getBlockID(), resultCode)); } results.add(new DeleteBlockGroupResult(bg.getGroupID(), blockResult)); } @@ -478,4 +478,8 @@ public AuditMessage buildAuditMessageForFailure(AuditAction op, Map txIDs) throws IOException { return 0; } + @Nullable + @Override + public DeletedBlocksTransactionSummary getDeletedBlockSummary() { + final Map auditMap = Maps.newHashMap(); + try { + DeletedBlocksTransactionSummary summary = + scm.getScmBlockManager().getDeletedBlockLog().getTransactionSummary(); + AUDIT.logReadSuccess(buildAuditMessageForSuccess( + SCMAction.GET_DELETED_BLOCK_SUMMARY, auditMap)); + return summary; + } catch (Exception ex) { + AUDIT.logReadFailure(buildAuditMessageForFailure( + SCMAction.GET_DELETED_BLOCK_SUMMARY, auditMap, ex)); + throw ex; + } + } + /** * Check if SCM is in safe mode. 
* diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 4253e7b1c114..6b0d6bb97e60 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -184,6 +184,7 @@ import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.OzoneSecurityUtil; import org.apache.hadoop.ozone.common.Storage.StorageState; +import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures; import org.apache.hadoop.ozone.lease.LeaseManager; import org.apache.hadoop.ozone.lease.LeaseManagerNotRunningException; import org.apache.hadoop.ozone.upgrade.DefaultUpgradeFinalizationExecutor; @@ -679,6 +680,7 @@ private void initializeSystemManagers(OzoneConfiguration conf, scmLayoutVersionManager = new HDDSLayoutVersionManager( scmStorageConfig.getLayoutVersion()); + VersionedDatanodeFeatures.initialize(scmLayoutVersionManager); UpgradeFinalizationExecutor finalizationExecutor; @@ -1805,6 +1807,7 @@ public NodeDecommissionManager getScmDecommissionManager() { /** * Returns SCMHAManager. */ + @Override public SCMHAManager getScmHAManager() { return scmHAManager; } @@ -1957,6 +1960,7 @@ public SCMContext getScmContext() { /** * Returns SequenceIdGen. */ + @Override public SequenceIdGenerator getSequenceIdGen() { return sequenceIdGen; } @@ -1995,6 +1999,7 @@ public Map getContainerStateCount() { * Returns the SCM metadata Store. 
* @return SCMMetadataStore */ + @Override public SCMMetadataStore getScmMetadataStore() { return scmMetadataStore; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java index 95e13146deed..52cd943c4dbb 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java @@ -67,7 +67,8 @@ public enum SCMAction implements AuditAction { GET_METRICS, QUERY_NODE, GET_PIPELINE, - RECONCILE_CONTAINER; + RECONCILE_CONTAINER, + GET_DELETED_BLOCK_SUMMARY; @Override public String getAction() { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java index bc0c5cba4d1a..6db9504c7ad2 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java @@ -17,9 +17,12 @@ package org.apache.hadoop.hdds.scm.block; +import static org.apache.hadoop.hdds.scm.block.SCMDeletedBlockTransactionStatusManager.EMPTY_SUMMARY; +import static org.apache.hadoop.ozone.common.BlockGroup.SIZE_NOT_AVAILABLE; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.params.provider.Arguments.arguments; import static org.mockito.Mockito.any; import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.doAnswer; @@ -44,8 +47,10 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import java.util.stream.Collectors; +import java.util.stream.Stream; import 
org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -61,6 +66,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; import org.apache.hadoop.hdds.scm.HddsTestUtils; import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.block.SCMDeletedBlockTransactionStatusManager.TxBlockInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; @@ -75,14 +81,18 @@ import org.apache.hadoop.hdds.scm.server.SCMConfigurator; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.ozone.common.DeletedBlock; import org.apache.hadoop.ozone.protocol.commands.CommandStatus; import org.apache.hadoop.ozone.protocol.commands.DeleteBlocksCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; +import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; /** @@ -96,6 +106,7 @@ public class TestDeletedBlockLog { @TempDir private File testDir; private ContainerManager containerManager; + private BlockManager blockManager; private Table containerTable; private StorageContainerManager scm; private List dnList; @@ -121,7 +132,9 @@ public void setup() throws Exception { containerTable = 
scm.getScmMetadataStore().getContainerTable(); scmHADBTransactionBuffer = new SCMHADBTransactionBufferStub(scm.getScmMetadataStore().getStore()); - metrics = mock(ScmBlockDeletingServiceMetrics.class); + blockManager = mock(BlockManager.class); + when(blockManager.getDeletedBlockLog()).thenReturn(deletedBlockLog); + metrics = ScmBlockDeletingServiceMetrics.create(blockManager); deletedBlockLog = new DeletedBlockLogImpl(conf, scm, containerManager, @@ -198,34 +211,36 @@ private void updateContainerMetadata(long cid, @AfterEach public void tearDown() throws Exception { + ScmBlockDeletingServiceMetrics.unRegister(); deletedBlockLog.close(); scm.stop(); scm.join(); } - private Map> generateData(int dataSize) throws IOException { + private Map> generateData(int dataSize) throws IOException { return generateData(dataSize, HddsProtos.LifeCycleState.CLOSED); } - private Map> generateData(int dataSize, + private Map> generateData(int txCount, HddsProtos.LifeCycleState state) throws IOException { - Map> blockMap = new HashMap<>(); - int continerIDBase = RandomUtils.secure().randomInt(0, 100); + Map> blockMap = new HashMap<>(); + long continerIDBase = RandomUtils.secure().randomLong(0, 100); int localIDBase = RandomUtils.secure().randomInt(0, 1000); - for (int i = 0; i < dataSize; i++) { + long blockSize = 1024 * 1024 * 64; + for (int i = 0; i < txCount; i++) { + List blocks = new ArrayList<>(); long containerID = continerIDBase + i; updateContainerMetadata(containerID, state); - List blocks = new ArrayList<>(); for (int j = 0; j < BLOCKS_PER_TXN; j++) { long localID = localIDBase + j; - blocks.add(localID); + blocks.add(new DeletedBlock(new BlockID(containerID, localID), blockSize + j, blockSize + j)); } blockMap.put(containerID, blocks); } return blockMap; } - private void addTransactions(Map> containerBlocksMap, + private void addTransactions(Map> containerBlocksMap, boolean shouldFlush) throws IOException { deletedBlockLog.addTransactions(containerBlocksMap); if 
(shouldFlush) { @@ -338,15 +353,15 @@ private void mockContainerHealthResult(Boolean healthy) { public void testAddTransactionsIsBatched() throws Exception { conf.setStorageSize(ScmConfigKeys.OZONE_SCM_HA_RAFT_LOG_APPENDER_QUEUE_BYTE_LIMIT, 1, StorageUnit.KB); - DeletedBlockLogStateManager mockStateManager = mock(DeletedBlockLogStateManager.class); + SCMDeletedBlockTransactionStatusManager mockStatusManager = mock(SCMDeletedBlockTransactionStatusManager.class); DeletedBlockLogImpl log = new DeletedBlockLogImpl(conf, scm, containerManager, scmHADBTransactionBuffer, metrics); - log.setDeletedBlockLogStateManager(mockStateManager); + log.setSCMDeletedBlockTransactionStatusManager(mockStatusManager); - Map> containerBlocksMap = generateData(100); + Map> containerBlocksMap = generateData(100); log.addTransactions(containerBlocksMap); - verify(mockStateManager, atLeast(2)).addTransactionsToDB(any()); + verify(mockStatusManager, atLeast(2)).addTransactions(any()); } @Test @@ -576,7 +591,7 @@ public void testFailedAndTimeoutSCMCommandCanBeResend() throws Exception { @Test public void testDNOnlyOneNodeHealthy() throws Exception { - Map> deletedBlocks = generateData(50); + Map> deletedBlocks = generateData(50); addTransactions(deletedBlocks, true); mockContainerHealthResult(false); DatanodeDeletedBlockTransactions transactions @@ -588,12 +603,12 @@ public void testDNOnlyOneNodeHealthy() throws Exception { @Test public void testInadequateReplicaCommit() throws Exception { - Map> deletedBlocks = generateData(50); + Map> deletedBlocks = generateData(50); addTransactions(deletedBlocks, true); long containerID; // let the first 30 container only consisting of only two unhealthy replicas int count = 0; - for (Map.Entry> entry : deletedBlocks.entrySet()) { + for (Map.Entry> entry : deletedBlocks.entrySet()) { containerID = entry.getKey(); mockInadequateReplicaUnhealthyContainerInfo(containerID, count); count += 1; @@ -695,9 +710,9 @@ public void testDeletedBlockTransactions() 
throws IOException { long containerID; // Creates {TXNum} TX in the log. - Map> deletedBlocks = generateData(txNum); + Map> deletedBlocks = generateData(txNum); addTransactions(deletedBlocks, true); - for (Map.Entry> entry :deletedBlocks.entrySet()) { + for (Map.Entry> entry :deletedBlocks.entrySet()) { count++; containerID = entry.getKey(); // let the container replication factor to be ONE @@ -717,10 +732,11 @@ public void testDeletedBlockTransactions() throws IOException { // add two transactions for same container containerID = blocks.get(0).getContainerID(); - Map> deletedBlocksMap = new HashMap<>(); + Map> deletedBlocksMap = new HashMap<>(); long localId = RandomUtils.secure().randomLong(); - deletedBlocksMap.put(containerID, new LinkedList<>( - Collections.singletonList(localId))); + List blockIDList = new ArrayList<>(); + blockIDList.add(new DeletedBlock(new BlockID(containerID, localId), SIZE_NOT_AVAILABLE, SIZE_NOT_AVAILABLE)); + deletedBlocksMap.put(containerID, blockIDList); addTransactions(deletedBlocksMap, true); blocks = getTransactions(txNum * BLOCKS_PER_TXN * ONE); // Only newly added Blocks will be sent, as previously sent transactions @@ -747,7 +763,7 @@ public void testGetTransactionsWithMaxBlocksPerDatanode(int maxAllowedBlockNum) DatanodeDetails dnId1 = dnList.get(0), dnId2 = dnList.get(1); // Creates {TXNum} TX in the log. - Map> deletedBlocks = generateData(txNum); + Map> deletedBlocks = generateData(txNum); addTransactions(deletedBlocks, true); List containerIds = new ArrayList<>(deletedBlocks.keySet()); for (int i = 0; i < containerIds.size(); i++) { @@ -778,7 +794,7 @@ public void testDeletedBlockTransactionsOfDeletedContainer() throws IOException List blocks; // Creates {TXNum} TX in the log. 
- Map> deletedBlocks = generateData(txNum, + Map> deletedBlocks = generateData(txNum, HddsProtos.LifeCycleState.DELETED); addTransactions(deletedBlocks, true); @@ -787,6 +803,147 @@ public void testDeletedBlockTransactionsOfDeletedContainer() throws IOException assertEquals(0, blocks.size()); } + @ParameterizedTest + @ValueSource(ints = {1, 10, 25, 50, 100}) + public void testTransactionSerializedSize(int blockCount) { + long txID = 10000000; + long containerID = 1000000; + List blocks = new ArrayList<>(); + for (int i = 0; i < blockCount; i++) { + blocks.add(new DeletedBlock(new BlockID(containerID, 100000000 + i), 128 * 1024 * 1024, 128 * 1024 * 1024)); + } + List localIdList = blocks.stream().map(b -> b.getBlockID().getLocalID()).collect(Collectors.toList()); + DeletedBlocksTransaction tx1 = DeletedBlocksTransaction.newBuilder() + .setTxID(txID) + .setContainerID(containerID) + .addAllLocalID(localIdList) + .setCount(0) + .setTotalBlockSize(blocks.stream().mapToLong(DeletedBlock::getSize).sum()) + .setTotalBlockReplicatedSize(blocks.stream().mapToLong(DeletedBlock::getReplicatedSize).sum()) + .build(); + DeletedBlocksTransaction tx2 = DeletedBlocksTransaction.newBuilder() + .setTxID(txID) + .setContainerID(containerID) + .addAllLocalID(localIdList) + .setCount(0) + .build(); + /* + * 1 blocks tx with totalBlockSize size is 26 + * 1 blocks tx without totalBlockSize size is 16 + * 10 blocks tx with totalBlockSize size is 73 + * 10 blocks tx without totalBlockSize size is 61 + * 25 blocks tx with totalBlockSize size is 148 + * 25 blocks tx without totalBlockSize size is 136 + * 50 blocks tx with totalBlockSize size is 273 + * 50 blocks tx without totalBlockSize size is 261 + * 100 blocks tx with totalBlockSize size is 523 + * 100 blocks tx without totalBlockSize size is 511 + */ + System.out.println(blockCount + " blocks tx with totalBlockSize size is " + tx1.getSerializedSize()); + System.out.println(blockCount + " blocks tx without totalBlockSize size is " + 
tx2.getSerializedSize()); + } + + public static Stream values() { + return Stream.of( + arguments(100, false, false), + arguments(100, true, false), + arguments(100, true, true), + arguments(1000, false, false), + arguments(1000, true, false), + arguments(1000, true, true), + arguments(1000, false, false), + arguments(1000, true, false), + arguments(1000, true, true), + arguments(100000, false, false), + arguments(100000, true, false), + arguments(100000, true, true) + ); + } + + @ParameterizedTest + @MethodSource("values") + public void testAddRemoveTransactionPerformance(int txCount, boolean dataDistributionFinalized, boolean cacheEnabled) + throws Exception { + Map> data = generateData(txCount); + SCMDeletedBlockTransactionStatusManager statusManager = + deletedBlockLog.getSCMDeletedBlockTransactionStatusManager(); + HddsProtos.DeletedBlocksTransactionSummary summary = statusManager.getTransactionSummary(); + assertEquals(EMPTY_SUMMARY, summary); + + SCMDeletedBlockTransactionStatusManager.setDisableDataDistributionForTest(!dataDistributionFinalized); + long startTime = System.nanoTime(); + deletedBlockLog.addTransactions(data); + scmHADBTransactionBuffer.flush(); + /** + * Before DataDistribution is enabled + * - 979 ms to add 100 txs to DB + * - 275 ms to add 1000 txs to DB + * - 1106 ms to add 10000 txs to DB + * - 11103 ms to add 100000 txs to DB + * After DataDistribution is enabled + * - 908 ms to add 100 txs to DB + * - 351 ms to add 1000 txs to DB + * - 2875 ms to add 10000 txs to DB + * - 12446 ms to add 100000 txs to DB + */ + System.out.println((System.nanoTime() - startTime) / 100000 + " ms to add " + txCount + " txs to DB, " + + "dataDistributionFinalized " + dataDistributionFinalized); + summary = statusManager.getTransactionSummary(); + if (dataDistributionFinalized) { + assertEquals(txCount, summary.getTotalTransactionCount()); + } else { + assertEquals(0, summary.getTotalTransactionCount()); + } + + ArrayList txIdList = 
data.keySet().stream().collect(Collectors.toCollection(ArrayList::new)); + long initialHitFromCacheCount = metrics.getNumBlockDeletionTransactionSizeFromCache(); + long initialHitFromDBCount = metrics.getNumBlockDeletionTransactionSizeFromDB(); + + if (dataDistributionFinalized && cacheEnabled) { + Map txSizeMap = statusManager.getTxSizeMap(); + for (Map.Entry> entry : data.entrySet()) { + List deletedBlockList = entry.getValue(); + TxBlockInfo txBlockInfo = new TxBlockInfo(deletedBlockList.size(), + deletedBlockList.stream().map(DeletedBlock::getSize).reduce(0L, Long::sum), + deletedBlockList.stream().map(DeletedBlock::getReplicatedSize).reduce(0L, Long::sum)); + txSizeMap.put(entry.getKey(), txBlockInfo); + } + } + startTime = System.nanoTime(); + statusManager.removeTransactions(txIdList); + scmHADBTransactionBuffer.flush(); + /** + * Before DataDistribution is enabled + * - 19 ms to remove 100 txs from DB + * - 26 ms to remove 1000 txs from DB + * - 142 ms to remove 10000 txs from DB + * - 2571 ms to remove 100000 txs from DB + * After DataDistribution is enabled (all cache miss) + * - 62 ms to remove 100 txs from DB + * - 186 ms to remove 1000 txs from DB + * - 968 ms to remove 10000 txs from DB + * - 8635 ms to remove 100000 txs from DB + * After DataDistribution is enabled (all cache hit) + * - 40 ms to remove 100 txs from DB + * - 112 ms to remove 1000 txs from DB + * - 412 ms to remove 10000 txs from DB + * - 3499 ms to remove 100000 txs from DB + */ + System.out.println((System.nanoTime() - startTime) / 100000 + " ms to remove " + txCount + " txs from DB, " + + "dataDistributionFinalized " + dataDistributionFinalized + ", cacheEnabled " + cacheEnabled); + if (dataDistributionFinalized) { + if (cacheEnabled) { + GenericTestUtils.waitFor(() -> + metrics.getNumBlockDeletionTransactionSizeFromCache() - initialHitFromCacheCount == txCount, 100, 5000); + assertEquals(0, metrics.getNumBlockDeletionTransactionSizeFromDB() - initialHitFromDBCount); + } else { + 
GenericTestUtils.waitFor(() -> + metrics.getNumBlockDeletionTransactionSizeFromDB() - initialHitFromDBCount == txCount, 100, 5000); + assertEquals(0, metrics.getNumBlockDeletionTransactionSizeFromCache() - initialHitFromCacheCount); + } + } + } + private void mockStandAloneContainerInfo(long containerID, DatanodeDetails dd) throws IOException { List dns = Collections.singletonList(dd); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestSCMBlockDeletingService.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestSCMBlockDeletingService.java index bc60c8c4ff28..de4b13e5b7d0 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestSCMBlockDeletingService.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestSCMBlockDeletingService.java @@ -72,7 +72,7 @@ public void setup() throws Exception { nodeManager = mock(NodeManager.class); eventPublisher = mock(EventPublisher.class); conf = new OzoneConfiguration(); - metrics = ScmBlockDeletingServiceMetrics.create(); + metrics = ScmBlockDeletingServiceMetrics.create(mock(BlockManager.class)); when(nodeManager.getTotalDatanodeCommandCount(any(), any())).thenReturn(0); SCMServiceManager scmServiceManager = mock(SCMServiceManager.class); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/AbstractContainerSafeModeRuleTest.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/AbstractContainerSafeModeRuleTest.java new file mode 100644 index 000000000000..7bfdecc71964 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/AbstractContainerSafeModeRuleTest.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.safemode; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.DatanodeID; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerManager; +import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport; +import org.apache.hadoop.hdds.server.events.EventQueue; +import 
org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +/** + * Abstract base class for container safe mode rule tests. + */ +public abstract class AbstractContainerSafeModeRuleTest { + private List containers; + private AbstractContainerSafeModeRule rule; + + @BeforeEach + public void setup() throws ContainerNotFoundException { + final ContainerManager containerManager = mock(ContainerManager.class); + final ConfigurationSource conf = mock(ConfigurationSource.class); + final EventQueue eventQueue = mock(EventQueue.class); + final SCMSafeModeManager safeModeManager = mock(SCMSafeModeManager.class); + final SafeModeMetrics metrics = mock(SafeModeMetrics.class); + + when(safeModeManager.getSafeModeMetrics()).thenReturn(metrics); + containers = new ArrayList<>(); + when(containerManager.getContainers(getReplicationType())).thenReturn(containers); + when(containerManager.getContainer(any(ContainerID.class))).thenAnswer(invocation -> { + ContainerID id = invocation.getArgument(0); + return containers.stream() + .filter(c -> c.containerID().equals(id)) + .findFirst() + .orElseThrow(ContainerNotFoundException::new); + }); + + rule = createRule(eventQueue, conf, containerManager, safeModeManager); + rule.setValidateBasedOnReportProcessing(false); + } + + @Test + public void testRefreshInitializeContainers() { + containers.add(mockContainer(LifeCycleState.OPEN, 1L)); + containers.add(mockContainer(LifeCycleState.CLOSED, 2L)); + rule.refresh(true); + + assertEquals(0.0, rule.getCurrentContainerThreshold()); + } + + @ParameterizedTest + @EnumSource(value = LifeCycleState.class, + names = {"OPEN", "CLOSING", "QUASI_CLOSED", "CLOSED", "DELETING", "DELETED", "RECOVERING"}) + public void testValidateReturnsTrueAndFalse(LifeCycleState state) { + containers.add(mockContainer(state, 1L)); + rule.refresh(true); + + boolean expected = state != 
LifeCycleState.QUASI_CLOSED && state != LifeCycleState.CLOSED; + assertEquals(expected, rule.validate()); + } + + @Test + public void testProcessContainer() { + long containerId = 123L; + containers.add(mockContainer(LifeCycleState.CLOSED, containerId)); + rule.refresh(true); + + assertEquals(0.0, rule.getCurrentContainerThreshold()); + + // Send as many distinct reports as the container's minReplica requires + int minReplica = rule.getMinReplica(ContainerID.valueOf(containerId)); + for (int i = 0; i < minReplica; i++) { + rule.process(getNewContainerReport(containerId)); + } + + assertEquals(1.0, rule.getCurrentContainerThreshold()); + } + + private NodeRegistrationContainerReport getNewContainerReport(long containerID) { + ContainerReplicaProto replica = mock(ContainerReplicaProto.class); + ContainerReportsProto containerReport = mock(ContainerReportsProto.class); + NodeRegistrationContainerReport report = mock(NodeRegistrationContainerReport.class); + DatanodeDetails datanodeDetails = mock(DatanodeDetails.class); + + when(replica.getContainerID()).thenReturn(containerID); + when(containerReport.getReportsList()).thenReturn(Collections.singletonList(replica)); + when(report.getReport()).thenReturn(containerReport); + when(report.getDatanodeDetails()).thenReturn(datanodeDetails); + when(datanodeDetails.getID()).thenReturn(DatanodeID.randomID()); + + return report; + } + + @Test + public void testAllContainersClosed() { + containers.add(mockContainer(LifeCycleState.CLOSED, 11L)); + containers.add(mockContainer(LifeCycleState.CLOSED, 32L)); + rule.refresh(true); + + assertEquals(0.0, rule.getCurrentContainerThreshold(), "Threshold should be 0.0 when all containers are closed"); + assertFalse(rule.validate(), "Validate should return false when all containers are closed"); + } + + @Test + public void testAllContainersOpen() { + containers.add(mockContainer(LifeCycleState.OPEN, 11L)); + containers.add(mockContainer(LifeCycleState.OPEN, 32L)); + rule.refresh(true); + + 
assertEquals(1.0, rule.getCurrentContainerThreshold(), "Threshold should be 1.0 when all containers are open"); + assertTrue(rule.validate(), "Validate should return true when all containers are open"); + } + + @Test + public void testDuplicateContainerIdsInReports() { + long containerId = 42L; + containers.add(mockContainer(LifeCycleState.OPEN, containerId)); + rule.refresh(true); + + ContainerReplicaProto replica = mock(ContainerReplicaProto.class); + ContainerReportsProto containerReport = mock(ContainerReportsProto.class); + NodeRegistrationContainerReport report = mock(NodeRegistrationContainerReport.class); + DatanodeDetails datanodeDetails = mock(DatanodeDetails.class); + + when(replica.getContainerID()).thenReturn(containerId); + when(containerReport.getReportsList()).thenReturn(Collections.singletonList(replica)); + when(report.getReport()).thenReturn(containerReport); + when(report.getDatanodeDetails()).thenReturn(datanodeDetails); + when(datanodeDetails.getID()).thenReturn(DatanodeID.randomID()); + + rule.process(report); + rule.process(report); + + assertEquals(1.0, rule.getCurrentContainerThreshold(), "Duplicated containers should be counted only once"); + } + + @Test + public void testValidateBasedOnReportProcessingTrue() { + rule.setValidateBasedOnReportProcessing(true); + long containerId = 1L; + containers.add(mockContainer(LifeCycleState.OPEN, containerId)); + rule.refresh(true); + + ContainerReplicaProto replica = mock(ContainerReplicaProto.class); + ContainerReportsProto reportsProto = mock(ContainerReportsProto.class); + NodeRegistrationContainerReport report = mock(NodeRegistrationContainerReport.class); + DatanodeDetails datanodeDetails = mock(DatanodeDetails.class); + + when(replica.getContainerID()).thenReturn(containerId); + when(reportsProto.getReportsList()).thenReturn(Collections.singletonList(replica)); + when(report.getReport()).thenReturn(reportsProto); + when(report.getDatanodeDetails()).thenReturn(datanodeDetails); + 
when(datanodeDetails.getID()).thenReturn(DatanodeID.randomID()); + + rule.process(report); + + assertTrue(rule.validate(), "Should validate based on reported containers"); + } + + protected abstract ReplicationType getReplicationType(); + + protected abstract AbstractContainerSafeModeRule createRule( + EventQueue eventQueue, + ConfigurationSource conf, + ContainerManager containerManager, + SCMSafeModeManager safeModeManager + ); + + protected abstract ContainerInfo mockContainer(LifeCycleState state, long containerID); +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestECContainerSafeModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestECContainerSafeModeRule.java index 23dcbfd979a2..8390747cf5c8 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestECContainerSafeModeRule.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestECContainerSafeModeRule.java @@ -17,187 +17,39 @@ package org.apache.hadoop.hdds.scm.safemode; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.conf.ConfigurationSource; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.DatanodeID; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; -import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; -import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; -import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport; import org.apache.hadoop.hdds.server.events.EventQueue; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.EnumSource; /** * This class tests ECContainerSafeModeRule. */ -public class TestECContainerSafeModeRule { - private List containers; - private ECContainerSafeModeRule rule; - - @BeforeEach - public void setup() throws ContainerNotFoundException { - final ContainerManager containerManager = mock(ContainerManager.class); - final ConfigurationSource conf = mock(ConfigurationSource.class); - final EventQueue eventQueue = mock(EventQueue.class); - final SCMSafeModeManager safeModeManager = mock(SCMSafeModeManager.class); - final SafeModeMetrics metrics = mock(SafeModeMetrics.class); - containers = new ArrayList<>(); - when(containerManager.getContainers(ReplicationType.EC)).thenReturn(containers); - when(containerManager.getContainer(any(ContainerID.class))).thenAnswer(invocation -> { - ContainerID id = invocation.getArgument(0); - return containers.stream() - .filter(c -> c.containerID().equals(id)) - .findFirst() - .orElseThrow(ContainerNotFoundException::new); - }); - - when(safeModeManager.getSafeModeMetrics()).thenReturn(metrics); - - rule = new ECContainerSafeModeRule(eventQueue, conf, containerManager, safeModeManager); - rule.setValidateBasedOnReportProcessing(false); - } - - @Test - public void testRefreshInitializeECContainers() { - containers.add(mockECContainer(LifeCycleState.CLOSED, 1L)); 
- containers.add(mockECContainer(LifeCycleState.OPEN, 2L)); - - rule.refresh(true); - - assertEquals(0.0, rule.getCurrentContainerThreshold()); - } - - @ParameterizedTest - @EnumSource(value = LifeCycleState.class, - names = {"OPEN", "CLOSING", "QUASI_CLOSED", "CLOSED", "DELETING", "DELETED", "RECOVERING"}) - public void testValidateReturnsTrueAndFalse(LifeCycleState state) { - containers.add(mockECContainer(state, 1L)); - rule.refresh(true); - boolean expected = state != LifeCycleState.QUASI_CLOSED && state != LifeCycleState.CLOSED; - assertEquals(expected, rule.validate()); - } - - @Test - public void testProcessECContainer() { - long containerId = 123L; - containers.add(mockECContainer(LifeCycleState.CLOSED, containerId)); - rule.refresh(true); - - assertEquals(0.0, rule.getCurrentContainerThreshold()); - - // We need at least 3 replicas to be reported to validate the rule - rule.process(getNewContainerReport(containerId)); - rule.process(getNewContainerReport(containerId)); - rule.process(getNewContainerReport(containerId)); - - assertEquals(1.0, rule.getCurrentContainerThreshold()); - } - - private NodeRegistrationContainerReport getNewContainerReport(long containerID) { - DatanodeDetails datanode = mock(DatanodeDetails.class); - ContainerReportsProto containerReport = mock(ContainerReportsProto.class); - NodeRegistrationContainerReport report = mock(NodeRegistrationContainerReport.class); - ContainerReplicaProto replica = mock(ContainerReplicaProto.class); - - when(report.getDatanodeDetails()).thenReturn(datanode); - when(datanode.getID()).thenReturn(DatanodeID.randomID()); - when(replica.getContainerID()).thenReturn(containerID); - when(containerReport.getReportsList()).thenReturn(Collections.singletonList(replica)); - when(report.getReport()).thenReturn(containerReport); - return report; - } - - @Test - public void testAllContainersClosed() { - containers.add(mockECContainer(LifeCycleState.CLOSED, 11L)); - 
containers.add(mockECContainer(LifeCycleState.CLOSED, 32L)); - - rule.refresh(true); - - assertEquals(0.0, rule.getCurrentContainerThreshold(), "Threshold should be 0.0 when all containers are closed"); - assertFalse(rule.validate(), "Validate should return false when all containers are closed"); - } - - @Test - public void testAllContainersOpen() { - containers.add(mockECContainer(LifeCycleState.OPEN, 11L)); - containers.add(mockECContainer(LifeCycleState.OPEN, 32L)); - - rule.refresh(true); - - assertEquals(1.0, rule.getCurrentContainerThreshold(), "Threshold should be 1.0 when all containers are open"); - assertTrue(rule.validate(), "Validate should return true when all containers are open"); +public class TestECContainerSafeModeRule extends AbstractContainerSafeModeRuleTest { + @Override + protected ReplicationType getReplicationType() { + return ReplicationType.EC; } - @Test - public void testDuplicateContainerIdsInReports() { - long containerId = 42L; - containers.add(mockECContainer(LifeCycleState.OPEN, containerId)); - - rule.refresh(true); - - ContainerReplicaProto replica = mock(ContainerReplicaProto.class); - ContainerReportsProto containerReport = mock(ContainerReportsProto.class); - NodeRegistrationContainerReport report = mock(NodeRegistrationContainerReport.class); - DatanodeDetails datanodeDetails = mock(DatanodeDetails.class); - - when(replica.getContainerID()).thenReturn(containerId); - when(containerReport.getReportsList()).thenReturn(Collections.singletonList(replica)); - when(report.getReport()).thenReturn(containerReport); - when(report.getDatanodeDetails()).thenReturn(datanodeDetails); - when(datanodeDetails.getID()).thenReturn(DatanodeID.randomID()); - - rule.process(report); - rule.process(report); - - assertEquals(1.0, rule.getCurrentContainerThreshold(), "Duplicated containers should be counted only once"); - } - - @Test - public void testValidateBasedOnReportProcessingTrue() throws Exception { - 
rule.setValidateBasedOnReportProcessing(true); - long containerId = 1L; - containers.add(mockECContainer(LifeCycleState.OPEN, containerId)); - - rule.refresh(true); - - ContainerReplicaProto replica = mock(ContainerReplicaProto.class); - ContainerReportsProto reportsProto = mock(ContainerReportsProto.class); - NodeRegistrationContainerReport report = mock(NodeRegistrationContainerReport.class); - DatanodeDetails datanodeDetails = mock(DatanodeDetails.class); - - when(replica.getContainerID()).thenReturn(containerId); - when(reportsProto.getReportsList()).thenReturn(Collections.singletonList(replica)); - when(report.getReport()).thenReturn(reportsProto); - when(report.getDatanodeDetails()).thenReturn(datanodeDetails); - when(datanodeDetails.getID()).thenReturn(DatanodeID.randomID()); - - - rule.process(report); - - assertTrue(rule.validate(), "Should validate based on reported containers"); + @Override + protected AbstractContainerSafeModeRule createRule( + EventQueue eventQueue, + ConfigurationSource conf, + ContainerManager containerManager, + SCMSafeModeManager safeModeManager + ) { + return new ECContainerSafeModeRule(eventQueue, conf, containerManager, safeModeManager); } - private static ContainerInfo mockECContainer(LifeCycleState state, long containerID) { + @Override + protected ContainerInfo mockContainer(LifeCycleState state, long containerID) { ContainerInfo container = mock(ContainerInfo.class); when(container.getReplicationType()).thenReturn(ReplicationType.EC); when(container.getState()).thenReturn(state); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestRatisContainerSafeModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestRatisContainerSafeModeRule.java index 58929ffdd3fb..d6b34ec8e755 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestRatisContainerSafeModeRule.java +++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestRatisContainerSafeModeRule.java @@ -17,186 +17,41 @@ package org.apache.hadoop.hdds.scm.safemode; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.conf.ConfigurationSource; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.DatanodeID; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; -import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; -import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport; import org.apache.hadoop.hdds.server.events.EventQueue; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.EnumSource; /** * This class tests RatisContainerSafeModeRule. 
*/ -public class TestRatisContainerSafeModeRule { +public class TestRatisContainerSafeModeRule extends AbstractContainerSafeModeRuleTest { - private List containers; - private RatisContainerSafeModeRule rule; - - @BeforeEach - public void setup() throws ContainerNotFoundException { - final ContainerManager containerManager = mock(ContainerManager.class); - final ConfigurationSource conf = mock(ConfigurationSource.class); - final EventQueue eventQueue = mock(EventQueue.class); - final SCMSafeModeManager safeModeManager = mock(SCMSafeModeManager.class); - final SafeModeMetrics metrics = mock(SafeModeMetrics.class); - - when(safeModeManager.getSafeModeMetrics()).thenReturn(metrics); - containers = new ArrayList<>(); - when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers); - when(containerManager.getContainer(any(ContainerID.class))).thenAnswer(invocation -> { - ContainerID id = invocation.getArgument(0); - return containers.stream() - .filter(c -> c.containerID().equals(id)) - .findFirst() - .orElseThrow(ContainerNotFoundException::new); - }); - - rule = new RatisContainerSafeModeRule(eventQueue, conf, containerManager, safeModeManager); - rule.setValidateBasedOnReportProcessing(false); - } - - @Test - public void testRefreshInitializeRatisContainers() { - containers.add(mockRatisContainer(LifeCycleState.CLOSED, 1L)); - containers.add(mockRatisContainer(LifeCycleState.OPEN, 2L)); - - rule.refresh(true); - - assertEquals(0.0, rule.getCurrentContainerThreshold()); - } - - @ParameterizedTest - @EnumSource(value = LifeCycleState.class, - names = {"OPEN", "CLOSING", "QUASI_CLOSED", "CLOSED", "DELETING", "DELETED", "RECOVERING"}) - public void testValidateReturnsTrueAndFalse(LifeCycleState state) { - containers.add(mockRatisContainer(state, 1L)); - rule.refresh(true); - - boolean expected = state != LifeCycleState.QUASI_CLOSED && state != LifeCycleState.CLOSED; - assertEquals(expected, rule.validate()); + @Override + protected ReplicationType 
getReplicationType() { + return ReplicationType.RATIS; } - @Test - public void testProcessRatisContainer() { - long containerId = 123L; - containers.add(mockRatisContainer(LifeCycleState.CLOSED, containerId)); - - rule.refresh(true); - - assertEquals(0.0, rule.getCurrentContainerThreshold()); - - ContainerReplicaProto replica = mock(ContainerReplicaProto.class); - List replicas = new ArrayList<>(); - replicas.add(replica); - ContainerReportsProto containerReport = mock(ContainerReportsProto.class); - NodeRegistrationContainerReport report = mock(NodeRegistrationContainerReport.class); - DatanodeDetails datanodeDetails = mock(DatanodeDetails.class); - - when(replica.getContainerID()).thenReturn(containerId); - when(containerReport.getReportsList()).thenReturn(replicas); - when(report.getReport()).thenReturn(containerReport); - when(report.getDatanodeDetails()).thenReturn(datanodeDetails); - when(datanodeDetails.getID()).thenReturn(DatanodeID.randomID()); - - rule.process(report); - - assertEquals(1.0, rule.getCurrentContainerThreshold()); - } - - @Test - public void testAllContainersClosed() throws ContainerNotFoundException { - containers.add(mockRatisContainer(LifeCycleState.CLOSED, 11L)); - containers.add(mockRatisContainer(LifeCycleState.CLOSED, 32L)); - - rule.refresh(true); - - assertEquals(0.0, rule.getCurrentContainerThreshold(), "Threshold should be 0.0 when all containers are closed"); - assertFalse(rule.validate(), "Validate should return false when all containers are closed"); + @Override + protected AbstractContainerSafeModeRule createRule( + EventQueue eventQueue, + ConfigurationSource conf, + ContainerManager containerManager, + SCMSafeModeManager safeModeManager + ) { + return new RatisContainerSafeModeRule(eventQueue, conf, containerManager, safeModeManager); } - @Test - public void testAllContainersOpen() { - containers.add(mockRatisContainer(LifeCycleState.OPEN, 11L)); - containers.add(mockRatisContainer(LifeCycleState.OPEN, 32L)); - - 
rule.refresh(false); - - assertEquals(1.0, rule.getCurrentContainerThreshold(), "Threshold should be 1.0 when all containers are open"); - assertTrue(rule.validate(), "Validate should return true when all containers are open"); - } - - @Test - public void testDuplicateContainerIdsInReports() { - long containerId = 42L; - containers.add(mockRatisContainer(LifeCycleState.OPEN, containerId)); - - rule.refresh(true); - - ContainerReplicaProto replica = mock(ContainerReplicaProto.class); - ContainerReportsProto containerReport = mock(ContainerReportsProto.class); - NodeRegistrationContainerReport report = mock(NodeRegistrationContainerReport.class); - DatanodeDetails datanodeDetails = mock(DatanodeDetails.class); - - when(replica.getContainerID()).thenReturn(containerId); - when(containerReport.getReportsList()).thenReturn(Collections.singletonList(replica)); - when(report.getReport()).thenReturn(containerReport); - when(report.getDatanodeDetails()).thenReturn(datanodeDetails); - when(datanodeDetails.getID()).thenReturn(DatanodeID.randomID()); - - rule.process(report); - rule.process(report); - - assertEquals(1.0, rule.getCurrentContainerThreshold(), "Duplicated containers should be counted only once"); - } - - @Test - public void testValidateBasedOnReportProcessingTrue() throws Exception { - rule.setValidateBasedOnReportProcessing(true); - long containerId = 1L; - containers.add(mockRatisContainer(LifeCycleState.OPEN, containerId)); - - rule.refresh(false); - - ContainerReplicaProto replica = mock(ContainerReplicaProto.class); - ContainerReportsProto reportsProto = mock(ContainerReportsProto.class); - NodeRegistrationContainerReport report = mock(NodeRegistrationContainerReport.class); - DatanodeDetails datanodeDetails = mock(DatanodeDetails.class); - - when(replica.getContainerID()).thenReturn(containerId); - when(reportsProto.getReportsList()).thenReturn(Collections.singletonList(replica)); - when(report.getReport()).thenReturn(reportsProto); - 
when(report.getDatanodeDetails()).thenReturn(datanodeDetails); - when(datanodeDetails.getID()).thenReturn(DatanodeID.randomID()); - - rule.process(report); - - assertTrue(rule.validate(), "Should validate based on reported containers"); - } - - private static ContainerInfo mockRatisContainer(LifeCycleState state, long containerID) { + @Override + protected ContainerInfo mockContainer(LifeCycleState state, long containerID) { ContainerInfo container = mock(ContainerInfo.class); when(container.getReplicationType()).thenReturn(ReplicationType.RATIS); when(container.getState()).thenReturn(state); @@ -207,5 +62,4 @@ private static ContainerInfo mockRatisContainer(LifeCycleState state, long conta .thenReturn(RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE)); return container; } - } diff --git a/hadoop-hdds/test-utils/pom.xml b/hadoop-hdds/test-utils/pom.xml index 4dd86c7dc1ff..7b59c9196e06 100644 --- a/hadoop-hdds/test-utils/pom.xml +++ b/hadoop-hdds/test-utils/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone hdds - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT hdds-test-utils - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HDDS Test Utils Apache Ozone Distributed Data Store Test Utils diff --git a/hadoop-ozone/cli-admin/pom.xml b/hadoop-ozone/cli-admin/pom.xml index 7357cdeb3bff..9d713e43bf91 100644 --- a/hadoop-ozone/cli-admin/pom.xml +++ b/hadoop-ozone/cli-admin/pom.xml @@ -17,12 +17,12 @@ org.apache.ozone hdds-hadoop-dependency-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../../hadoop-hdds/hadoop-dependency-client ozone-cli-admin - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone CLI Admin Apache Ozone CLI Admin diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java index 133166dec487..6bad18d29018 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java +++ 
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ReadContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionSummary; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; @@ -175,7 +176,7 @@ public ContainerWithPipeline createContainer(String owner) * @throws IOException */ public void createContainer(XceiverClientSpi client, - long containerId) throws IOException { + long containerId) throws IOException { String encodedToken = getEncodedContainerToken(containerId); ContainerProtocolCalls.createContainer(client, containerId, encodedToken); @@ -184,7 +185,7 @@ public void createContainer(XceiverClientSpi client, // creation state. 
if (LOG.isDebugEnabled()) { LOG.debug("Created container {} machines {}", containerId, - client.getPipeline().getNodes()); + client.getPipeline().getNodes()); } } @@ -199,7 +200,7 @@ public String getEncodedContainerToken(long containerId) throws IOException { @Override public ContainerWithPipeline createContainer(HddsProtos.ReplicationType type, - HddsProtos.ReplicationFactor factor, String owner) throws IOException { + HddsProtos.ReplicationFactor factor, String owner) throws IOException { ReplicationConfig replicationConfig = ReplicationConfig.fromProtoTypeAndFactor(replicationType, factor); return createContainer(replicationConfig, owner); @@ -210,7 +211,7 @@ public ContainerWithPipeline createContainer(ReplicationConfig replicationConfig XceiverClientSpi client = null; XceiverClientManager clientManager = getXceiverClientManager(); try { - ContainerWithPipeline containerWithPipeline = + ContainerWithPipeline containerWithPipeline = storageContainerLocationClient.allocateContainer(replicationConfig, owner); Pipeline pipeline = containerWithPipeline.getPipeline(); // connect to pipeline leader and allocate container on leader datanode. 
@@ -259,14 +260,14 @@ public List recommissionNodes(List hosts) @Override public List startMaintenanceNodes(List hosts, - int endHours, boolean force) throws IOException { + int endHours, boolean force) throws IOException { return storageContainerLocationClient.startMaintenanceNodes( hosts, endHours, force); } @Override public Pipeline createReplicationPipeline(HddsProtos.ReplicationType type, - HddsProtos.ReplicationFactor factor, HddsProtos.NodePool nodePool) + HddsProtos.ReplicationFactor factor, HddsProtos.NodePool nodePool) throws IOException { return storageContainerLocationClient.createReplicationPipeline(type, factor, nodePool); @@ -317,7 +318,7 @@ public void close() { @Override public void deleteContainer(long containerId, Pipeline pipeline, - boolean force) throws IOException { + boolean force) throws IOException { XceiverClientSpi client = null; XceiverClientManager clientManager = getXceiverClientManager(); try { @@ -348,7 +349,7 @@ public void deleteContainer(long containerID, boolean force) @Override public ContainerListResult listContainer(long startContainerID, - int count) throws IOException { + int count) throws IOException { if (count > maxCountOfContainerList) { LOG.warn("Attempting to list {} containers. However, this exceeds" + " the cluster's current limit of {}. The results will be capped at the" + @@ -361,9 +362,9 @@ public ContainerListResult listContainer(long startContainerID, @Override public ContainerListResult listContainer(long startContainerID, - int count, HddsProtos.LifeCycleState state, - HddsProtos.ReplicationType repType, - ReplicationConfig replicationConfig) throws IOException { + int count, HddsProtos.LifeCycleState state, + HddsProtos.ReplicationType repType, + ReplicationConfig replicationConfig) throws IOException { if (count > maxCountOfContainerList) { LOG.warn("Attempting to list {} containers. However, this exceeds" + " the cluster's current limit of {}. 
The results will be capped at the" + @@ -376,7 +377,7 @@ public ContainerListResult listContainer(long startContainerID, @Override public ContainerDataProto readContainer(long containerID, - Pipeline pipeline) throws IOException { + Pipeline pipeline) throws IOException { XceiverClientManager clientManager = getXceiverClientManager(); String encodedToken = getEncodedContainerToken(containerID); XceiverClientSpi client = null; @@ -396,8 +397,7 @@ public ContainerDataProto readContainer(long containerID, } } - public Map - readContainerFromAllNodes(long containerID, Pipeline pipeline) + public Map readContainerFromAllNodes(long containerID, Pipeline pipeline) throws IOException, InterruptedException { XceiverClientManager clientManager = getXceiverClientManager(); String encodedToken = getEncodedContainerToken(containerID); @@ -434,8 +434,7 @@ public ContainerWithPipeline getContainerWithPipeline(long containerId) } @Override - public List - getContainerReplicas(long containerId) throws IOException { + public List getContainerReplicas(long containerId) throws IOException { List protos = storageContainerLocationClient.getContainerReplicas(containerId, ClientVersion.CURRENT_VERSION); @@ -550,6 +549,11 @@ public void transferLeadership(String newLeaderId) throws IOException { storageContainerLocationClient.transferLeadership(newLeaderId); } + @Override + public DeletedBlocksTransactionSummary getDeletedBlockSummary() throws IOException { + return storageContainerLocationClient.getDeletedBlockSummary(); + } + @Override public List getDatanodeUsageInfo( String address, String uuid) throws IOException { diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java index d536b81be140..e096a55b95c6 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java +++ 
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java @@ -29,6 +29,7 @@ import org.apache.hadoop.ozone.OmUtils; import org.apache.hadoop.ozone.admin.OzoneAdmin; import org.apache.hadoop.ozone.admin.om.lease.LeaseSubCommand; +import org.apache.hadoop.ozone.admin.om.snapshot.SnapshotSubCommand; import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.protocol.ClientProtocol; @@ -59,7 +60,8 @@ UpdateRangerSubcommand.class, TransferOmLeaderSubCommand.class, FetchKeySubCommand.class, - LeaseSubCommand.class + LeaseSubCommand.class, + SnapshotSubCommand.class }) @MetaInfServices(AdminSubcommand.class) public class OMAdmin implements AdminSubcommand { diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/DefragSubCommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/DefragSubCommand.java new file mode 100644 index 000000000000..6062353d60ba --- /dev/null +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/DefragSubCommand.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.admin.om.snapshot; + +import java.io.IOException; +import java.util.concurrent.Callable; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.admin.om.OMAdmin; +import org.apache.hadoop.ozone.om.helpers.OMNodeDetails; +import org.apache.hadoop.ozone.om.protocolPB.OMAdminProtocolClientSideImpl; +import org.apache.hadoop.security.UserGroupInformation; +import picocli.CommandLine; + +/** + * Handler of ozone admin om snapshot defrag command. + */ +@CommandLine.Command( + name = "defrag", + description = "Triggers the Snapshot Defragmentation Service to run " + + "immediately. This command manually initiates the snapshot " + + "defragmentation process which compacts snapshot data and removes " + + "fragmentation to improve storage efficiency. " + + "This command works only on OzoneManager HA cluster.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class DefragSubCommand implements Callable { + + @CommandLine.ParentCommand + private SnapshotSubCommand parent; + + @CommandLine.Option( + names = {"-id", "--service-id"}, + description = "Ozone Manager Service ID" + ) + private String omServiceId; + + @CommandLine.Option( + names = {"--node-id"}, + description = "NodeID of the OM to trigger snapshot defragmentation on.", + required = false + ) + private String nodeId; + + @CommandLine.Option( + names = {"--no-wait"}, + description = "Do not wait for the defragmentation task to complete. 
" + + "The command will return immediately after triggering the task.", + defaultValue = "false" + ) + private boolean noWait; + + @Override + public Void call() throws Exception { + // Navigate up to get OMAdmin + OMAdmin omAdmin = getOMAdmin(); + OzoneConfiguration conf = omAdmin.getParent().getOzoneConf(); + OMNodeDetails omNodeDetails = OMNodeDetails.getOMNodeDetailsFromConf( + conf, omServiceId, nodeId); + + if (omNodeDetails == null) { + System.err.println("Error: OMNodeDetails could not be determined with given " + + "service ID and node ID."); + return null; + } + + try (OMAdminProtocolClientSideImpl omAdminProtocolClient = createClient(conf, omNodeDetails)) { + execute(omAdminProtocolClient); + } catch (IOException ex) { + System.err.println("Failed to trigger snapshot defragmentation: " + + ex.getMessage()); + throw ex; + } + + return null; + } + + protected OMAdminProtocolClientSideImpl createClient( + OzoneConfiguration conf, OMNodeDetails omNodeDetails) throws IOException { + return OMAdminProtocolClientSideImpl.createProxyForSingleOM(conf, + UserGroupInformation.getCurrentUser(), omNodeDetails); + } + + protected void execute(OMAdminProtocolClientSideImpl omAdminProtocolClient) + throws IOException { + System.out.println("Triggering Snapshot Defrag Service ..."); + boolean result = omAdminProtocolClient.triggerSnapshotDefrag(noWait); + + if (noWait) { + System.out.println("Snapshot defragmentation task has been triggered " + + "successfully and is running in the background."); + } else { + if (result) { + System.out.println("Snapshot defragmentation completed successfully."); + } else { + System.out.println("Snapshot defragmentation task failed or was interrupted."); + } + } + } + + private OMAdmin getOMAdmin() { + // The parent hierarchy is: DefragSubCommand -> SnapshotSubCommand -> OMAdmin + return parent.getParent(); + } +} diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/SnapshotSubCommand.java 
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/SnapshotSubCommand.java new file mode 100644 index 000000000000..48ca9e365ff7 --- /dev/null +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/SnapshotSubCommand.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.admin.om.snapshot; + +import org.apache.hadoop.ozone.admin.om.OMAdmin; +import picocli.CommandLine; + +/** + * Handler of ozone admin om snapshot command. 
+ */ +@CommandLine.Command( + name = "snapshot", + description = "Command for all snapshot related operations.", + subcommands = { + DefragSubCommand.class + } +) +public class SnapshotSubCommand { + + @CommandLine.ParentCommand + private OMAdmin parent; + + public OMAdmin getParent() { + return parent; + } +} diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/package-info.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/package-info.java new file mode 100644 index 000000000000..00fd11817ccb --- /dev/null +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Command line for Ozone Manager snapshot operations. 
+ */ +package org.apache.hadoop.ozone.admin.om.snapshot; diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/DeletedBlocksTxnCommands.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/DeletedBlocksTxnCommands.java new file mode 100644 index 000000000000..b816cee2d7b6 --- /dev/null +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/DeletedBlocksTxnCommands.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.admin.scm; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import picocli.CommandLine; + +/** + * Subcommand to group container related operations. 
+ */ +@CommandLine.Command( + name = "deletedBlocksTxn", + description = "SCM deleted blocks transaction specific operations", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class, + subcommands = { + GetDeletedBlockSummarySubcommand.class, + }) +public class DeletedBlocksTxnCommands { + +} diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/GetDeletedBlockSummarySubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/GetDeletedBlockSummarySubcommand.java new file mode 100644 index 000000000000..34c54db27097 --- /dev/null +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/GetDeletedBlockSummarySubcommand.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.admin.scm; + +import java.io.IOException; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import picocli.CommandLine; + +/** + * Handler of getting deleted blocks summary from SCM side. 
+ */ +@CommandLine.Command( + name = "summary", + description = "get DeletedBlocksTransaction summary", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class GetDeletedBlockSummarySubcommand extends ScmSubcommand { + + @Override + public void execute(ScmClient client) throws IOException { + HddsProtos.DeletedBlocksTransactionSummary summary = client.getDeletedBlockSummary(); + if (summary == null) { + System.out.println("DeletedBlocksTransaction summary is not available"); + } else { + System.out.println("DeletedBlocksTransaction summary:"); + System.out.println(" Start from tx ID: " + + summary.getFirstTxID()); + System.out.println(" Total number of transactions: " + + summary.getTotalTransactionCount()); + System.out.println(" Total number of blocks: " + + summary.getTotalBlockCount()); + System.out.println(" Total size of blocks: " + + summary.getTotalBlockSize()); + System.out.println(" Total replicated size of blocks: " + + summary.getTotalBlockReplicatedSize()); + } + } +} diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/om/snapshot/TestDefragSubCommand.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/om/snapshot/TestDefragSubCommand.java new file mode 100644 index 000000000000..105a79f987d8 --- /dev/null +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/om/snapshot/TestDefragSubCommand.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.admin.om.snapshot; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.om.helpers.OMNodeDetails; +import org.apache.hadoop.ozone.om.protocolPB.OMAdminProtocolClientSideImpl; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import picocli.CommandLine; + +/** + * Unit tests to validate the DefragSubCommand class includes + * the correct output when executed against a mock client. + */ +public class TestDefragSubCommand { + + private TestableDefragSubCommand cmd; + private OMAdminProtocolClientSideImpl omAdminClient; + private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); + private final ByteArrayOutputStream errContent = new ByteArrayOutputStream(); + private final PrintStream originalOut = System.out; + private final PrintStream originalErr = System.err; + private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); + + /** + * Testable version of DefragSubCommand that allows injecting a mock client. 
+ */ + private static class TestableDefragSubCommand extends DefragSubCommand { + private final OMAdminProtocolClientSideImpl mockClient; + + TestableDefragSubCommand(OMAdminProtocolClientSideImpl mockClient) { + this.mockClient = mockClient; + } + + @Override + protected OMAdminProtocolClientSideImpl createClient( + OzoneConfiguration conf, OMNodeDetails omNodeDetails) { + return mockClient; + } + } + + @BeforeEach + public void setup() throws Exception { + omAdminClient = mock(OMAdminProtocolClientSideImpl.class); + cmd = new TestableDefragSubCommand(omAdminClient); + + // Mock close() to do nothing - needed for try-with-resources + doNothing().when(omAdminClient).close(); + + + System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); + System.setErr(new PrintStream(errContent, false, DEFAULT_ENCODING)); + } + + @AfterEach + public void tearDown() { + System.setOut(originalOut); + System.setErr(originalErr); + } + + @Test + public void testTriggerSnapshotDefragWithWait() throws Exception { + // Mock the client to return success + when(omAdminClient.triggerSnapshotDefrag(false)).thenReturn(true); + + // Execute the command (default behavior: wait for completion) + CommandLine c = new CommandLine(cmd); + c.parseArgs(); + cmd.execute(omAdminClient); + + // Verify the client method was called with correct parameter + verify(omAdminClient).triggerSnapshotDefrag(eq(false)); + + // Verify output contains success message + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("Triggering Snapshot Defrag Service")); + assertTrue(output.contains("Snapshot defragmentation completed successfully")); + } + + @Test + public void testTriggerSnapshotDefragWithWaitFailure() throws Exception { + // Mock the client to return failure + when(omAdminClient.triggerSnapshotDefrag(false)).thenReturn(false); + + // Execute the command + CommandLine c = new CommandLine(cmd); + c.parseArgs(); + cmd.execute(omAdminClient); + + // Verify the client 
method was called + verify(omAdminClient).triggerSnapshotDefrag(eq(false)); + + // Verify output contains failure message + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("Triggering Snapshot Defrag")); + assertTrue(output.contains("Snapshot defragmentation task failed or was interrupted")); + } + + @Test + public void testTriggerSnapshotDefragWithServiceIdAndNodeId() throws Exception { + // Mock the client with both service ID and node ID + when(omAdminClient.triggerSnapshotDefrag(false)).thenReturn(true); + + // Execute the command with service ID and node ID + CommandLine c = new CommandLine(cmd); + c.parseArgs("--service-id", "om-service-1", "--node-id", "om1"); + cmd.execute(omAdminClient); + + // Verify the client method was called + verify(omAdminClient).triggerSnapshotDefrag(eq(false)); + + // Verify success message + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("Snapshot defragmentation completed successfully")); + } + + @Test + public void testTriggerSnapshotDefragWithAllOptions() throws Exception { + // Test with service-id, node-id, and no-wait options + when(omAdminClient.triggerSnapshotDefrag(true)).thenReturn(true); + + // Execute the command with multiple options + CommandLine c = new CommandLine(cmd); + c.parseArgs("--service-id", "om-service-1", "--node-id", "om1", "--no-wait"); + cmd.execute(omAdminClient); + + // Verify the client method was called + verify(omAdminClient).triggerSnapshotDefrag(eq(true)); + + // Verify output for background execution + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("triggered successfully and is running in the background")); + } +} + diff --git a/hadoop-ozone/cli-shell/pom.xml b/hadoop-ozone/cli-shell/pom.xml index f7eeee7583c2..89d326efbd31 100644 --- a/hadoop-ozone/cli-shell/pom.xml +++ b/hadoop-ozone/cli-shell/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone hdds-hadoop-dependency-client - 
2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../../hadoop-hdds/hadoop-dependency-client ozone-cli-shell - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone CLI Shell Apache Ozone CLI Shell diff --git a/hadoop-ozone/client/pom.xml b/hadoop-ozone/client/pom.xml index 126f9a725842..603a87e3fe64 100644 --- a/hadoop-ozone/client/pom.xml +++ b/hadoop-ozone/client/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone hdds-hadoop-dependency-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../../hadoop-hdds/hadoop-dependency-client ozone-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Client Apache Ozone Client diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneSnapshot.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneSnapshot.java index b7bf7051caeb..360fd4cef6da 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneSnapshot.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneSnapshot.java @@ -192,7 +192,7 @@ public static OzoneSnapshot fromSnapshotInfo(SnapshotInfo snapshotInfo) { snapshotInfo.getSnapshotStatus(), snapshotInfo.getSnapshotId(), snapshotInfo.getSnapshotPath(), - snapshotInfo.getCheckpointDir(), + snapshotInfo.getCheckpointDirName(0), snapshotInfo.getReferencedSize(), snapshotInfo.getReferencedReplicatedSize(), snapshotInfo.getExclusiveSize() + snapshotInfo.getExclusiveSizeDeltaFromDirDeepCleaning(), @@ -222,4 +222,22 @@ public int hashCode() { return Objects.hash(volumeName, bucketName, name, creationTime, snapshotStatus, snapshotId, snapshotPath, checkpointDir, referencedSize, referencedReplicatedSize, exclusiveSize, exclusiveReplicatedSize); } + + @Override + public String toString() { + return "OzoneSnapshot{" + + "bucketName='" + bucketName + '\'' + + ", volumeName='" + volumeName + '\'' + + ", name='" + name + '\'' + + ", creationTime=" + creationTime + + ", snapshotStatus=" + snapshotStatus + + ", snapshotId=" + snapshotId + + ", snapshotPath='" + 
snapshotPath + '\'' + + ", checkpointDir='" + checkpointDir + '\'' + + ", referencedSize=" + referencedSize + + ", referencedReplicatedSize=" + referencedReplicatedSize + + ", exclusiveSize=" + exclusiveSize + + ", exclusiveReplicatedSize=" + exclusiveReplicatedSize + + '}'; + } } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java index ee4070c9eba4..d4ebf0be1b38 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java @@ -18,6 +18,8 @@ package org.apache.hadoop.ozone.client.rpc; import static org.apache.hadoop.ozone.OzoneAcl.LINK_BUCKET_DEFAULT_ACL; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_CLIENT_ELASTIC_BYTE_BUFFER_POOL_MAX_SIZE; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_CLIENT_ELASTIC_BYTE_BUFFER_POOL_MAX_SIZE_DEFAULT; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_CLIENT_KEY_PROVIDER_CACHE_EXPIRY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_CLIENT_KEY_PROVIDER_CACHE_EXPIRY_DEFAULT; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_CLIENT_REQUIRED_OM_VERSION_MIN_KEY; @@ -71,6 +73,7 @@ import org.apache.hadoop.hdds.client.ReplicationFactor; import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.StorageType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -88,7 +91,6 @@ import org.apache.hadoop.hdds.tracing.TracingUtil; import org.apache.hadoop.hdds.utils.IOUtils; import org.apache.hadoop.io.ByteBufferPool; -import org.apache.hadoop.io.ElasticByteBufferPool; import org.apache.hadoop.io.Text; import 
org.apache.hadoop.ozone.OzoneAcl; import org.apache.hadoop.ozone.OzoneConfigKeys; @@ -110,6 +112,7 @@ import org.apache.hadoop.ozone.client.VolumeArgs; import org.apache.hadoop.ozone.client.io.BlockInputStreamFactory; import org.apache.hadoop.ozone.client.io.BlockInputStreamFactoryImpl; +import org.apache.hadoop.ozone.client.io.BoundedElasticByteBufferPool; import org.apache.hadoop.ozone.client.io.CipherOutputStreamOzone; import org.apache.hadoop.ozone.client.io.ECBlockInputStream; import org.apache.hadoop.ozone.client.io.ECKeyOutputStream; @@ -318,7 +321,11 @@ public void onRemoval( } } }).build(); - this.byteBufferPool = new ElasticByteBufferPool(); + long maxPoolSize = (long) conf.getStorageSize( + OZONE_CLIENT_ELASTIC_BYTE_BUFFER_POOL_MAX_SIZE, + OZONE_CLIENT_ELASTIC_BYTE_BUFFER_POOL_MAX_SIZE_DEFAULT, + StorageUnit.GB); + this.byteBufferPool = new BoundedElasticByteBufferPool(maxPoolSize); this.blockInputStreamFactory = BlockInputStreamFactoryImpl .getInstance(byteBufferPool, ecReconstructExecutor); this.clientMetrics = ContainerClientMetrics.acquire(); diff --git a/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/TestOzoneSnapshot.java b/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/TestOzoneSnapshot.java index 8980e28b59b4..028e937a9c2e 100644 --- a/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/TestOzoneSnapshot.java +++ b/hadoop-ozone/client/src/test/java/org/apache/hadoop/ozone/client/TestOzoneSnapshot.java @@ -19,6 +19,7 @@ import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.when; import java.util.UUID; @@ -40,7 +41,7 @@ private SnapshotInfo getMockedSnapshotInfo(UUID snapshotId) { when(snapshotInfo.getSnapshotStatus()).thenReturn(SNAPSHOT_ACTIVE); when(snapshotInfo.getSnapshotId()).thenReturn(snapshotId); 
when(snapshotInfo.getSnapshotPath()).thenReturn("volume/bucket"); - when(snapshotInfo.getCheckpointDir()).thenReturn("checkpointDir"); + when(snapshotInfo.getCheckpointDirName(eq(0))).thenReturn("checkpointDir"); when(snapshotInfo.getReferencedSize()).thenReturn(1000L); when(snapshotInfo.getReferencedReplicatedSize()).thenReturn(3000L); when(snapshotInfo.getExclusiveSize()).thenReturn(4000L); diff --git a/hadoop-ozone/common/pom.xml b/hadoop-ozone/common/pom.xml index 1ecafebb8b3f..afbb9c4f14f8 100644 --- a/hadoop-ozone/common/pom.xml +++ b/hadoop-ozone/common/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone hdds-hadoop-dependency-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../../hadoop-hdds/hadoop-dependency-client ozone-common - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Common Apache Ozone Common diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java index cbc3709ea1e8..a26422cd81fb 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java @@ -71,7 +71,6 @@ public final class SnapshotInfo implements Auditable, CopyObject { private UUID pathPreviousSnapshotId; private UUID globalPreviousSnapshotId; private String snapshotPath; // snapshot mask - private String checkpointDir; /** * RocksDB's transaction sequence number at the time of checkpoint creation. 
*/ @@ -99,7 +98,6 @@ private SnapshotInfo(Builder b) { this.pathPreviousSnapshotId = b.pathPreviousSnapshotId; this.globalPreviousSnapshotId = b.globalPreviousSnapshotId; this.snapshotPath = b.snapshotPath; - this.checkpointDir = b.checkpointDir; this.dbTxSequenceNumber = b.dbTxSequenceNumber; this.deepClean = b.deepClean; this.sstFiltered = b.sstFiltered; @@ -150,10 +148,6 @@ public void setSnapshotPath(String snapshotPath) { this.snapshotPath = snapshotPath; } - public void setCheckpointDir(String checkpointDir) { - this.checkpointDir = checkpointDir; - } - public boolean isDeepCleaned() { return deepClean; } @@ -202,10 +196,6 @@ public String getSnapshotPath() { return snapshotPath; } - public String getCheckpointDir() { - return checkpointDir; - } - public boolean isSstFiltered() { return sstFiltered; } @@ -231,7 +221,6 @@ public SnapshotInfo.Builder toBuilder() { .setPathPreviousSnapshotId(pathPreviousSnapshotId) .setGlobalPreviousSnapshotId(globalPreviousSnapshotId) .setSnapshotPath(snapshotPath) - .setCheckpointDir(checkpointDir) .setDbTxSequenceNumber(dbTxSequenceNumber) .setDeepClean(deepClean) .setSstFiltered(sstFiltered) @@ -260,7 +249,6 @@ public static class Builder { private UUID pathPreviousSnapshotId; private UUID globalPreviousSnapshotId; private String snapshotPath; - private String checkpointDir; private long dbTxSequenceNumber; private boolean deepClean; private boolean sstFiltered; @@ -339,12 +327,6 @@ public Builder setSnapshotPath(String snapshotPath) { return this; } - /** @param checkpointDir - Snapshot checkpoint directory. */ - public Builder setCheckpointDir(String checkpointDir) { - this.checkpointDir = checkpointDir; - return this; - } - /** @param dbTxSequenceNumber - RDB latest transaction sequence number. 
*/ public Builder setDbTxSequenceNumber(long dbTxSequenceNumber) { this.dbTxSequenceNumber = dbTxSequenceNumber; @@ -459,7 +441,6 @@ public OzoneManagerProtocolProtos.SnapshotInfo getProtobuf() { } sib.setSnapshotPath(snapshotPath) - .setCheckpointDir(checkpointDir) .setDbTxSequenceNumber(dbTxSequenceNumber) .setDeepClean(deepClean); return sib.build(); @@ -544,7 +525,6 @@ public static SnapshotInfo getFromProtobuf( } osib.setSnapshotPath(snapshotInfoProto.getSnapshotPath()) - .setCheckpointDir(snapshotInfoProto.getCheckpointDir()) .setDbTxSequenceNumber(snapshotInfoProto.getDbTxSequenceNumber()); return osib.build(); @@ -562,17 +542,20 @@ public Map toAuditMap() { /** * Get the name of the checkpoint directory. */ - public static String getCheckpointDirName(UUID snapshotId) { + public static String getCheckpointDirName(UUID snapshotId, int version) { Objects.requireNonNull(snapshotId, "SnapshotId is needed to create checkpoint directory"); - return OM_SNAPSHOT_SEPARATOR + snapshotId; + if (version == 0) { + return OM_SNAPSHOT_SEPARATOR + snapshotId; + } + return OM_SNAPSHOT_SEPARATOR + snapshotId + OM_SNAPSHOT_SEPARATOR + version; } /** * Get the name of the checkpoint directory, (non-static). 
*/ - public String getCheckpointDirName() { - return getCheckpointDirName(getSnapshotId()); + public String getCheckpointDirName(int version) { + return getCheckpointDirName(getSnapshotId(), version); } public long getDbTxSequenceNumber() { @@ -703,10 +686,6 @@ public static SnapshotInfo newInstance(String volumeName, .setBucketName(bucketName) .setDeepClean(false) .setDeepCleanedDeletedDir(false); - - if (snapshotId != null) { - builder.setCheckpointDir(getCheckpointDirName(snapshotId)); - } return builder.build(); } @@ -729,7 +708,6 @@ public boolean equals(Object o) { Objects.equals( globalPreviousSnapshotId, that.globalPreviousSnapshotId) && snapshotPath.equals(that.snapshotPath) && - checkpointDir.equals(that.checkpointDir) && deepClean == that.deepClean && sstFiltered == that.sstFiltered && referencedSize == that.referencedSize && @@ -746,7 +724,7 @@ public int hashCode() { return Objects.hash(snapshotId, name, volumeName, bucketName, snapshotStatus, creationTime, deletionTime, pathPreviousSnapshotId, - globalPreviousSnapshotId, snapshotPath, checkpointDir, + globalPreviousSnapshotId, snapshotPath, deepClean, sstFiltered, referencedSize, referencedReplicatedSize, exclusiveSize, exclusiveReplicatedSize, deepCleanedDeletedDir, lastTransactionInfo, createTransactionInfo); @@ -773,7 +751,6 @@ public String toString() { ", pathPreviousSnapshotId: '" + pathPreviousSnapshotId + '\'' + ", globalPreviousSnapshotId: '" + globalPreviousSnapshotId + '\'' + ", snapshotPath: '" + snapshotPath + '\'' + - ", checkpointDir: '" + checkpointDir + '\'' + ", dbTxSequenceNumber: '" + dbTxSequenceNumber + '\'' + ", deepClean: '" + deepClean + '\'' + ", sstFiltered: '" + sstFiltered + '\'' + diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/lock/FlatResource.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/lock/FlatResource.java index 73f8357252f2..45534197866d 100644 --- 
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/lock/FlatResource.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/lock/FlatResource.java @@ -26,7 +26,12 @@ public enum FlatResource implements Resource { // Background services lock on a Snapshot. SNAPSHOT_GC_LOCK("SNAPSHOT_GC_LOCK"), // Lock acquired on a Snapshot's RocksDB Handle. - SNAPSHOT_DB_LOCK("SNAPSHOT_DB_LOCK"); + SNAPSHOT_DB_LOCK("SNAPSHOT_DB_LOCK"), + // Lock acquired on a Snapshot's Local Data. + SNAPSHOT_LOCAL_DATA_LOCK("SNAPSHOT_LOCAL_DATA_LOCK"), + // Lock acquired on a Snapshot's RocksDB contents. + SNAPSHOT_DB_CONTENT_LOCK("SNAPSHOT_DB_CONTENT_LOCK"); + private String name; private IOzoneManagerLock.ResourceManager resourceManager; diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java index 8588620074d1..cb6baf79fe7e 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java @@ -45,4 +45,12 @@ public interface OMAdminProtocol extends Closeable { * @param columnFamily */ void compactOMDB(String columnFamily) throws IOException; + + /** + * Triggers the Snapshot Defragmentation Service to run immediately. 
+ * @param noWait if true, return immediately without waiting for completion + * @return true if defragmentation completed successfully (when noWait is false), + * or if the task was triggered successfully (when noWait is true) + */ + boolean triggerSnapshotDefrag(boolean noWait) throws IOException; } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java index f7d22713b329..7ae8a30b73af 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java @@ -47,6 +47,8 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMConfigurationRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMConfigurationResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMNodeInfo; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.TriggerSnapshotDefragRequest; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.TriggerSnapshotDefragResponse; import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -232,6 +234,32 @@ public void compactOMDB(String columnFamily) throws IOException { } } + @Override + public boolean triggerSnapshotDefrag(boolean noWait) throws IOException { + TriggerSnapshotDefragRequest request = TriggerSnapshotDefragRequest.newBuilder() + .setNoWait(noWait) + .build(); + TriggerSnapshotDefragResponse response; + try { + response = rpcProxy.triggerSnapshotDefrag(NULL_RPC_CONTROLLER, request); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + if (!response.getSuccess()) { + 
throwException("Request to trigger snapshot defragmentation" + + ", sent to " + omPrintInfo + " failed with error: " + + response.getErrorMsg()); + } + if (response.hasResult()) { + return response.getResult(); + } else { + throwException("Missing result in TriggerSnapshotDefragResponse from " + omPrintInfo + + ". This likely indicates a server error."); + // Unreachable, required for compilation + return false; + } + } + private void throwException(String errorMsg) throws IOException { throw new IOException("Request Failed. Error: " + errorMsg); diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java index b08c041fd56c..9aea06fd7969 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/TestOmUtils.java @@ -31,6 +31,7 @@ import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import static org.junit.jupiter.api.Assumptions.assumeTrue; import java.io.File; @@ -257,5 +258,72 @@ public void testGetActiveNonListenerOMNodeIdsFiltering() { assertEquals(expected.size(), result.size()); assertTrue(result.containsAll(expected)); } -} + @Test + void testGetOMEpoch() { + assertEquals(2, OmUtils.getOMEpoch()); + assertEquals(OmUtils.EPOCH_WHEN_RATIS_ENABLED, OmUtils.getOMEpoch()); + } + + @Test + void testAddEpochToTxId() { + assertEquals(0L, OmUtils.addEpochToTxId(0, 0)); + assertEquals(1L << 62, OmUtils.addEpochToTxId(1, 0)); + assertEquals(2L << 62, OmUtils.addEpochToTxId(2, 0)); + assertEquals(3L << 62, OmUtils.addEpochToTxId(3, 0)); + + long txId = 12345L; + long expected = (2L << 62) | (txId << 8); + assertEquals(expected, OmUtils.addEpochToTxId(2, txId)); + + long maxTxId = 
OmUtils.MAX_TRXN_ID; + long maxExpected = (2L << 62) | (maxTxId << 8); + assertEquals(maxExpected, OmUtils.addEpochToTxId(2, maxTxId)); + + // Verify bit structure + long result = OmUtils.addEpochToTxId(2, 0x123456789ABCDL); + assertEquals(2L, result >>> 62); + assertEquals(0x123456789ABCDL, (result & 0x3FFFFFFFFFFFFFFFL) >>> 8); + } + + // Intentionally no tests for getTxIdFromObjectId(); this helper is not + // used in production paths and may be removed in the future. + + @Test + void testGetObjectIdFromTxId() { + long txId = 12345L; + long epoch = 2L; + long expected = OmUtils.addEpochToTxId(epoch, txId); + assertEquals(expected, OmUtils.getObjectIdFromTxId(epoch, txId)); + + for (long e = 0; e <= 3; e++) { + long result = OmUtils.getObjectIdFromTxId(e, txId); + assertEquals(e, result >>> 62); + assertEquals(txId, (result & 0x3FFFFFFFFFFFFFFFL) >>> 8); + } + + long maxTxId = OmUtils.MAX_TRXN_ID; + long maxResult = OmUtils.getObjectIdFromTxId(epoch, maxTxId); + assertEquals(epoch, maxResult >>> 62); + assertEquals(maxTxId, (maxResult & 0x3FFFFFFFFFFFFFFFL) >>> 8); + } + + @Test + void testGetObjectIdFromTxIdValidation() { + long validTxId = OmUtils.MAX_TRXN_ID; + // Test valid case - should not throw exception + try { + OmUtils.getObjectIdFromTxId(2, validTxId); + } catch (Exception e) { + fail("Valid txId should not throw exception: " + e.getMessage()); + } + + long invalidTxId = (1L << 54) - 1; // MAX_TRXN_ID + 1 + IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, + () -> OmUtils.getObjectIdFromTxId(2, invalidTxId)); + assertTrue(exception.getMessage().contains("TransactionID exceeds max limit")); + } + + // Consistency checks between epoch and txId are covered by + // testAddEpochToTxId() and testGetObjectIdFromTxId(). 
+} diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOMNodeDetails.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOMNodeDetails.java new file mode 100644 index 000000000000..9a9951c20a29 --- /dev/null +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOMNodeDetails.java @@ -0,0 +1,429 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.om.helpers; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.URL; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.ha.ConfUtils; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.NodeState; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMNodeInfo; +import org.junit.jupiter.api.Test; + +/** + * Test OMNodeDetails. + */ +public class TestOMNodeDetails { + + private static final String OM_SERVICE_ID = "om-service"; + private static final String OM_NODE_ID = "om-01"; + private static final String HOST_ADDRESS = "localhost"; + private static final int RPC_PORT = 9862; + private static final int RATIS_PORT = 9873; + private static final String HTTP_ADDRESS = "0.0.0.0:9874"; + private static final String HTTPS_ADDRESS = "0.0.0.0:9875"; + + /** + * Test builder with InetSocketAddress. 
+ */ + @Test + public void testBuilderWithInetSocketAddress() { + InetSocketAddress rpcAddr = new InetSocketAddress(HOST_ADDRESS, RPC_PORT); + + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setRpcAddress(rpcAddr) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .setIsListener(false) + .build(); + + assertEquals(OM_SERVICE_ID, nodeDetails.getServiceId()); + assertEquals(OM_NODE_ID, nodeDetails.getNodeId()); + assertEquals(RPC_PORT, nodeDetails.getRpcPort()); + assertEquals(RATIS_PORT, nodeDetails.getRatisPort()); + assertEquals(HTTP_ADDRESS, nodeDetails.getHttpAddress()); + assertEquals(HTTPS_ADDRESS, nodeDetails.getHttpsAddress()); + assertEquals(HOST_ADDRESS, nodeDetails.getHostAddress()); + } + + /** + * Test builder with host address string. + */ + @Test + public void testBuilderWithHostAddressString() { + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .build(); + + assertEquals(OM_SERVICE_ID, nodeDetails.getServiceId()); + assertEquals(OM_NODE_ID, nodeDetails.getNodeId()); + assertEquals(RPC_PORT, nodeDetails.getRpcPort()); + assertEquals(RATIS_PORT, nodeDetails.getRatisPort()); + } + + /** + * Test isRatisListener flag. 
+ */ + @Test + public void testRatisListenerFlag() { + OMNodeDetails nonListener = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .setIsListener(false) + .build(); + + assertFalse(nonListener.isRatisListener()); + + OMNodeDetails listener = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID + "-listener") + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT + 1) + .setRatisPort(RATIS_PORT + 1) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .setIsListener(true) + .build(); + + assertTrue(listener.isRatisListener()); + + nonListener.setRatisListener(); + assertTrue(nonListener.isRatisListener()); + } + + /** + * Test decommissioned state. + */ + @Test + public void testDecommissionedState() { + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .build(); + + assertFalse(nodeDetails.isDecommissioned()); + + nodeDetails.setDecommissioningState(); + assertTrue(nodeDetails.isDecommissioned()); + } + + /** + * Test toString method. 
+ */ + @Test + public void testToString() { + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .setIsListener(true) + .build(); + + String result = nodeDetails.toString(); + assertTrue(result.contains("omServiceId=" + OM_SERVICE_ID)); + assertTrue(result.contains("omNodeId=" + OM_NODE_ID)); + assertTrue(result.contains("rpcPort=" + RPC_PORT)); + assertTrue(result.contains("ratisPort=" + RATIS_PORT)); + assertTrue(result.contains("httpAddress=" + HTTP_ADDRESS)); + assertTrue(result.contains("httpsAddress=" + HTTPS_ADDRESS)); + assertTrue(result.contains("isListener=true")); + } + + /** + * Test getOMPrintInfo method. + */ + @Test + public void testGetOMPrintInfo() { + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .build(); + + String result = nodeDetails.getOMPrintInfo(); + assertEquals(OM_NODE_ID + "[" + HOST_ADDRESS + ":" + RPC_PORT + "]", result); + } + + /** + * Test getRpcPort method. + */ + @Test + public void testGetRpcPort() { + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .build(); + + assertEquals(RPC_PORT, nodeDetails.getRpcPort()); + } + + /** + * Test protobuf conversion for active node. 
+ */ + @Test + public void testProtobufConversionActiveNode() { + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .setIsListener(false) + .build(); + + OMNodeInfo protobuf = nodeDetails.getProtobuf(); + + assertEquals(OM_NODE_ID, protobuf.getNodeID()); + assertEquals(HOST_ADDRESS, protobuf.getHostAddress()); + assertEquals(RPC_PORT, protobuf.getRpcPort()); + assertEquals(RATIS_PORT, protobuf.getRatisPort()); + assertEquals(NodeState.ACTIVE, protobuf.getNodeState()); + assertFalse(protobuf.getIsListener()); + + OMNodeDetails restored = OMNodeDetails.getFromProtobuf(protobuf); + assertEquals(nodeDetails.getNodeId(), restored.getNodeId()); + assertEquals(nodeDetails.getHostAddress(), restored.getHostAddress()); + assertEquals(nodeDetails.getRpcPort(), restored.getRpcPort()); + assertEquals(nodeDetails.getRatisPort(), restored.getRatisPort()); + assertEquals(nodeDetails.isDecommissioned(), restored.isDecommissioned()); + assertEquals(nodeDetails.isRatisListener(), restored.isRatisListener()); + } + + /** + * Test protobuf conversion for decommissioned node. + */ + @Test + public void testProtobufConversionDecommissionedNode() { + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .build(); + + nodeDetails.setDecommissioningState(); + + OMNodeInfo protobuf = nodeDetails.getProtobuf(); + assertEquals(NodeState.DECOMMISSIONED, protobuf.getNodeState()); + + OMNodeDetails restored = OMNodeDetails.getFromProtobuf(protobuf); + assertTrue(restored.isDecommissioned()); + } + + /** + * Test protobuf conversion for listener node. 
+ */ + @Test + public void testProtobufConversionListenerNode() { + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .setIsListener(true) + .build(); + + OMNodeInfo protobuf = nodeDetails.getProtobuf(); + assertTrue(protobuf.getIsListener()); + + OMNodeDetails restored = OMNodeDetails.getFromProtobuf(protobuf); + assertTrue(restored.isRatisListener()); + } + + /** + * Test getOMDBCheckpointEndpointUrl for HTTP. + */ + @Test + public void testGetOMDBCheckpointEndpointUrlHttp() throws IOException { + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HOST_ADDRESS + ":9874") + .setHttpsAddress(HOST_ADDRESS + ":9875") + .build(); + + URL urlWithoutFlush = nodeDetails.getOMDBCheckpointEndpointUrl(true, false); + assertNotNull(urlWithoutFlush); + assertEquals("http", urlWithoutFlush.getProtocol()); + assertEquals(HOST_ADDRESS + ":9874", urlWithoutFlush.getAuthority()); + assertNotNull(urlWithoutFlush.getQuery()); + assertTrue(urlWithoutFlush.getQuery().contains("flushBeforeCheckpoint=false")); + + URL urlWithFlush = nodeDetails.getOMDBCheckpointEndpointUrl(true, true); + assertNotNull(urlWithFlush); + assertTrue(urlWithFlush.getQuery().contains("flushBeforeCheckpoint=true")); + assertTrue(urlWithFlush.getQuery().contains("includeSnapshotData=true")); + } + + /** + * Test getOMDBCheckpointEndpointUrl for HTTPS. 
+ */ + @Test + public void testGetOMDBCheckpointEndpointUrlHttps() throws IOException { + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setHostAddress(HOST_ADDRESS) + .setRpcPort(RPC_PORT) + .setRatisPort(RATIS_PORT) + .setHttpAddress(HOST_ADDRESS + ":9874") + .setHttpsAddress(HOST_ADDRESS + ":9875") + .build(); + + URL url = nodeDetails.getOMDBCheckpointEndpointUrl(false, false); + assertNotNull(url); + assertEquals("https", url.getProtocol()); + assertEquals(HOST_ADDRESS + ":9875", url.getAuthority()); + } + + /** + * Test getOMNodeAddressFromConf. + */ + @Test + public void testGetOMNodeAddressFromConf() { + OzoneConfiguration conf = new OzoneConfiguration(); + + String configKey = "ozone.om.address.om-service.om-01"; + String expectedAddress = "localhost:9862"; + conf.set(configKey, expectedAddress); + + String address = OMNodeDetails.getOMNodeAddressFromConf(conf, "om-service", "om-01"); + assertEquals(expectedAddress, address); + + String missingAddress = OMNodeDetails.getOMNodeAddressFromConf(conf, "nonexistent", "node"); + assertNull(missingAddress); + } + + /** + * Test getOMNodeDetailsFromConf with valid configuration. 
+ */ + @Test + public void testGetOMNodeDetailsFromConfValid() throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + + String serviceId = "om-service"; + String nodeId = "om-01"; + + conf.set(ConfUtils.addKeySuffixes(OMConfigKeys.OZONE_OM_ADDRESS_KEY, serviceId, nodeId), + "localhost:9862"); + conf.set(ConfUtils.addKeySuffixes(OMConfigKeys.OZONE_OM_RATIS_PORT_KEY, serviceId, nodeId), + "9873"); + conf.set(ConfUtils.addKeySuffixes(OMConfigKeys.OZONE_OM_HTTP_ADDRESS_KEY, serviceId, nodeId), + "localhost:9874"); + conf.set(ConfUtils.addKeySuffixes(OMConfigKeys.OZONE_OM_HTTPS_ADDRESS_KEY, serviceId, nodeId), + "localhost:9875"); + + OMNodeDetails nodeDetails = OMNodeDetails.getOMNodeDetailsFromConf(conf, serviceId, nodeId); + + assertNotNull(nodeDetails); + assertEquals(serviceId, nodeDetails.getServiceId()); + assertEquals(nodeId, nodeDetails.getNodeId()); + assertEquals(9862, nodeDetails.getRpcPort()); + assertEquals(9873, nodeDetails.getRatisPort()); + } + + /** + * Test getOMNodeDetailsFromConf with missing configuration. + */ + @Test + public void testGetOMNodeDetailsFromConfMissing() throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + + OMNodeDetails nodeDetails = OMNodeDetails.getOMNodeDetailsFromConf(conf, "nonexistent", "node"); + assertNull(nodeDetails); + + String serviceId = "om-service"; + String nodeId = "om-01"; + + nodeDetails = OMNodeDetails.getOMNodeDetailsFromConf(conf, serviceId, null); + assertNull(nodeDetails); + nodeDetails = OMNodeDetails.getOMNodeDetailsFromConf(conf, null, nodeId); + assertNull(nodeDetails); + } + + /** + * Test setRatisAddress in builder. 
+ */ + @Test + public void testSetRatisAddress() { + InetSocketAddress ratisAddr = new InetSocketAddress("192.168.1.100", 9873); + + OMNodeDetails nodeDetails = new OMNodeDetails.Builder() + .setOMServiceId(OM_SERVICE_ID) + .setOMNodeId(OM_NODE_ID) + .setRatisAddress(ratisAddr) + .setRpcPort(RPC_PORT) + .setHttpAddress(HTTP_ADDRESS) + .setHttpsAddress(HTTPS_ADDRESS) + .build(); + + assertEquals("192.168.1.100", nodeDetails.getHostAddress()); + assertEquals(9873, nodeDetails.getRatisPort()); + } +} diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmSnapshotInfo.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmSnapshotInfo.java index 98cc035b3c07..e7695debd619 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmSnapshotInfo.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmSnapshotInfo.java @@ -45,7 +45,6 @@ public class TestOmSnapshotInfo { private static final UUID GLOBAL_PREVIOUS_SNAPSHOT_ID = PATH_PREVIOUS_SNAPSHOT_ID; private static final String SNAPSHOT_PATH = "test/path"; - private static final String CHECKPOINT_DIR = "checkpoint.testdir"; private static final long DB_TX_SEQUENCE_NUMBER = 12345L; private SnapshotInfo createSnapshotInfo() { @@ -60,7 +59,6 @@ private SnapshotInfo createSnapshotInfo() { .setPathPreviousSnapshotId(PATH_PREVIOUS_SNAPSHOT_ID) .setGlobalPreviousSnapshotId(GLOBAL_PREVIOUS_SNAPSHOT_ID) .setSnapshotPath(SNAPSHOT_PATH) - .setCheckpointDir(CHECKPOINT_DIR) .setDbTxSequenceNumber(DB_TX_SEQUENCE_NUMBER) .setDeepClean(false) .setSstFiltered(false) @@ -86,7 +84,6 @@ private OzoneManagerProtocolProtos.SnapshotInfo createSnapshotInfoProto() { .setPathPreviousSnapshotID(toProtobuf(PATH_PREVIOUS_SNAPSHOT_ID)) .setGlobalPreviousSnapshotID(toProtobuf(GLOBAL_PREVIOUS_SNAPSHOT_ID)) .setSnapshotPath(SNAPSHOT_PATH) - .setCheckpointDir(CHECKPOINT_DIR) .setDbTxSequenceNumber(DB_TX_SEQUENCE_NUMBER) 
.setDeepClean(false) .setSstFiltered(false) diff --git a/hadoop-ozone/csi/pom.xml b/hadoop-ozone/csi/pom.xml index 511c9b08cea2..9c44a8809853 100644 --- a/hadoop-ozone/csi/pom.xml +++ b/hadoop-ozone/csi/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-csi - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone CSI service Apache Ozone CSI service diff --git a/hadoop-ozone/datanode/pom.xml b/hadoop-ozone/datanode/pom.xml index 60c3bfac2ae4..a91604198157 100644 --- a/hadoop-ozone/datanode/pom.xml +++ b/hadoop-ozone/datanode/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-datanode - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Datanode diff --git a/hadoop-ozone/dist/pom.xml b/hadoop-ozone/dist/pom.xml index b5a62d4835d9..ceb45e8c9860 100644 --- a/hadoop-ozone/dist/pom.xml +++ b/hadoop-ozone/dist/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-dist - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Distribution diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/mpu_lib.robot b/hadoop-ozone/dist/src/main/smoketest/s3/mpu_lib.robot index 0aaa0affec1d..fed0c539a074 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/mpu_lib.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/mpu_lib.robot @@ -42,6 +42,7 @@ Upload MPU part IF '${expected_rc}' == '0' Should contain ${result} ETag ${etag} = Execute echo '${result}' | jq -r '.ETag' + ${etag} = Replace String ${etag} \" ${EMPTY} ${md5sum} = Execute md5sum ${file} | awk '{print $1}' Should Be Equal As Strings ${etag} ${md5sum} RETURN ${etag} diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/pom.xml b/hadoop-ozone/fault-injection-test/mini-chaos-tests/pom.xml index bf7a1636749e..97d3076e889e 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/pom.xml +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone 
ozone-fault-injection-test - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT mini-chaos-tests - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT Apache Ozone Mini Ozone Chaos Tests Apache Ozone Mini Ozone Chaos Tests diff --git a/hadoop-ozone/fault-injection-test/network-tests/pom.xml b/hadoop-ozone/fault-injection-test/network-tests/pom.xml index 878efae01349..75b265ee0aad 100644 --- a/hadoop-ozone/fault-injection-test/network-tests/pom.xml +++ b/hadoop-ozone/fault-injection-test/network-tests/pom.xml @@ -17,7 +17,7 @@ org.apache.ozone ozone-fault-injection-test - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-network-tests jar diff --git a/hadoop-ozone/fault-injection-test/pom.xml b/hadoop-ozone/fault-injection-test/pom.xml index 3ba13e168546..1651e7e1529e 100644 --- a/hadoop-ozone/fault-injection-test/pom.xml +++ b/hadoop-ozone/fault-injection-test/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-fault-injection-test - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT pom Apache Ozone Fault Injection Tests Apache Ozone Fault Injection Tests diff --git a/hadoop-ozone/freon/pom.xml b/hadoop-ozone/freon/pom.xml index bceadd99f8ca..c0cf09fdb34c 100644 --- a/hadoop-ozone/freon/pom.xml +++ b/hadoop-ozone/freon/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-freon - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Freon Apache Ozone Freon diff --git a/hadoop-ozone/httpfsgateway/pom.xml b/hadoop-ozone/httpfsgateway/pom.xml index 058b60c5a0f4..d4df054b4da4 100644 --- a/hadoop-ozone/httpfsgateway/pom.xml +++ b/hadoop-ozone/httpfsgateway/pom.xml @@ -19,10 +19,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-httpfsgateway - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone HttpFS diff --git a/hadoop-ozone/insight/pom.xml b/hadoop-ozone/insight/pom.xml index 471f9e09a729..fd64cd841f6e 100644 --- a/hadoop-ozone/insight/pom.xml +++ b/hadoop-ozone/insight/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 
2.2.0-SNAPSHOT ozone-insight - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Insight Tool Apache Ozone Insight Tool diff --git a/hadoop-ozone/integration-test-recon/pom.xml b/hadoop-ozone/integration-test-recon/pom.xml index 45d8a3ee2486..a26835c4a6af 100644 --- a/hadoop-ozone/integration-test-recon/pom.xml +++ b/hadoop-ozone/integration-test-recon/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-integration-test-recon - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Recon Integration Tests Apache Ozone Integration Tests with Recon diff --git a/hadoop-ozone/integration-test-s3/pom.xml b/hadoop-ozone/integration-test-s3/pom.xml index 1c41eee0d6d0..30eb3db975f0 100644 --- a/hadoop-ozone/integration-test-s3/pom.xml +++ b/hadoop-ozone/integration-test-s3/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-integration-test-s3 - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone S3 Integration Tests Apache Ozone Integration Tests with S3 Gateway diff --git a/hadoop-ozone/integration-test-s3/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v1/AbstractS3SDKV1Tests.java b/hadoop-ozone/integration-test-s3/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v1/AbstractS3SDKV1Tests.java index 00b482629932..016ab60537fb 100644 --- a/hadoop-ozone/integration-test-s3/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v1/AbstractS3SDKV1Tests.java +++ b/hadoop-ozone/integration-test-s3/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v1/AbstractS3SDKV1Tests.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.ozone.s3.awssdk.S3SDKTestUtils.calculateDigest; import static org.apache.hadoop.ozone.s3.awssdk.S3SDKTestUtils.createFile; import static org.apache.hadoop.ozone.s3.util.S3Consts.CUSTOM_METADATA_HEADER_PREFIX; +import static org.apache.hadoop.ozone.s3.util.S3Utils.stripQuotes; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import 
static org.junit.jupiter.api.Assertions.assertFalse; @@ -1231,7 +1232,7 @@ private void completeMPU(String keyName, String uploadId, List complet for (PartETag part : completedParts) { completionXml.append(" \n"); completionXml.append(" ").append(part.getPartNumber()).append("\n"); - completionXml.append(" ").append(part.getETag()).append("\n"); + completionXml.append(" ").append(stripQuotes(part.getETag())).append("\n"); completionXml.append(" \n"); } completionXml.append(""); diff --git a/hadoop-ozone/integration-test-s3/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v2/AbstractS3SDKV2Tests.java b/hadoop-ozone/integration-test-s3/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v2/AbstractS3SDKV2Tests.java index 925e2e75df5a..119849281acc 100644 --- a/hadoop-ozone/integration-test-s3/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v2/AbstractS3SDKV2Tests.java +++ b/hadoop-ozone/integration-test-s3/src/test/java/org/apache/hadoop/ozone/s3/awssdk/v2/AbstractS3SDKV2Tests.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.ozone.OzoneConsts.MB; import static org.apache.hadoop.ozone.s3.awssdk.S3SDKTestUtils.calculateDigest; import static org.apache.hadoop.ozone.s3.awssdk.S3SDKTestUtils.createFile; +import static org.apache.hadoop.ozone.s3.util.S3Utils.stripQuotes; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -970,7 +971,7 @@ private String buildCompleteMultipartUploadXml(List parts) { for (CompletedPart part : parts) { xml.append(" \n"); xml.append(" ").append(part.partNumber()).append("\n"); - xml.append(" ").append(part.eTag()).append("\n"); + xml.append(" ").append(stripQuotes(part.eTag())).append("\n"); xml.append(" \n"); } xml.append(""); @@ -1142,11 +1143,11 @@ private List uploadParts(String bucketName, String key, String up RequestBody.fromByteBuffer(bb)); assertEquals(DatatypeConverter.printHexBinary( - 
calculateDigest(fileInputStream, 0, partSize)).toLowerCase(), partResponse.eTag()); + calculateDigest(fileInputStream, 0, partSize)).toLowerCase(), stripQuotes(partResponse.eTag())); CompletedPart part = CompletedPart.builder() .partNumber(partNumber) - .eTag(partResponse.eTag()) + .eTag(stripQuotes(partResponse.eTag())) .build(); completedParts.add(part); @@ -1643,7 +1644,7 @@ public void testCompleteMultipartUpload() { CompletedMultipartUpload completedUpload = CompletedMultipartUpload.builder() .parts( - CompletedPart.builder().partNumber(1).eTag(uploadPartResponse.eTag()).build() + CompletedPart.builder().partNumber(1).eTag(stripQuotes(uploadPartResponse.eTag())).build() ).build(); diff --git a/hadoop-ozone/integration-test/pom.xml b/hadoop-ozone/integration-test/pom.xml index d70cea608970..df9da45b3b6d 100644 --- a/hadoop-ozone/integration-test/pom.xml +++ b/hadoop-ozone/integration-test/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-integration-test - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Integration Tests Apache Ozone Integration Tests diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsSnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsSnapshot.java index d02319a4cab6..6a97796af32b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsSnapshot.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFsSnapshot.java @@ -547,7 +547,7 @@ private String createSnapshot() throws Exception { SnapshotInfo snapshotInfo = ozoneManager.getMetadataManager() .getSnapshotInfoTable() .get(SnapshotInfo.getTableKey(VOLUME, BUCKET, snapshotName)); - String snapshotDirName = getSnapshotPath(conf, snapshotInfo) + + String snapshotDirName = getSnapshotPath(conf, snapshotInfo, 0) + OM_KEY_PREFIX + "CURRENT"; GenericTestUtils.waitFor(() -> new 
File(snapshotDirName).exists(), 1000, 100000); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index e407e30bffc6..7276dc871eac 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -25,6 +25,7 @@ import static org.apache.hadoop.hdds.scm.HddsTestUtils.mockRemoteUser; import static org.apache.hadoop.hdds.scm.HddsWhiteboxTestUtils.setInternalState; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.common.BlockGroup.SIZE_NOT_AVAILABLE; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -64,11 +65,13 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.HddsUtils; +import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.DatanodeID; @@ -118,6 +121,7 @@ import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.OzoneTestUtils; import org.apache.hadoop.ozone.TestDataUtil; +import org.apache.hadoop.ozone.common.DeletedBlock; import org.apache.hadoop.ozone.container.ContainerTestHelper; import org.apache.hadoop.ozone.container.common.helpers.BlockData; 
import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; @@ -267,7 +271,7 @@ private void testBlockDeletionTransactions(MiniOzoneCluster cluster) throws Exce OzoneTestUtils.closeContainers(keyInfo.getKeyLocationVersions(), cluster.getStorageContainerManager()); } - Map> containerBlocks = createDeleteTXLog( + Map> containerBlocks = createDeleteTXLog( cluster.getStorageContainerManager(), delLog, keyLocations, cluster); @@ -285,10 +289,12 @@ private void testBlockDeletionTransactions(MiniOzoneCluster cluster) throws Exce // but unknown block IDs. for (Long containerID : containerBlocks.keySet()) { // Add 2 TXs per container. - Map> deletedBlocks = new HashMap<>(); - List blocks = new ArrayList<>(); - blocks.add(RandomUtils.secure().randomLong()); - blocks.add(RandomUtils.secure().randomLong()); + Map> deletedBlocks = new HashMap<>(); + List blocks = new ArrayList<>(); + blocks.add(new DeletedBlock(new BlockID(containerID, RandomUtils.secure().randomLong()), + SIZE_NOT_AVAILABLE, SIZE_NOT_AVAILABLE)); + blocks.add(new DeletedBlock(new BlockID(containerID, RandomUtils.secure().randomLong()), + SIZE_NOT_AVAILABLE, SIZE_NOT_AVAILABLE)); deletedBlocks.put(containerID, blocks); addTransactions(cluster.getStorageContainerManager(), delLog, deletedBlocks); @@ -464,7 +470,7 @@ public void testBlockDeletingThrottling() throws Exception { } } - private Map> createDeleteTXLog( + private Map> createDeleteTXLog( StorageContainerManager scm, DeletedBlockLog delLog, Map keyLocations, MiniOzoneCluster cluster) @@ -489,17 +495,17 @@ private Map> createDeleteTXLog( getAllBlocks(cluster, containerNames).size()); // Create a deletion TX for each key. 
- Map> containerBlocks = Maps.newHashMap(); + Map> containerBlocks = Maps.newHashMap(); for (OmKeyInfo info : keyLocations.values()) { List list = info.getLatestVersionLocations().getLocationList(); list.forEach(location -> { if (containerBlocks.containsKey(location.getContainerID())) { containerBlocks.get(location.getContainerID()) - .add(location.getBlockID().getLocalID()); + .add(new DeletedBlock(location.getBlockID(), SIZE_NOT_AVAILABLE, SIZE_NOT_AVAILABLE)); } else { - List blks = Lists.newArrayList(); - blks.add(location.getBlockID().getLocalID()); + List blks = Lists.newArrayList(); + blks.add(new DeletedBlock(location.getBlockID(), SIZE_NOT_AVAILABLE, SIZE_NOT_AVAILABLE)); containerBlocks.put(location.getContainerID(), blks); } }); @@ -875,7 +881,7 @@ public void testIncrementalContainerReportQueue() throws Exception { private void addTransactions(StorageContainerManager scm, DeletedBlockLog delLog, - Map> containerBlocksMap) + Map> containerBlocksMap) throws IOException, TimeoutException { delLog.addTransactions(containerBlocksMap); scm.getScmHAManager().asSCMHADBTransactionBuffer().flush(); @@ -908,9 +914,9 @@ public List getAllBlocks(MiniOzoneCluster cluster, Long containerID) throw } public boolean verifyBlocksWithTxnTable(MiniOzoneCluster cluster, - Map> containerBlocks) + Map> containerBlocks) throws IOException { - for (Map.Entry> entry : containerBlocks.entrySet()) { + for (Map.Entry> entry : containerBlocks.entrySet()) { KeyValueContainerData cData = getContainerMetadata(cluster, entry.getKey()); try (DBHandle db = BlockUtils.getDB(cData, cluster.getConf())) { DatanodeStore ds = db.getStore(); @@ -925,7 +931,9 @@ public boolean verifyBlocksWithTxnTable(MiniOzoneCluster cluster, txnsInTxnTable) { conID.addAll(txn.getValue().getLocalIDList()); } - if (!conID.equals(containerBlocks.get(entry.getKey()))) { + List localIDList = containerBlocks.get(entry.getKey()).stream() + .map(b -> b.getBlockID().getLocalID()).collect(Collectors.toList()); + if 
(!conID.equals(localIDList)) { return false; } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmDataDistributionFinalization.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmDataDistributionFinalization.java new file mode 100644 index 000000000000..cad55e761577 --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmDataDistributionFinalization.java @@ -0,0 +1,475 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.upgrade; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT; +import static org.apache.hadoop.hdds.client.ReplicationFactor.THREE; +import static org.apache.hadoop.hdds.client.ReplicationType.RATIS; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.CLOSED; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL; +import static org.apache.hadoop.hdds.scm.block.SCMDeletedBlockTransactionStatusManager.EMPTY_SUMMARY; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.common.BlockGroup.SIZE_NOT_AVAILABLE; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.IOException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; +import org.apache.hadoop.hdds.scm.ScmConfig; +import org.apache.hadoop.hdds.scm.block.DeletedBlockLogImpl; +import 
org.apache.hadoop.hdds.scm.block.SCMDeletedBlockTransactionStatusManager; +import org.apache.hadoop.hdds.scm.ha.SCMHADBTransactionBuffer; +import org.apache.hadoop.hdds.scm.metadata.DBTransactionBuffer; +import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; +import org.apache.hadoop.hdds.scm.server.SCMConfigurator; +import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; +import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationCheckpoint; +import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizationContext; +import org.apache.hadoop.hdds.utils.db.CodecException; +import org.apache.hadoop.hdds.utils.db.RocksDatabaseException; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.TestDataUtil; +import org.apache.hadoop.ozone.UniformDatanodesFactory; +import org.apache.hadoop.ozone.client.BucketArgs; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.OzoneKeyDetails; +import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.common.DeletedBlock; +import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; +import org.apache.hadoop.ozone.upgrade.UpgradeFinalizationExecutor; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Tests upgrade finalization failure scenarios and corner cases specific to SCM data distribution feature. 
+ */ +public class TestScmDataDistributionFinalization { + private static final String CLIENT_ID = UUID.randomUUID().toString(); + private static final Logger LOG = + LoggerFactory.getLogger(TestScmDataDistributionFinalization.class); + + private StorageContainerLocationProtocol scmClient; + private MiniOzoneHAClusterImpl cluster; + private static final int NUM_DATANODES = 3; + private static final int NUM_SCMS = 3; + private Future finalizationFuture; + private final String volumeName = UUID.randomUUID().toString(); + private final String bucketName = UUID.randomUUID().toString(); + private OzoneBucket bucket; + private static final long BLOCK_SIZE = 1024 * 1024; // 1 MB + private static final long BLOCKS_PER_TX = 5; // 1 MB + + public void init(OzoneConfiguration conf, + UpgradeFinalizationExecutor executor, boolean doFinalize) throws Exception { + + SCMConfigurator configurator = new SCMConfigurator(); + configurator.setUpgradeFinalizationExecutor(executor); + + conf.setInt(SCMStorageConfig.TESTING_INIT_LAYOUT_VERSION_KEY, HDDSLayoutFeature.HBASE_SUPPORT.layoutVersion()); + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, + TimeUnit.MILLISECONDS); + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, + TimeUnit.MILLISECONDS); + conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, + 100, TimeUnit.MILLISECONDS); + ScmConfig scmConfig = conf.getObject(ScmConfig.class); + scmConfig.setBlockDeletionInterval(Duration.ofMillis(100)); + conf.setFromObject(scmConfig); + conf.set(HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT, "0s"); + + DatanodeConfiguration dnConf = + conf.getObject(DatanodeConfiguration.class); + dnConf.setBlockDeletionInterval(Duration.ofMillis(100)); + conf.setFromObject(dnConf); + + MiniOzoneHAClusterImpl.Builder clusterBuilder = MiniOzoneCluster.newHABuilder(conf); + clusterBuilder.setNumOfStorageContainerManagers(NUM_SCMS) + .setNumOfActiveSCMs(NUM_SCMS) + .setSCMServiceId("scmservice") + .setOMServiceId("omServiceId") 
+ .setNumOfOzoneManagers(1) + .setSCMConfigurator(configurator) + .setNumDatanodes(NUM_DATANODES) + .setDatanodeFactory(UniformDatanodesFactory.newBuilder() + .setLayoutVersion(HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()) + .build()); + this.cluster = clusterBuilder.build(); + + scmClient = cluster.getStorageContainerLocationClient(); + cluster.waitForClusterToBeReady(); + assertEquals(HDDSLayoutFeature.HBASE_SUPPORT.layoutVersion(), + cluster.getStorageContainerManager().getLayoutVersionManager().getMetadataLayoutVersion()); + + // Create Volume and Bucket + try (OzoneClient ozoneClient = OzoneClientFactory.getRpcClient(conf)) { + ObjectStore store = ozoneClient.getObjectStore(); + store.createVolume(volumeName); + OzoneVolume volume = store.getVolume(volumeName); + BucketArgs.Builder builder = BucketArgs.newBuilder(); + volume.createBucket(bucketName, builder.build()); + bucket = volume.getBucket(bucketName); + } + + // Launch finalization from the client. In the current implementation, + // this call will block until finalization completes. If the test + // involves restarts or leader changes the client may be disconnected, + // but finalization should still proceed. + if (doFinalize) { + finalizationFuture = Executors.newSingleThreadExecutor().submit( + () -> { + try { + scmClient.finalizeScmUpgrade(CLIENT_ID); + } catch (IOException ex) { + LOG.info("finalization client failed. This may be expected if the" + + " test injected failures.", ex); + } + }); + } + } + + @AfterEach + public void shutdown() { + if (cluster != null) { + cluster.shutdown(); + } + } + + /** + * Test for an empty cluster. 
+ */ + @Test + public void testFinalizationEmptyClusterDataDistribution() throws Exception { + init(new OzoneConfiguration(), null, true); + assertNull(cluster.getStorageContainerLocationClient().getDeletedBlockSummary()); + + finalizationFuture.get(); + TestHddsUpgradeUtils.waitForFinalizationFromClient(scmClient, CLIENT_ID); + // Make sure old leader has caught up and all SCMs have finalized. + waitForScmsToFinalize(cluster.getStorageContainerManagersList()); + assertEquals(HDDSLayoutFeature.STORAGE_DATA_DISTRIBUTION.layoutVersion(), + cluster.getStorageContainerManager().getLayoutVersionManager().getMetadataLayoutVersion()); + + TestHddsUpgradeUtils.testPostUpgradeConditionsSCM( + cluster.getStorageContainerManagersList(), 0, NUM_DATANODES); + TestHddsUpgradeUtils.testPostUpgradeConditionsDataNodes( + cluster.getHddsDatanodes(), 0, CLOSED); + assertNotNull(cluster.getStorageContainerLocationClient().getDeletedBlockSummary()); + + for (StorageContainerManager scm: cluster.getStorageContainerManagersList()) { + DeletedBlockLogImpl deletedBlockLog = (DeletedBlockLogImpl) scm.getScmBlockManager().getDeletedBlockLog(); + SCMDeletedBlockTransactionStatusManager statusManager = + deletedBlockLog.getSCMDeletedBlockTransactionStatusManager(); + HddsProtos.DeletedBlocksTransactionSummary summary = statusManager.getTransactionSummary(); + assertEquals(EMPTY_SUMMARY, summary); + } + + long lastTxId = findLastTx(); + StorageContainerManager activeSCM = cluster.getActiveSCM(); + assertEquals(-1, lastTxId, "Last transaction ID should be -1"); + + // generate old format deletion tx, summary should keep empty, total DB tx 4 + int txCount = 4; + activeSCM.getScmBlockManager().getDeletedBlockLog().addTransactions(generateDeletedBlocks(txCount, false)); + flushDBTransactionBuffer(activeSCM); + ArrayList txIdList = getRowsInTable(activeSCM.getScmMetadataStore().getDeletedBlocksTXTable()); + assertEquals(txCount, txIdList.size()); + DeletedBlockLogImpl deletedBlockLog = 
(DeletedBlockLogImpl) activeSCM.getScmBlockManager().getDeletedBlockLog(); + SCMDeletedBlockTransactionStatusManager statusManager = + deletedBlockLog.getSCMDeletedBlockTransactionStatusManager(); + HddsProtos.DeletedBlocksTransactionSummary summary = statusManager.getTransactionSummary(); + assertEquals(EMPTY_SUMMARY, summary); + statusManager.removeTransactions(txIdList); + + // generate new deletion tx, summary should be updated, total DB tx 4 + lastTxId = findLastTx(); + activeSCM.getScmBlockManager().getDeletedBlockLog().addTransactions(generateDeletedBlocks(txCount, true)); + flushDBTransactionBuffer(activeSCM); + ArrayList txWithSizeList = getRowsInTable(activeSCM.getScmMetadataStore().getDeletedBlocksTXTable()); + assertEquals(txCount, txWithSizeList.size()); + + summary = statusManager.getTransactionSummary(); + assertEquals(lastTxId + 1, summary.getFirstTxID()); + assertEquals(txCount, summary.getTotalTransactionCount()); + assertEquals(txCount * BLOCKS_PER_TX, summary.getTotalBlockCount()); + assertEquals(txCount * BLOCKS_PER_TX * BLOCK_SIZE, summary.getTotalBlockSize()); + assertEquals(txCount * BLOCKS_PER_TX * BLOCK_SIZE * 3, summary.getTotalBlockReplicatedSize()); + + // delete first half of txs and verify summary, total DB tx 2 + txIdList = txWithSizeList.stream().limit(txCount / 2).collect(Collectors.toCollection(ArrayList::new)); + assertEquals(txCount / 2, txIdList.size()); + statusManager.removeTransactions(txIdList); + flushDBTransactionBuffer(activeSCM); + txWithSizeList = getRowsInTable(activeSCM.getScmMetadataStore().getDeletedBlocksTXTable()); + assertEquals(txCount / 2, txWithSizeList.size()); + summary = statusManager.getTransactionSummary(); + assertEquals(lastTxId + 1, summary.getFirstTxID()); + assertEquals(txCount / 2, summary.getTotalTransactionCount()); + assertEquals(txCount * BLOCKS_PER_TX / 2, summary.getTotalBlockCount()); + assertEquals(txCount * BLOCKS_PER_TX * BLOCK_SIZE / 2, summary.getTotalBlockSize()); + 
assertEquals(txCount * BLOCKS_PER_TX * BLOCK_SIZE * 3 / 2, summary.getTotalBlockReplicatedSize()); + + // generate old format deletion tx, summary should keep the same, total DB tx 6 + activeSCM.getScmBlockManager().getDeletedBlockLog().addTransactions(generateDeletedBlocks(txCount, false)); + flushDBTransactionBuffer(activeSCM); + txIdList = getRowsInTable(activeSCM.getScmMetadataStore().getDeletedBlocksTXTable()); + assertEquals(txCount + txCount / 2, txIdList.size()); + txIdList.removeAll(txWithSizeList); + ArrayList txWithoutSizeList = txIdList; + + summary = statusManager.getTransactionSummary(); + assertEquals(lastTxId + 1, summary.getFirstTxID()); + assertEquals(txCount / 2, summary.getTotalTransactionCount()); + assertEquals(txCount * BLOCKS_PER_TX / 2, summary.getTotalBlockCount()); + assertEquals(txCount * BLOCKS_PER_TX * BLOCK_SIZE / 2, summary.getTotalBlockSize()); + assertEquals(txCount * BLOCKS_PER_TX * BLOCK_SIZE * 3 / 2, summary.getTotalBlockReplicatedSize()); + + // delete old format deletion tx, summary should keep the same + statusManager.removeTransactions(txWithoutSizeList); + flushDBTransactionBuffer(activeSCM); + summary = statusManager.getTransactionSummary(); + assertEquals(lastTxId + 1, summary.getFirstTxID()); + assertEquals(txCount / 2, summary.getTotalTransactionCount()); + assertEquals(txCount * BLOCKS_PER_TX / 2, summary.getTotalBlockCount()); + assertEquals(txCount * BLOCKS_PER_TX * BLOCK_SIZE / 2, summary.getTotalBlockSize()); + assertEquals(txCount * BLOCKS_PER_TX * BLOCK_SIZE * 3 / 2, summary.getTotalBlockReplicatedSize()); + + // delete remaining txs, summary should become nearly empty + statusManager.removeTransactions(txWithSizeList); + flushDBTransactionBuffer(activeSCM); + summary = statusManager.getTransactionSummary(); + assertEquals(lastTxId + 1, summary.getFirstTxID()); + assertEquals(0, summary.getTotalTransactionCount()); + assertEquals(0, summary.getTotalBlockCount()); + assertEquals(0, summary.getTotalBlockSize()); + 
assertEquals(0, summary.getTotalBlockReplicatedSize()); + + // delete remaining txs twice, summary should keep the same + statusManager.removeTransactions(txWithSizeList); + flushDBTransactionBuffer(activeSCM); + summary = statusManager.getTransactionSummary(); + assertEquals(lastTxId + 1, summary.getFirstTxID()); + assertEquals(0, summary.getTotalTransactionCount()); + assertEquals(0, summary.getTotalBlockCount()); + assertEquals(0, summary.getTotalBlockSize()); + assertEquals(0, summary.getTotalBlockReplicatedSize()); + } + + /** + * Test for none empty cluster. + */ + @Test + public void testFinalizationNonEmptyClusterDataDistribution() throws Exception { + init(new OzoneConfiguration(), null, false); + // stop SCMBlockDeletingService + for (StorageContainerManager scm: cluster.getStorageContainerManagersList()) { + scm.getScmBlockManager().getSCMBlockDeletingService().stop(); + } + + // write some tx + int txCount = 2; + StorageContainerManager activeSCM = cluster.getActiveSCM(); + activeSCM.getScmBlockManager().getDeletedBlockLog().addTransactions(generateDeletedBlocks(txCount, false)); + flushDBTransactionBuffer(activeSCM); + assertNull(cluster.getStorageContainerLocationClient().getDeletedBlockSummary()); + + finalizationFuture = Executors.newSingleThreadExecutor().submit( + () -> { + try { + scmClient.finalizeScmUpgrade(CLIENT_ID); + } catch (IOException ex) { + LOG.info("finalization client failed. This may be expected if the" + + " test injected failures.", ex); + } + }); + finalizationFuture.get(); + TestHddsUpgradeUtils.waitForFinalizationFromClient(scmClient, CLIENT_ID); + // Make sure old leader has caught up and all SCMs have finalized. 
+ waitForScmsToFinalize(cluster.getStorageContainerManagersList()); + assertEquals(HDDSLayoutFeature.STORAGE_DATA_DISTRIBUTION.layoutVersion(), + cluster.getStorageContainerManager().getLayoutVersionManager().getMetadataLayoutVersion()); + + TestHddsUpgradeUtils.testPostUpgradeConditionsSCM( + cluster.getStorageContainerManagersList(), 0, NUM_DATANODES); + TestHddsUpgradeUtils.testPostUpgradeConditionsDataNodes( + cluster.getHddsDatanodes(), 0, CLOSED); + assertNotNull(cluster.getStorageContainerLocationClient().getDeletedBlockSummary()); + + for (StorageContainerManager scm: cluster.getStorageContainerManagersList()) { + DeletedBlockLogImpl deletedBlockLog = (DeletedBlockLogImpl) scm.getScmBlockManager().getDeletedBlockLog(); + SCMDeletedBlockTransactionStatusManager statusManager = + deletedBlockLog.getSCMDeletedBlockTransactionStatusManager(); + HddsProtos.DeletedBlocksTransactionSummary summary = statusManager.getTransactionSummary(); + assertEquals(EMPTY_SUMMARY, summary); + } + + long lastTxId = findLastTx(); + assertNotEquals(-1, lastTxId, "Last transaction ID should not be -1"); + + final String keyName = "key" + System.nanoTime(); + // Create the key + String value = "sample value"; + TestDataUtil.createKey(bucket, keyName, ReplicationConfig.fromTypeAndFactor(RATIS, THREE), value.getBytes(UTF_8)); + // update scmInfo in OM + OzoneKeyDetails keyDetails = bucket.getKey(keyName); + // delete the key + bucket.deleteKey(keyName); + + DeletedBlockLogImpl deletedBlockLog = (DeletedBlockLogImpl) activeSCM.getScmBlockManager().getDeletedBlockLog(); + SCMDeletedBlockTransactionStatusManager statusManager = + deletedBlockLog.getSCMDeletedBlockTransactionStatusManager(); + GenericTestUtils.waitFor( + () -> !EMPTY_SUMMARY.equals(statusManager.getTransactionSummary()), 100, 5000); + HddsProtos.DeletedBlocksTransactionSummary summary = statusManager.getTransactionSummary(); + assertEquals(lastTxId + 1, summary.getFirstTxID()); + assertEquals(1, 
summary.getTotalTransactionCount()); + assertEquals(1, summary.getTotalBlockCount()); + assertEquals(value.getBytes(UTF_8).length, summary.getTotalBlockSize()); + assertEquals(value.getBytes(UTF_8).length * 3, summary.getTotalBlockReplicatedSize()); + + // force close the container so that block can be deleted + activeSCM.getClientProtocolServer().closeContainer( + keyDetails.getOzoneKeyLocations().get(0).getContainerID()); + // wait for container to be closed + GenericTestUtils.waitFor(() -> { + try { + return activeSCM.getClientProtocolServer().getContainer( + keyDetails.getOzoneKeyLocations().get(0).getContainerID()) + .getState() == HddsProtos.LifeCycleState.CLOSED; + } catch (IOException e) { + fail("Error while checking container state", e); + return false; + } + }, 100, 5000); + + // flush buffer and start SCMBlockDeletingService + for (StorageContainerManager scm: cluster.getStorageContainerManagersList()) { + flushDBTransactionBuffer(scm); + scm.getScmBlockManager().getSCMBlockDeletingService().start(); + } + + // wait for block deletion transactions to be confirmed by DN + GenericTestUtils.waitFor( + () -> statusManager.getTransactionSummary().getTotalTransactionCount() == 0, 100, 10000); + } + + private Map> generateDeletedBlocks(int dataSize, boolean withSize) { + Map> blockMap = new HashMap<>(); + int continerIDBase = RandomUtils.secure().randomInt(0, 100); + int localIDBase = RandomUtils.secure().randomInt(0, 1000); + for (int i = 0; i < dataSize; i++) { + long containerID = continerIDBase + i; + List blocks = new ArrayList<>(); + for (int j = 0; j < BLOCKS_PER_TX; j++) { + long localID = localIDBase + j; + if (withSize) { + blocks.add(new DeletedBlock(new BlockID(containerID, localID), BLOCK_SIZE, BLOCK_SIZE * 3)); + } else { + blocks.add(new DeletedBlock(new BlockID(containerID, localID), SIZE_NOT_AVAILABLE, SIZE_NOT_AVAILABLE)); + } + } + blockMap.put(containerID, blocks); + } + return blockMap; + } + + private long findLastTx() throws 
RocksDatabaseException, CodecException { + StorageContainerManager activeSCM = cluster.getActiveSCM(); + long lastTxId = -1; + try (Table.KeyValueIterator iter = + activeSCM.getScmMetadataStore().getDeletedBlocksTXTable().iterator()) { + while (iter.hasNext()) { + Table.KeyValue entry = iter.next(); + if (lastTxId < entry.getKey()) { + lastTxId = entry.getKey(); + } + } + } + return lastTxId; + } + + private void waitForScmsToFinalize(Collection scms) + throws Exception { + for (StorageContainerManager scm: scms) { + waitForScmToFinalize(scm); + } + } + + private void waitForScmToFinalize(StorageContainerManager scm) + throws Exception { + GenericTestUtils.waitFor(() -> !scm.isInSafeMode(), 500, 5000); + GenericTestUtils.waitFor(() -> { + FinalizationCheckpoint checkpoint = + scm.getScmContext().getFinalizationCheckpoint(); + LOG.info("Waiting for SCM {} (leader? {}) to finalize. Current " + + "finalization checkpoint is {}", + scm.getSCMNodeId(), scm.checkLeader(), checkpoint); + return checkpoint.hasCrossed( + FinalizationCheckpoint.FINALIZATION_COMPLETE); + }, 2_000, 60_000); + } + + private void flushDBTransactionBuffer(StorageContainerManager scm) throws IOException { + DBTransactionBuffer dbTxBuffer = scm.getScmHAManager().getDBTransactionBuffer(); + if (dbTxBuffer instanceof SCMHADBTransactionBuffer) { + SCMHADBTransactionBuffer buffer = (SCMHADBTransactionBuffer) dbTxBuffer; + buffer.flush(); + } + } + + private ArrayList getRowsInTable(Table table) + throws IOException { + ArrayList txIdList = new ArrayList<>(); + if (table != null) { + try (Table.KeyValueIterator keyValueTableIterator = table.iterator()) { + while (keyValueTableIterator.hasNext()) { + txIdList.add(keyValueTableIterator.next().getKey()); + } + } + } + return txIdList; + } +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestOMSnapshotDAG.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestOMSnapshotDAG.java index 
5429dc0f4a12..9f69ed51b7ca 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestOMSnapshotDAG.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/freon/TestOMSnapshotDAG.java @@ -130,7 +130,7 @@ public static void shutdown() { } private String getDBCheckpointAbsolutePath(SnapshotInfo snapshotInfo) { - return OmSnapshotManager.getSnapshotPath(conf, snapshotInfo); + return OmSnapshotManager.getSnapshotPath(conf, snapshotInfo, 0); } private static String getSnapshotDBKey(String volumeName, String bucketName, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java index 3d542785e113..d0b38116d5fa 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java @@ -733,7 +733,7 @@ private String createSnapshot(String vname, String bname) writeClient.createSnapshot(vname, bname, snapshotName); SnapshotInfo snapshotInfo = om.getMetadataManager().getSnapshotInfoTable() .get(SnapshotInfo.getTableKey(vname, bname, snapshotName)); - String snapshotPath = getSnapshotPath(conf, snapshotInfo) + String snapshotPath = getSnapshotPath(conf, snapshotInfo, 0) + OM_KEY_PREFIX; GenericTestUtils.waitFor(() -> new File(snapshotPath).exists(), 100, 30000); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java index 0f5c8bae4b46..a6ae3eaab21f 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java +++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConsts.OM_CHECKPOINT_DIR; import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME; import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; @@ -35,6 +36,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.anySet; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyBoolean; import static org.mockito.Mockito.doCallRealMethod; @@ -146,6 +149,7 @@ void init() throws Exception { // ensure cache entries are not evicted thereby snapshot db's are not closed conf.setTimeDuration(OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_CLEANUP_SERVICE_RUN_INTERVAL, 100, TimeUnit.MINUTES); + conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); } @AfterEach @@ -228,12 +232,15 @@ public void write(int b) throws IOException { .thenReturn(lock); doCallRealMethod().when(omDbCheckpointServletMock).getCheckpoint(any(), anyBoolean()); assertNull(doCallRealMethod().when(omDbCheckpointServletMock).getBootstrapTempData()); - doCallRealMethod().when(omDbCheckpointServletMock).getSnapshotDirs(any()); doCallRealMethod().when(omDbCheckpointServletMock). 
processMetadataSnapshotRequest(any(), any(), anyBoolean(), anyBoolean()); doCallRealMethod().when(omDbCheckpointServletMock).writeDbDataToStream(any(), any(), any(), any()); doCallRealMethod().when(omDbCheckpointServletMock).getCompactionLogDir(); doCallRealMethod().when(omDbCheckpointServletMock).getSstBackupDir(); + doCallRealMethod().when(omDbCheckpointServletMock) + .transferSnapshotData(anySet(), any(), anySet(), any(), any(), anyMap()); + doCallRealMethod().when(omDbCheckpointServletMock).createAndPrepareCheckpoint(anyBoolean()); + doCallRealMethod().when(omDbCheckpointServletMock).getSnapshotDirsFromDB(any(), any(), any()); } @ParameterizedTest @@ -586,6 +593,99 @@ public void testBootstrapLockBlocksMultipleServices() throws Exception { assertTrue(servicesSucceeded.get() > 0, "Services should have succeeded after lock release"); } + /** + * Tests the full checkpoint servlet flow to ensure snapshot paths are read + * from checkpoint metadata (frozen state) rather than live OM metadata (current state). + * Scenario: + * 1. Create snapshots S1 + * 2. create snapshot S2 later just before checkpoint + * 3. 
Servlet processes checkpoint - should still include S1, S3 data as + * checkpoint snapshotInfoTable has S1 S3 + */ + @Test + public void testCheckpointIncludesSnapshotsFromFrozenState() throws Exception { + String volumeName = "vol" + RandomStringUtils.secure().nextNumeric(5); + String bucketName = "buck" + RandomStringUtils.secure().nextNumeric(5); + + setupCluster(); + om.getKeyManager().getSnapshotSstFilteringService().pause(); + + // Create test data and snapshots + OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(client, volumeName, bucketName); + + // Create key before first snapshot + TestDataUtil.createKey(bucket, "key1", + ReplicationConfig.fromTypeAndFactor(ReplicationType.RATIS, ReplicationFactor.ONE), + "data1".getBytes(StandardCharsets.UTF_8)); + client.getObjectStore().createSnapshot(volumeName, bucketName, "snapshot1"); + // At this point: Live OM has snapshots S1 + List snapshots = new ArrayList<>(); + client.getObjectStore().listSnapshot(volumeName, bucketName, "", null) + .forEachRemaining(snapshots::add); + assertEquals(1, snapshots.size(), "Should have 1 snapshot initially"); + OzoneSnapshot snapshot1 = snapshots.stream() + .filter(snap -> snap.getName().equals("snapshot1")) + .findFirst() + .orElseThrow(() -> new RuntimeException("snapshot1 not found")); + + // Setup servlet mocks for checkpoint processing + setupMocks(); + when(requestMock.getParameter(OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA)).thenReturn("true"); + + // Create a checkpoint that captures current state (S1) + DBStore dbStore = om.getMetadataManager().getStore(); + DBStore spyDbStore = spy(dbStore); + AtomicReference capturedCheckpoint = new AtomicReference<>(); + + when(spyDbStore.getCheckpoint(true)).thenAnswer(invocation -> { + // Purge snapshot2 before checkpoint + // create snapshot 3 before checkpoint + client.getObjectStore().createSnapshot(volumeName, bucketName, "snapshot2"); + // Also wait for double buffer to flush to ensure all transactions are committed + 
om.awaitDoubleBufferFlush(); + DBCheckpoint checkpoint = spy(dbStore.getCheckpoint(true)); + doNothing().when(checkpoint).cleanupCheckpoint(); // Don't cleanup for verification + capturedCheckpoint.set(checkpoint); + return checkpoint; + }); + + // Initialize servlet + doCallRealMethod().when(omDbCheckpointServletMock).initialize(any(), any(), + eq(false), any(), any(), eq(false)); + omDbCheckpointServletMock.initialize(spyDbStore, om.getMetrics().getDBCheckpointMetrics(), + false, om.getOmAdminUsernames(), om.getOmAdminGroups(), false); + when(responseMock.getOutputStream()).thenReturn(servletOutputStream); + // Process checkpoint servlet + omDbCheckpointServletMock.doGet(requestMock, responseMock); + snapshots.clear(); + client.getObjectStore().listSnapshot(volumeName, bucketName, "", null) + .forEachRemaining(snapshots::add); + assertEquals(2, snapshots.size(), "Should have 2 snapshots"); + OzoneSnapshot snapshot2 = snapshots.stream() + .filter(snap -> snap.getName().equals("snapshot2")) + .findFirst() + .orElseThrow(() -> new RuntimeException("snapshot2 not found")); + // Extract tarball and verify contents + String testDirName = folder.resolve("testDir").toString(); + String newDbDirName = testDirName + OM_KEY_PREFIX + OM_DB_NAME; + File newDbDir = new File(newDbDirName); + assertTrue(newDbDir.mkdirs()); + FileUtil.unTar(tempFile, newDbDir); + OmSnapshotUtils.createHardLinks(newDbDir.toPath(), true); + Path snapshot1DbDir = Paths.get(newDbDir.toPath().toString(), OM_SNAPSHOT_CHECKPOINT_DIR, + OM_DB_NAME + "-" + snapshot1.getSnapshotId()); + Path snapshot2DbDir = Paths.get(newDbDir.toPath().toString(), OM_SNAPSHOT_CHECKPOINT_DIR, + OM_DB_NAME + "-" + snapshot2.getSnapshotId()); + boolean snapshot1IncludedInCheckpoint = Files.exists(snapshot1DbDir); + boolean snapshot2IncludedInCheckpoint = Files.exists(snapshot2DbDir); + assertTrue(snapshot1IncludedInCheckpoint && snapshot2IncludedInCheckpoint, + "Checkpoint should include both snapshot1 and snapshot2 data"); + 
// Cleanup + if (capturedCheckpoint.get() != null) { + capturedCheckpoint.get().cleanupCheckpoint(); + } + } + private static void deleteWalFiles(Path snapshotDbDir) throws IOException { try (Stream filesInTarball = Files.list(snapshotDbDir)) { List files = filesInTarball.filter(p -> p.toString().contains(".log")) @@ -648,6 +748,7 @@ private void setupClusterAndMocks(String volumeName, String bucketName, // Init the mock with the spyDbstore doCallRealMethod().when(omDbCheckpointServletMock).initialize(any(), any(), eq(false), any(), any(), eq(false)); + doCallRealMethod().when(omDbCheckpointServletMock).getSnapshotDirsFromDB(any(), any(), any()); omDbCheckpointServletMock.initialize(spyDbStore, om.getMetrics().getDBCheckpointMetrics(), false, om.getOmAdminUsernames(), om.getOmAdminGroups(), false); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMHALeaderSpecificACLEnforcement.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMHALeaderSpecificACLEnforcement.java index 4e49a92a9c91..2c50aa9ddce5 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMHALeaderSpecificACLEnforcement.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMHALeaderSpecificACLEnforcement.java @@ -17,11 +17,13 @@ package org.apache.hadoop.ozone.om; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_AUTHORIZER_CLASS; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.PERMISSION_DENIED; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static 
org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -31,6 +33,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Locale; +import java.util.concurrent.TimeoutException; import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.utils.IOUtils; @@ -41,8 +44,10 @@ import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.OzoneKey; import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.VolumeArgs; +import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.security.acl.IAccessAuthorizer; import org.apache.hadoop.ozone.security.acl.OzoneNativeAuthorizer; @@ -50,6 +55,7 @@ import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; @@ -76,16 +82,20 @@ public class TestOMHALeaderSpecificACLEnforcement { private MiniOzoneHAClusterImpl cluster; private OzoneClient client; private UserGroupInformation testUserUgi; + private UserGroupInformation adminUserUgi; + private OzoneManager theLeaderOM; @BeforeAll public void init() throws Exception { // Create test user testUserUgi = UserGroupInformation.createUserForTesting(TEST_USER, new String[]{"testgroup"}); + adminUserUgi = UserGroupInformation.getCurrentUser(); // Set up and start the cluster setupCluster(); // Create admin volume that will be used for bucket permission testing + theLeaderOM = cluster.getOMLeader(); createAdminVolume(); } @@ -97,6 +107,22 @@ public void shutdown() { } } + @BeforeEach + public void 
restoreLeadership() throws IOException, InterruptedException, TimeoutException { + OzoneManager currentLeader = cluster.getOMLeader(); + if (!currentLeader.getOMNodeId().equals(theLeaderOM.getOMNodeId())) { + currentLeader.transferLeadership(theLeaderOM.getOMNodeId()); + GenericTestUtils.waitFor(() -> { + try { + OzoneManager currentLeaderCheck = cluster.getOMLeader(); + return !currentLeaderCheck.getOMNodeId().equals(currentLeader.getOMNodeId()); + } catch (Exception e) { + return false; + } + }, 1000, 30000); + } + } + /** * Main test method that validates leader-specific ACL enforcement in OM HA. * 1. Creates a mini cluster with OM HA @@ -165,7 +191,7 @@ private OzoneConfiguration createBaseConfiguration() throws IOException { conf.setBoolean(OZONE_ACL_ENABLED, true); // Set current user as initial admin (needed for cluster setup) - String currentUser = UserGroupInformation.getCurrentUser().getShortUserName(); + String currentUser = adminUserUgi.getShortUserName(); conf.set(OZONE_ADMINISTRATORS, currentUser); return conf; @@ -181,7 +207,7 @@ private void createAdminVolume() throws Exception { // Create volume as admin user VolumeArgs volumeArgs = VolumeArgs.newBuilder() - .setOwner(UserGroupInformation.getCurrentUser().getShortUserName()) + .setOwner(adminUserUgi.getShortUserName()) .build(); adminObjectStore.createVolume(ADMIN_VOLUME, volumeArgs); @@ -260,7 +286,90 @@ private void testVolumeAndBucketCreationAsUser(boolean shouldSucceed) throws Exc } } finally { // Reset to original user - UserGroupInformation.setLoginUser(UserGroupInformation.getCurrentUser()); + UserGroupInformation.setLoginUser(adminUserUgi); + } + } + + /** + * Tests that setTimes ACL check is enforced in preExecute and is leader-specific. + * 1. Creates a key with admin user + * 2. Adds test user as admin on the current leader + * 3. Verifies that test user (as admin) can setTimes on key owned by someone else + * 4. Transfers leadership to another node + * 5. 
Verifies that setTimes fails with PERMISSION_DENIED when test user is no longer admin + */ + @Test + public void testKeySetTimesAclEnforcementAfterLeadershipChange() throws Exception { + // Step 1: Create a volume, bucket, and key as the admin user + ObjectStore adminObjectStore = client.getObjectStore(); + String keyTestVolume = "keyvol-" + + RandomStringUtils.secure().nextAlphabetic(5).toLowerCase(Locale.ROOT); + String keyTestBucket = "keybucket-" + + RandomStringUtils.secure().nextAlphabetic(5).toLowerCase(Locale.ROOT); + String keyName = "testkey-" + + RandomStringUtils.secure().nextAlphabetic(5).toLowerCase(Locale.ROOT); + + String adminUser = adminUserUgi.getShortUserName(); + VolumeArgs volumeArgs = VolumeArgs.newBuilder() + .setOwner(adminUser) + .build(); + adminObjectStore.createVolume(keyTestVolume, volumeArgs); + OzoneVolume adminVolume = adminObjectStore.getVolume(keyTestVolume); + + BucketArgs bucketArgs = BucketArgs.newBuilder().build(); + adminVolume.createBucket(keyTestBucket, bucketArgs); + OzoneBucket adminBucket = adminVolume.getBucket(keyTestBucket); + + // Create a key as admin (so test user is NOT the owner) + try (OzoneOutputStream out = adminBucket.createKey(keyName, 0)) { + out.write("test data".getBytes(UTF_8)); + } + + OzoneKey key = adminBucket.getKey(keyName); + assertNotNull(key, "Key should be created successfully"); + long originalMtime = key.getModificationTime().toEpochMilli(); + + // Step 2: Get the current leader and add test user as admin + OzoneManager currentLeader = cluster.getOMLeader(); + String leaderNodeId = currentLeader.getOMNodeId(); + addAdminToSpecificOM(currentLeader, TEST_USER); + + // Verify admin was added + assertTrue(currentLeader.getOmAdminUsernames().contains(TEST_USER), + "Test user should be admin on leader OM"); + + // Switch to test user and try setTimes as admin (should succeed) + UserGroupInformation.setLoginUser(testUserUgi); + try (OzoneClient userClient = 
OzoneClientFactory.getRpcClient(OM_SERVICE_ID, cluster.getConf())) { + ObjectStore userObjectStore = userClient.getObjectStore(); + OzoneVolume userVolume = userObjectStore.getVolume(keyTestVolume); + OzoneBucket userBucket = userVolume.getBucket(keyTestBucket); + + long newMtime = System.currentTimeMillis(); + userBucket.setTimes(keyName, newMtime, -1); + + // Verify the modification time was updated + key = userBucket.getKey(keyName); + assertEquals(newMtime, key.getModificationTime().toEpochMilli(), + "Modification time should be updated by admin user"); + assertNotEquals(originalMtime, key.getModificationTime().toEpochMilli(), + "Modification time should have changed"); + + OzoneManager newLeader = transferLeadershipToAnotherNode(currentLeader); + assertNotEquals(leaderNodeId, newLeader.getOMNodeId(), + "Leadership should have transferred to a different node"); + assertFalse(newLeader.getOmAdminUsernames().contains(TEST_USER), + "Test user should NOT be admin on new leader OM"); + + long anotherMtime = System.currentTimeMillis() + 10000; + OMException exception = assertThrows(OMException.class, () -> { + userBucket.setTimes(keyName, anotherMtime, -1); + }, "setTimes should fail for non-admin user on new leader"); + assertEquals(PERMISSION_DENIED, exception.getResult(), + "Should get PERMISSION_DENIED when ACL check fails in preExecute"); + } finally { + // Reset to original user + UserGroupInformation.setLoginUser(adminUserUgi); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index a1de8fc377a0..3609703c7ef6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -337,11 +337,11 @@ private void checkSnapshot(OzoneManager leaderOM, 
OzoneManager followerOM, File followerMetaDir = OMStorage.getOmDbDir(followerOM.getConfiguration()); Path followerActiveDir = Paths.get(followerMetaDir.toString(), OM_DB_NAME); Path followerSnapshotDir = - Paths.get(getSnapshotPath(followerOM.getConfiguration(), snapshotInfo)); + Paths.get(getSnapshotPath(followerOM.getConfiguration(), snapshotInfo, 0)); File leaderMetaDir = OMStorage.getOmDbDir(leaderOM.getConfiguration()); Path leaderActiveDir = Paths.get(leaderMetaDir.toString(), OM_DB_NAME); Path leaderSnapshotDir = - Paths.get(getSnapshotPath(leaderOM.getConfiguration(), snapshotInfo)); + Paths.get(getSnapshotPath(leaderOM.getConfiguration(), snapshotInfo, 0)); // Get list of live files on the leader. RocksDB activeRocksDB = ((RDBStore) leaderOM.getMetadataManager().getStore()) @@ -1056,7 +1056,7 @@ private SnapshotInfo createOzoneSnapshot(OzoneManager leaderOM, String name) .get(tableKey); // Allow the snapshot to be written to disk String fileName = - getSnapshotPath(leaderOM.getConfiguration(), snapshotInfo); + getSnapshotPath(leaderOM.getConfiguration(), snapshotInfo, 0); File snapshotDir = new File(fileName); if (!RDBCheckpointUtils .waitForCheckpointDirectoryExist(snapshotDir)) { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java index e508e585201b..01e8463f6a9b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java @@ -194,6 +194,21 @@ public void testReadKeyPermissionDenied() throws Exception { verifyAuditLog(OMAction.READ_KEY, AuditEventStatus.FAILURE); } + @Test + public void testGetFileStatusPermissionDenied() throws Exception { + OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(client); + TestDataUtil.createKey(bucket, "testKey", 
"testcontent".getBytes(StandardCharsets.UTF_8)); + + authorizer.keyAclAllow = false; + OMException exception = assertThrows(OMException.class, + () -> bucket.getFileStatus("testKey")); + + assertEquals(ResultCodes.PERMISSION_DENIED, exception.getResult()); + assertThat(logCapturer.getOutput()).contains("doesn't have READ " + + "permission to access key"); + verifyAuditLog(OMAction.GET_FILE_STATUS, AuditEventStatus.FAILURE); + } + @Test public void testSetACLPermissionDenied() throws Exception { OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(client); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestBlockDeletionService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestBlockDeletionService.java new file mode 100644 index 000000000000..c6b3b6560eaa --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestBlockDeletionService.java @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.om.service; + +import static org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature.HBASE_SUPPORT; +import static org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature.STORAGE_DATA_DISTRIBUTION; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.common.BlockGroup.SIZE_NOT_AVAILABLE; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.params.provider.Arguments.arguments; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.timeout; +import static org.mockito.Mockito.verify; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.HashMap; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; +import org.apache.hadoop.hdds.client.ECReplicationConfig; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.client.ReplicationFactor; +import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.scm.block.BlockManager; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMPerformanceMetrics; +import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; +import org.apache.hadoop.hdds.scm.server.SCMConfigurator; +import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; +import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizationContext; +import org.apache.hadoop.hdds.upgrade.TestHddsUpgradeUtils; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import 
org.apache.hadoop.ozone.UniformDatanodesFactory; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.ozone.common.BlockGroup; +import org.apache.hadoop.ozone.common.DeletedBlock; +import org.apache.hadoop.ozone.om.helpers.QuotaUtil; +import org.apache.hadoop.ozone.upgrade.InjectedUpgradeFinalizationExecutor; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.ArgumentCaptor; + +/** + * DeletionService test to Pass Usage from OM to SCM. + */ +public class TestBlockDeletionService { + private static final String CLIENT_ID = UUID.randomUUID().toString(); + private static final String VOLUME_NAME = "vol1"; + private static final String BUCKET_NAME = "bucket1"; + private static final int KEY_SIZE = 5 * 1024; // 5 KB + private static MiniOzoneCluster cluster; + private static StorageContainerLocationProtocol scmClient; + private static OzoneBucket bucket; + private static SCMPerformanceMetrics metrics; + private static InjectedUpgradeFinalizationExecutor scmFinalizationExecutor; + + public static Stream replicationConfigProvider() { + return Stream.of( + arguments(RatisReplicationConfig.getInstance(ReplicationFactor.ONE.toProto())), + arguments(RatisReplicationConfig.getInstance(ReplicationFactor.THREE.toProto())), + arguments(new ECReplicationConfig(3, 2, ECReplicationConfig.EcCodec.RS, 2 * 1024 * 1024)), + arguments(new ECReplicationConfig(6, 3, ECReplicationConfig.EcCodec.RS, 2 * 1024 * 1024)), + arguments(StandaloneReplicationConfig.getInstance(ReplicationFactor.ONE.toProto())), + 
arguments(StandaloneReplicationConfig.getInstance(ReplicationFactor.THREE.toProto())) + ); + } + + @BeforeAll + public static void init() throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 500, TimeUnit.MILLISECONDS); + conf.setInt(SCMStorageConfig.TESTING_INIT_LAYOUT_VERSION_KEY, HBASE_SUPPORT.layoutVersion()); + + scmFinalizationExecutor = new InjectedUpgradeFinalizationExecutor<>(); + SCMConfigurator configurator = new SCMConfigurator(); + configurator.setUpgradeFinalizationExecutor(scmFinalizationExecutor); + + cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(9) + .setSCMConfigurator(configurator) + .setDatanodeFactory(UniformDatanodesFactory.newBuilder() + .setLayoutVersion(HBASE_SUPPORT.layoutVersion()).build()) + .build(); + cluster.waitForClusterToBeReady(); + scmClient = cluster.getStorageContainerLocationClient(); + assertEquals(HBASE_SUPPORT.ordinal(), + cluster.getStorageContainerManager().getLayoutVersionManager().getMetadataLayoutVersion()); + metrics = cluster.getStorageContainerManager().getBlockProtocolServer().getMetrics(); + + OzoneClient ozoneClient = cluster.newClient(); + // create a volume and a bucket to be used by OzoneFileSystem + ozoneClient.getObjectStore().createVolume(VOLUME_NAME); + ozoneClient.getObjectStore().getVolume(VOLUME_NAME).createBucket(BUCKET_NAME); + bucket = ozoneClient.getObjectStore().getVolume(VOLUME_NAME).getBucket(BUCKET_NAME); + } + + @AfterAll + public static void teardown() { + if (cluster != null) { + cluster.shutdown(); + } + } + + @Test + public void testDeleteKeyQuotaWithUpgrade() throws Exception { + long initialSuccessBlocks = metrics.getDeleteKeySuccessBlocks(); + long initialFailedBlocks = metrics.getDeleteKeyFailedBlocks(); + + ReplicationConfig replicationConfig = RatisReplicationConfig.getInstance(ReplicationFactor.THREE.toProto()); + // PRE-UPGRADE + // Step 1: write a key + String keyName = 
UUID.randomUUID().toString(); + createKey(keyName, replicationConfig); + // Step 2: Spy on BlockManager and inject it into SCM + BlockManager spyManagerBefore = injectSpyBlockManager(cluster); + ArgumentCaptor> captor = ArgumentCaptor.forClass(List.class); + // Step 3: Delete the key (which triggers deleteBlocks call) + bucket.deleteKey(keyName); + // Step 4: Verify deleteBlocks call and capture argument + verify(spyManagerBefore, timeout(50000).atLeastOnce()).deleteBlocks(captor.capture()); + verifyAndAssertQuota(replicationConfig, captor, false); + GenericTestUtils.waitFor(() -> metrics.getDeleteKeySuccessBlocks() - initialSuccessBlocks == 1, 50, 1000); + GenericTestUtils.waitFor(() -> metrics.getDeleteKeyFailedBlocks() - initialFailedBlocks == 0, 50, 1000); + + // UPGRADE SCM (if specified) + // Step 5: wait for finalizing upgrade + Future finalizationFuture = Executors.newSingleThreadExecutor().submit(() -> { + try { + scmClient.finalizeScmUpgrade(CLIENT_ID); + } catch (IOException ex) { + fail("finalization client failed", ex); + } + }); + finalizationFuture.get(); + TestHddsUpgradeUtils.waitForFinalizationFromClient(scmClient, CLIENT_ID); + assertEquals(STORAGE_DATA_DISTRIBUTION.ordinal(), + cluster.getStorageContainerManager().getLayoutVersionManager().getMetadataLayoutVersion()); + + // POST-UPGRADE + //Step 6: Repeat the same steps in pre-upgrade + keyName = UUID.randomUUID().toString(); + createKey(keyName, replicationConfig); + BlockManager spyManagerAfter = injectSpyBlockManager(cluster); + bucket.deleteKey(keyName); + verify(spyManagerAfter, timeout(50000).atLeastOnce()).deleteBlocks(captor.capture()); + verifyAndAssertQuota(replicationConfig, captor, true); + GenericTestUtils.waitFor(() -> metrics.getDeleteKeySuccessBlocks() - initialSuccessBlocks == 2, 50, 1000); + GenericTestUtils.waitFor(() -> metrics.getDeleteKeyFailedBlocks() - initialFailedBlocks == 0, 50, 1000); + } + + @ParameterizedTest + @MethodSource("replicationConfigProvider") + public 
void testDeleteKeyQuotaWithDifferentReplicationTypes(ReplicationConfig replicationConfig) throws Exception { + long initialSuccessBlocks = metrics.getDeleteKeySuccessBlocks(); + long initialFailedBlocks = metrics.getDeleteKeyFailedBlocks(); + + // Step 1: write a key + String keyName = UUID.randomUUID().toString(); + createKey(keyName, replicationConfig); + // Step 2: Spy on BlockManager and inject it into SCM + BlockManager spyManagerBefore = injectSpyBlockManager(cluster); + ArgumentCaptor> captor = ArgumentCaptor.forClass(List.class); + // Step 3: Delete the key (which triggers deleteBlocks call) + bucket.deleteKey(keyName); + // Step 4: Verify deleteBlocks call and capture argument + verify(spyManagerBefore, timeout(50000).atLeastOnce()).deleteBlocks(captor.capture()); + verifyAndAssertQuota(replicationConfig, captor, true); + GenericTestUtils.waitFor(() -> metrics.getDeleteKeySuccessBlocks() - initialSuccessBlocks == 1, 50, 1000); + GenericTestUtils.waitFor(() -> metrics.getDeleteKeyFailedBlocks() - initialFailedBlocks == 0, 50, 1000); + } + + private void createKey(String keyName, ReplicationConfig replicationConfig) throws IOException { + byte[] data = new byte[KEY_SIZE]; + try (OzoneOutputStream out = bucket.createKey(keyName, KEY_SIZE, + replicationConfig, new HashMap<>())) { + out.write(data); + } + } + + private BlockManager injectSpyBlockManager(MiniOzoneCluster miniOzoneCluster) throws Exception { + StorageContainerManager scm = miniOzoneCluster.getStorageContainerManager(); + BlockManager realManager = scm.getScmBlockManager(); + BlockManager spyManager = spy(realManager); + + Field field = scm.getClass().getDeclaredField("scmBlockManager"); + field.setAccessible(true); + field.set(scm, spyManager); + return spyManager; + } + + private void verifyAndAssertQuota(ReplicationConfig replicationConfig, + ArgumentCaptor> captor, + boolean isIncludeBlockSize) throws IOException { + int index = captor.getAllValues().size() - 1; + List blockGroups = 
captor.getAllValues().get(index); + + long totalUsedBytes = blockGroups.stream() + .flatMap(group -> group.getDeletedBlocks().stream()) + .mapToLong(DeletedBlock::getReplicatedSize).sum(); + + long totalUnreplicatedBytes = blockGroups.stream() + .flatMap(group -> group.getDeletedBlocks().stream()) + .mapToLong(DeletedBlock::getSize).sum(); + + assertEquals(1, blockGroups.get(0).getDeletedBlocks().size()); + assertEquals(isIncludeBlockSize ? + QuotaUtil.getReplicatedSize(KEY_SIZE, replicationConfig) : SIZE_NOT_AVAILABLE, totalUsedBytes); + assertEquals(isIncludeBlockSize ? KEY_SIZE : SIZE_NOT_AVAILABLE, totalUnreplicatedBytes); + } +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshot.java index 93dba945d46d..19b237fe2600 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshot.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshot.java @@ -1990,7 +1990,7 @@ private String createSnapshot(String volName, String buckName, .get(SnapshotInfo.getTableKey(volName, linkedBuckets.getOrDefault(buckName, buckName), snapshotName)); String snapshotDirName = OmSnapshotManager.getSnapshotPath(ozoneManager.getConfiguration(), - snapshotInfo) + OM_KEY_PREFIX + "CURRENT"; + snapshotInfo, 0) + OM_KEY_PREFIX + "CURRENT"; GenericTestUtils .waitFor(() -> new File(snapshotDirName).exists(), 1000, 120000); return snapshotKeyPrefix; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotFileSystem.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotFileSystem.java index fca8b137b720..964513702a08 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotFileSystem.java +++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotFileSystem.java @@ -709,7 +709,7 @@ private String createSnapshot(String snapshotName) SnapshotInfo snapshotInfo = ozoneManager.getMetadataManager() .getSnapshotInfoTable() .get(SnapshotInfo.getTableKey(snapshot.getVolumeName(), snapshot.getBucketName(), snapshotName)); - String snapshotDirName = getSnapshotPath(conf, snapshotInfo) + + String snapshotDirName = getSnapshotPath(conf, snapshotInfo, 0) + OM_KEY_PREFIX + "CURRENT"; GenericTestUtils.waitFor(() -> new File(snapshotDirName).exists(), 1000, 120000); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java index bae852ae3368..b6008ab3d2e2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java @@ -410,7 +410,7 @@ private void createSnapshot(String volName, String buckName, String snapName) th String tableKey = SnapshotInfo.getTableKey(volName, buckName, snapName); SnapshotInfo snapshotInfo = SnapshotUtils.getSnapshotInfo(cluster.getOMLeader(), tableKey); - String fileName = getSnapshotPath(cluster.getOMLeader().getConfiguration(), snapshotInfo); + String fileName = getSnapshotPath(cluster.getOMLeader().getConfiguration(), snapshotInfo, 0); File snapshotDir = new File(fileName); if (!RDBCheckpointUtils.waitForCheckpointDirectoryExist(snapshotDir)) { throw new IOException("Snapshot directory doesn't exist"); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerSnapshotAcl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerSnapshotAcl.java index 
f735ad15d295..455f1430d997 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerSnapshotAcl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerSnapshotAcl.java @@ -685,7 +685,7 @@ private void createSnapshot() .get(SnapshotInfo.getTableKey(volumeName, bucketName, snapshotName)); // Allow the snapshot to be written to disk String fileName = - getSnapshotPath(ozoneManager.getConfiguration(), snapshotInfo); + getSnapshotPath(ozoneManager.getConfiguration(), snapshotInfo, 0); File snapshotDir = new File(fileName); if (!RDBCheckpointUtils .waitForCheckpointDirectoryExist(snapshotDir)) { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneSnapshotRestore.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneSnapshotRestore.java index 6c67554d7b8d..b2fde1f01960 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneSnapshotRestore.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneSnapshotRestore.java @@ -159,7 +159,7 @@ private String createSnapshot(String volName, String buckName, .getSnapshotInfoTable() .get(SnapshotInfo.getTableKey(volName, buckName, snapshotName)); String snapshotDirName = OmSnapshotManager - .getSnapshotPath(clientConf, snapshotInfo) + OM_KEY_PREFIX + "CURRENT"; + .getSnapshotPath(clientConf, snapshotInfo, 0) + OM_KEY_PREFIX + "CURRENT"; GenericTestUtils.waitFor(() -> new File(snapshotDirName).exists(), 1000, 120000); return snapshotKeyPrefix; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java index a67a4599beee..eacde483d2ac 100644 --- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java @@ -633,7 +633,7 @@ private SnapshotInfo createOzoneSnapshot(OzoneManager leaderOM, String name) thr .getSnapshotInfoTable() .get(tableKey); // Allow the snapshot to be written to disk - String fileName = getSnapshotPath(leaderOM.getConfiguration(), snapshotInfo); + String fileName = getSnapshotPath(leaderOM.getConfiguration(), snapshotInfo, 0); File snapshotDir = new File(fileName); if (!RDBCheckpointUtils.waitForCheckpointDirectoryExist(snapshotDir)) { throw new IOException("snapshot directory doesn't exist"); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDefragAdmin.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDefragAdmin.java new file mode 100644 index 000000000000..dff56a35d164 --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDefragAdmin.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.om.snapshot; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.List; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.IOUtils; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.admin.OzoneAdmin; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +/** + * Integration test for 'ozone admin om snapshot defrag' command. + * Tests that the defrag command can be successfully triggered on any OM + * (leader or follower) in an HA cluster. 
+ */ +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public class TestSnapshotDefragAdmin { + + private static MiniOzoneHAClusterImpl cluster; + private static OzoneClient client; + private static String omServiceId; + + @BeforeAll + public static void init() throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean(OMConfigKeys.OZONE_FILESYSTEM_SNAPSHOT_ENABLED_KEY, true); + // Enable snapshot defrag service + conf.setInt(OMConfigKeys.OZONE_SNAPSHOT_DEFRAG_SERVICE_INTERVAL, 7200); + conf.setInt(OMConfigKeys.SNAPSHOT_DEFRAG_LIMIT_PER_TASK, 1); + + omServiceId = "om-service-test-defrag"; + cluster = MiniOzoneCluster.newHABuilder(conf) + .setOMServiceId(omServiceId) + .setNumOfOzoneManagers(3) + .build(); + + cluster.waitForClusterToBeReady(); + client = cluster.newClient(); + } + + @AfterAll + public static void cleanup() { + IOUtils.closeQuietly(client); + if (cluster != null) { + cluster.shutdown(); + } + } + + /** + * Tests triggering snapshot defrag on the OM leader. + */ + @Test + public void testDefragOnLeader() throws Exception { + OzoneManager leader = cluster.getOMLeader(); + String leaderId = leader.getOMNodeId(); + + executeDefragCommand(leaderId, false); + } + + /** + * Tests triggering snapshot defrag on an OM follower. + */ + @Test + public void testDefragOnFollower() throws Exception { + OzoneManager leader = cluster.getOMLeader(); + List allOMs = cluster.getOzoneManagersList(); + + // Find a follower OM + OzoneManager follower = null; + for (OzoneManager om : allOMs) { + if (!om.getOMNodeId().equals(leader.getOMNodeId())) { + follower = om; + break; + } + } + + assertNotNull(follower, "Should have at least one follower OM"); + executeDefragCommand(follower.getOMNodeId(), false); + } + + /** + * Tests triggering snapshot defrag on all OMs in the cluster. 
+ */ + @Test + public void testDefragOnAllOMs() throws Exception { + List allOMs = cluster.getOzoneManagersList(); + + assertEquals(3, allOMs.size(), "Expected 3 OMs in the cluster"); + + // Test defrag on each OM + for (OzoneManager om : allOMs) { + String omNodeId = om.getOMNodeId(); + executeDefragCommand(omNodeId, false); + } + } + + /** + * Tests triggering snapshot defrag with --no-wait option. + */ + @Test + public void testDefragWithNoWait() throws Exception { + OzoneManager leader = cluster.getOMLeader(); + String leaderId = leader.getOMNodeId(); + + executeDefragCommand(leaderId, true); + } + + /** + * Tests triggering snapshot defrag on a follower with --no-wait option. + */ + @Test + public void testDefragOnFollowerWithNoWait() throws Exception { + OzoneManager leader = cluster.getOMLeader(); + List allOMs = cluster.getOzoneManagersList(); + + // Find a follower OM + OzoneManager follower = null; + for (OzoneManager om : allOMs) { + if (!om.getOMNodeId().equals(leader.getOMNodeId())) { + follower = om; + break; + } + } + + assertNotNull(follower, "Should have at least one follower OM"); + executeDefragCommand(follower.getOMNodeId(), true); + } + + /** + * Helper method to execute the defrag command on a specific OM node. 
+ * + * @param nodeId the OM node ID to target + * @param noWait whether to use the --no-wait option + */ + private void executeDefragCommand(String nodeId, boolean noWait) throws Exception { + OzoneAdmin ozoneAdmin = new OzoneAdmin(); + ozoneAdmin.getOzoneConf().addResource(cluster.getConf()); + + // Capture output to verify command execution + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(baos, true, StandardCharsets.UTF_8.name()); + PrintStream oldOut = System.out; + System.setOut(ps); + + try { + String[] args; + if (noWait) { + args = new String[]{ + "om", + "snapshot", + "defrag", + "-id", omServiceId, + "--node-id", nodeId, + "--no-wait" + }; + } else { + args = new String[]{ + "om", + "snapshot", + "defrag", + "-id", omServiceId, + "--node-id", nodeId + }; + } + + int exitCode = ozoneAdmin.execute(args); + System.out.flush(); + String output = baos.toString(StandardCharsets.UTF_8.name()); + + // Verify successful execution + assertEquals(0, exitCode, + "Command should execute successfully on OM " + nodeId); + assertTrue(output.contains("Triggering Snapshot Defrag Service"), + "Output should indicate defrag service is being triggered"); + + if (noWait) { + assertTrue(output.contains("triggered successfully") && + output.contains("background"), + "Output should indicate task triggered in background: " + output); + } else { + assertTrue(output.contains("completed successfully") || + output.contains("failed") || + output.contains("interrupted"), + "Output should indicate completion status: " + output); + } + } finally { + System.setOut(oldOut); + ps.close(); + } + } +} + diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java new file mode 100644 index 000000000000..ec9335ea1137 --- /dev/null +++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.shell; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionSummary; +import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; +import org.apache.hadoop.hdds.scm.block.DeletedBlockLog; +import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.container.ContainerStateManager; +import org.apache.hadoop.hdds.scm.pipeline.PipelineID; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.admin.scm.GetDeletedBlockSummarySubcommand; +import org.apache.hadoop.ozone.common.DeletedBlock; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Test for DeletedBlocksTxnSubcommand Cli. + */ +public class TestDeletedBlocksTxnShell { + + private static final Logger LOG = LoggerFactory + .getLogger(TestDeletedBlocksTxnShell.class); + + private final PrintStream originalOut = System.out; + private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); + private MiniOzoneHAClusterImpl cluster = null; + private OzoneConfiguration conf; + private String scmServiceId; + private File txnFile; + private int numOfSCMs = 3; + + private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); + private static final int BLOCKS_PER_TX = 5; + private static final int BLOCK_SIZE = 100; + private static final int BLOCK_REPLICATED_SIZE = 300; + + @TempDir + private Path tempDir; + + /** + * Create a MiniOzoneHACluster for testing. 
+ * + * @throws IOException + */ + @BeforeEach + public void init() throws Exception { + conf = new OzoneConfiguration(); + scmServiceId = "scm-service-test1"; + + cluster = MiniOzoneCluster.newHABuilder(conf) + .setSCMServiceId(scmServiceId) + .setNumOfStorageContainerManagers(numOfSCMs) + .setNumOfActiveSCMs(numOfSCMs) + .setNumOfOzoneManagers(1) + .build(); + cluster.waitForClusterToBeReady(); + + txnFile = tempDir.resolve("txn.txt").toFile(); + LOG.info("txnFile path: {}", txnFile.getAbsolutePath()); + System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); + } + + /** + * Shutdown MiniDFSCluster. + */ + @AfterEach + public void shutdown() { + if (cluster != null) { + cluster.shutdown(); + } + System.setOut(originalOut); + } + + //> + private Map> generateData(int dataSize) throws Exception { + Map> blockMap = new HashMap<>(); + int continerIDBase = RandomUtils.secure().randomInt(0, 100); + int localIDBase = RandomUtils.secure().randomInt(0, 1000); + for (int i = 0; i < dataSize; i++) { + long containerID = continerIDBase + i; + updateContainerMetadata(containerID); + List blocks = new ArrayList<>(); + for (int j = 0; j < BLOCKS_PER_TX; j++) { + long localID = localIDBase + j; + blocks.add(new DeletedBlock(new BlockID(containerID, localID), BLOCK_SIZE, BLOCK_REPLICATED_SIZE)); + } + blockMap.put(containerID, blocks); + } + return blockMap; + } + + private void updateContainerMetadata(long cid) throws Exception { + final ContainerInfo container = + new ContainerInfo.Builder() + .setContainerID(cid) + .setReplicationConfig(RatisReplicationConfig.getInstance(THREE)) + .setState(HddsProtos.LifeCycleState.CLOSED) + .setOwner("TestDeletedBlockLog") + .setPipelineID(PipelineID.randomId()) + .build(); + final Set replicaSet = cluster.getHddsDatanodes() + .subList(0, 3) + .stream() + .map(dn -> ContainerReplica.newBuilder() + .setContainerID(container.containerID()) + .setContainerState(State.CLOSED) + .setDatanodeDetails(dn.getDatanodeDetails()) + 
.build()) + .collect(Collectors.toSet()); + ContainerStateManager containerStateManager = getSCMLeader(). + getContainerManager().getContainerStateManager(); + containerStateManager.addContainer(container.getProtobuf()); + for (ContainerReplica replica: replicaSet) { + containerStateManager.updateContainerReplica(replica); + } + } + + private StorageContainerManager getSCMLeader() { + return cluster.getStorageContainerManagersList() + .stream().filter(a -> a.getScmContext().isLeaderReady()) + .collect(Collectors.toList()).get(0); + } + + private void flush() throws Exception { + // only flush leader here, avoid the follower concurrent flush and write + getSCMLeader().getScmHAManager().asSCMHADBTransactionBuffer().flush(); + } + + @Test + public void testGetDeletedBlockSummarySubcommand() throws Exception { + int currentValidTxnNum; + // add 30 block deletion transactions + DeletedBlockLog deletedBlockLog = getSCMLeader(). + getScmBlockManager().getDeletedBlockLog(); + deletedBlockLog.addTransactions(generateData(30)); + flush(); + currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions(); + LOG.info("Valid num of txns: {}", currentValidTxnNum); + assertEquals(30, currentValidTxnNum); + DeletedBlocksTransactionSummary summary = deletedBlockLog.getTransactionSummary(); + assertEquals(1, summary.getFirstTxID()); + assertEquals(30, summary.getTotalTransactionCount()); + assertEquals(30 * BLOCKS_PER_TX, summary.getTotalBlockCount()); + assertEquals(30 * BLOCKS_PER_TX * BLOCK_SIZE, summary.getTotalBlockSize()); + assertEquals(30 * BLOCKS_PER_TX * BLOCK_REPLICATED_SIZE, summary.getTotalBlockReplicatedSize()); + + GetDeletedBlockSummarySubcommand getDeletedBlockSummarySubcommand = + new GetDeletedBlockSummarySubcommand(); + outContent.reset(); + ContainerOperationClient scmClient = new ContainerOperationClient(conf); + getDeletedBlockSummarySubcommand.execute(scmClient); + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("Start 
from tx ID: 1")); + assertTrue(output.contains("Total number of transactions: 30")); + assertTrue(output.contains("Total number of blocks: 150")); + assertTrue(output.contains("Total size of blocks: 15000")); + assertTrue(output.contains("Total replicated size of blocks: 45000")); + } +} diff --git a/hadoop-ozone/interface-client/pom.xml b/hadoop-ozone/interface-client/pom.xml index cd1aabff5313..412181d8f096 100644 --- a/hadoop-ozone/interface-client/pom.xml +++ b/hadoop-ozone/interface-client/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-interface-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Client Interface Apache Ozone Client interface diff --git a/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto b/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto index 4b104514f6ae..5e726b400e87 100644 --- a/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto +++ b/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto @@ -79,6 +79,16 @@ message CompactResponse { optional string errorMsg = 3; } +message TriggerSnapshotDefragRequest { + required bool noWait = 1; +} + +message TriggerSnapshotDefragResponse { + required bool success = 1; + optional string errorMsg = 2; + optional bool result = 3; +} + /** The service for OM admin operations. 
*/ @@ -95,4 +105,8 @@ service OzoneManagerAdminService { // RPC request from admin to compact a column family of the OM's db rpc compactDB(CompactRequest) returns(CompactResponse); + + // RPC request from admin to trigger snapshot defragmentation + rpc triggerSnapshotDefrag(TriggerSnapshotDefragRequest) + returns(TriggerSnapshotDefragResponse); } diff --git a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto index 61a3c1d6792e..1e5675f612e6 100644 --- a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto +++ b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto @@ -880,7 +880,7 @@ message SnapshotInfo { optional hadoop.hdds.UUID pathPreviousSnapshotID = 8; optional hadoop.hdds.UUID globalPreviousSnapshotID = 9; optional string snapshotPath = 10; - optional string checkpointDir = 11; + optional string checkpointDir = 11 [deprecated = true]; optional int64 dbTxSequenceNumber = 12; optional bool deepClean = 13; optional bool sstFiltered = 14; diff --git a/hadoop-ozone/interface-storage/pom.xml b/hadoop-ozone/interface-storage/pom.xml index ae54089416b7..a4604bbd79f4 100644 --- a/hadoop-ozone/interface-storage/pom.xml +++ b/hadoop-ozone/interface-storage/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-interface-storage - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Storage Interface Apache Ozone Storage Interface diff --git a/hadoop-ozone/mini-cluster/pom.xml b/hadoop-ozone/mini-cluster/pom.xml index b5c2ff697fed..a96cfc23184b 100644 --- a/hadoop-ozone/mini-cluster/pom.xml +++ b/hadoop-ozone/mini-cluster/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-mini-cluster - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Mini Cluster Apache Ozone Mini Cluster for Integration Tests diff --git a/hadoop-ozone/multitenancy-ranger/pom.xml 
b/hadoop-ozone/multitenancy-ranger/pom.xml index ad025f9c6e59..623b213a337f 100644 --- a/hadoop-ozone/multitenancy-ranger/pom.xml +++ b/hadoop-ozone/multitenancy-ranger/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-multitenancy-ranger - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Multitenancy with Ranger Implementation of multitenancy for Apache Ozone Manager Server using Apache Ranger diff --git a/hadoop-ozone/ozone-manager/pom.xml b/hadoop-ozone/ozone-manager/pom.xml index cdbffa65f19f..923b1c02cbeb 100644 --- a/hadoop-ozone/ozone-manager/pom.xml +++ b/hadoop-ozone/ozone-manager/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-manager - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Manager Server Apache Ozone Manager Server diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java index e458fa73236a..cf6694480c04 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java @@ -142,6 +142,7 @@ import org.apache.hadoop.ozone.OmUtils; import org.apache.hadoop.ozone.OzoneAcl; import org.apache.hadoop.ozone.common.BlockGroup; +import org.apache.hadoop.ozone.common.DeletedBlock; import org.apache.hadoop.ozone.om.PendingKeysDeletion.PurgedKey; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes; @@ -832,12 +833,15 @@ public PendingKeysDeletion getPendingDeletionKeys( // Skip the key if the filter doesn't allow the file to be deleted. 
if (filter == null || filter.apply(Table.newKeyValue(kv.getKey(), info))) { - List blockIDS = info.getKeyLocationVersions().stream() + List deletedBlocks = info.getKeyLocationVersions().stream() .flatMap(versionLocations -> versionLocations.getLocationList().stream() - .map(b -> new BlockID(b.getContainerID(), b.getLocalID()))).collect(Collectors.toList()); + .map(b -> new DeletedBlock(new BlockID(b.getContainerID(), + b.getLocalID()), info.getDataSize(), info.getReplicatedSize()))).collect(Collectors.toList()); String blockGroupName = kv.getKey() + "/" + reclaimableKeyCount++; + BlockGroup keyBlocks = BlockGroup.newBuilder().setKeyName(blockGroupName) - .addAllBlockIDs(blockIDS).build(); + .addAllDeletedBlocks(deletedBlocks) + .build(); reclaimableKeys.put(blockGroupName, new PurgedKey(info.getVolumeName(), info.getBucketName(), bucketId, keyBlocks, kv.getKey(), OMKeyRequest.sumBlockLengths(info), info.isDeletedKeyCommitted())); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index 4d85e9f07472..efe9fc0aeea9 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -69,6 +69,7 @@ import org.apache.hadoop.ozone.lock.BootstrapStateHandler; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.snapshot.OMDBCheckpointUtils; +import org.apache.hadoop.ozone.om.snapshot.OmSnapshotLocalDataManager; import org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Time; @@ -347,7 +348,8 @@ private Set getSnapshotDirs(DBCheckpoint checkpoint, boolean waitForDir) OzoneConfiguration conf = getConf(); Set snapshotPaths = new HashSet<>(); - + OzoneManager om 
= (OzoneManager) getServletContext().getAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE); + OmSnapshotLocalDataManager snapshotLocalDataManager = om.getOmSnapshotManager().getSnapshotLocalDataManager(); // get snapshotInfo entries OmMetadataManagerImpl checkpointMetadataManager = OmMetadataManagerImpl.createCheckpointMetadataManager( @@ -359,11 +361,14 @@ private Set getSnapshotDirs(DBCheckpoint checkpoint, boolean waitForDir) // For each entry, wait for corresponding directory. while (iterator.hasNext()) { Table.KeyValue entry = iterator.next(); - Path path = Paths.get(getSnapshotPath(conf, entry.getValue())); - if (waitForDir) { - waitForDirToExist(path); + try (OmSnapshotLocalDataManager.ReadableOmSnapshotLocalDataMetaProvider snapMetaProvider = + snapshotLocalDataManager.getOmSnapshotLocalDataMeta(entry.getValue())) { + Path path = Paths.get(getSnapshotPath(conf, entry.getValue(), snapMetaProvider.getMeta().getVersion())); + if (waitForDir) { + waitForDirToExist(path); + } + snapshotPaths.add(path); } - snapshotPaths.add(path); } } finally { checkpointMetadataManager.stop(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java index f967e30ec52f..748329be83ae 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java @@ -27,6 +27,7 @@ import static org.apache.hadoop.ozone.OzoneConsts.ROCKSDB_SST_SUFFIX; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_RATIS_SNAPSHOT_MAX_TOTAL_SST_SIZE_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_RATIS_SNAPSHOT_MAX_TOTAL_SST_SIZE_KEY; +import static org.apache.hadoop.ozone.om.OmSnapshotManager.getSnapshotPath; import static 
org.apache.hadoop.ozone.om.lock.FlatResource.SNAPSHOT_DB_LOCK; import static org.apache.hadoop.ozone.om.snapshot.OMDBCheckpointUtils.includeSnapshotData; import static org.apache.hadoop.ozone.om.snapshot.OMDBCheckpointUtils.logEstimatedTarballSize; @@ -44,11 +45,13 @@ import java.nio.file.StandardOpenOption; import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; @@ -64,6 +67,8 @@ import org.apache.hadoop.hdds.recon.ReconConfig; import org.apache.hadoop.hdds.utils.DBCheckpointServlet; import org.apache.hadoop.hdds.utils.db.DBCheckpoint; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.lock.BootstrapStateHandler; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; @@ -71,6 +76,7 @@ import org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Time; +import org.apache.ozone.compaction.log.CompactionLogEntry; import org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -206,6 +212,7 @@ public void writeDbDataToStream(HttpServletRequest request, OutputStream destina DBCheckpoint checkpoint = null; OzoneManager om = (OzoneManager) getServletContext().getAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE); OMMetadataManager omMetadataManager = om.getMetadataManager(); + OmSnapshotLocalDataManager snapshotLocalDataManager = om.getOmSnapshotManager().getSnapshotLocalDataManager(); boolean includeSnapshotData = includeSnapshotData(request); AtomicLong maxTotalSstSize = new AtomicLong(getConf().getLong(OZONE_OM_RATIS_SNAPSHOT_MAX_TOTAL_SST_SIZE_KEY, 
OZONE_OM_RATIS_SNAPSHOT_MAX_TOTAL_SST_SIZE_DEFAULT)); @@ -215,7 +222,7 @@ public void writeDbDataToStream(HttpServletRequest request, OutputStream destina if (!includeSnapshotData) { maxTotalSstSize.set(Long.MAX_VALUE); } else { - snapshotPaths = getSnapshotDirs(omMetadataManager); + snapshotPaths = getSnapshotDirsFromDB(omMetadataManager, omMetadataManager, snapshotLocalDataManager); } if (sstFilesToExclude.isEmpty()) { @@ -249,7 +256,9 @@ public void writeDbDataToStream(HttpServletRequest request, OutputStream destina if (shouldContinue) { // we finished transferring files from snapshot DB's by now and // this is the last step where we transfer the active om.db contents - checkpoint = createAndPrepareCheckpoint(tmpdir, true); + // get the list of sst files of the checkpoint. + checkpoint = createAndPrepareCheckpoint(true); + List sstBackupFiles = extractSSTFilesFromCompactionLog(checkpoint); // unlimited files as we want the Active DB contents to be transferred in a single batch maxTotalSstSize.set(Long.MAX_VALUE); Path checkpointDir = checkpoint.getCheckpointLocation(); @@ -257,12 +266,16 @@ public void writeDbDataToStream(HttpServletRequest request, OutputStream destina writeDBToArchive(sstFilesToExclude, checkpointDir, maxTotalSstSize, archiveOutputStream, tmpdir, hardLinkFileMap, false); if (includeSnapshotData) { - Path tmpCompactionLogDir = tmpdir.resolve(getCompactionLogDir().getFileName()); - Path tmpSstBackupDir = tmpdir.resolve(getSstBackupDir().getFileName()); - writeDBToArchive(sstFilesToExclude, tmpCompactionLogDir, maxTotalSstSize, archiveOutputStream, tmpdir, - hardLinkFileMap, getCompactionLogDir(), false); - writeDBToArchive(sstFilesToExclude, tmpSstBackupDir, maxTotalSstSize, archiveOutputStream, tmpdir, - hardLinkFileMap, getSstBackupDir(), false); + // get the list of snapshots from the checkpoint + try (OmMetadataManagerImpl checkpointMetadataManager = OmMetadataManagerImpl + .createCheckpointMetadataManager(om.getConfiguration(), 
checkpoint)) { + snapshotPaths = getSnapshotDirsFromDB(omMetadataManager, checkpointMetadataManager, + snapshotLocalDataManager); + } + writeDBToArchive(sstFilesToExclude, getCompactionLogDir(), maxTotalSstSize, archiveOutputStream, tmpdir, + hardLinkFileMap, false); + writeDBToArchive(sstFilesToExclude, sstBackupFiles.stream(), + maxTotalSstSize, archiveOutputStream, tmpdir, hardLinkFileMap, false); // This is done to ensure all data to be copied correctly is flushed in the snapshot DB transferSnapshotData(sstFilesToExclude, tmpdir, snapshotPaths, maxTotalSstSize, archiveOutputStream, hardLinkFileMap); @@ -291,7 +304,7 @@ public void writeDbDataToStream(HttpServletRequest request, OutputStream destina * @param hardLinkFileMap Map of hardlink file paths to their unique identifiers for deduplication. * @throws IOException if an I/O error occurs during processing. */ - private void transferSnapshotData(Set sstFilesToExclude, Path tmpdir, Set snapshotPaths, + void transferSnapshotData(Set sstFilesToExclude, Path tmpdir, Set snapshotPaths, AtomicLong maxTotalSstSize, ArchiveOutputStream archiveOutputStream, Map hardLinkFileMap) throws IOException { OzoneManager om = (OzoneManager) getServletContext().getAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE); @@ -317,14 +330,6 @@ private void transferSnapshotData(Set sstFilesToExclude, Path tmpdir, Se } } - @VisibleForTesting - boolean writeDBToArchive(Set sstFilesToExclude, Path dir, - AtomicLong maxTotalSstSize, ArchiveOutputStream archiveOutputStream, - Path tmpdir, Map hardLinkFileMap, boolean onlySstFile) throws IOException { - return writeDBToArchive(sstFilesToExclude, dir, maxTotalSstSize, - archiveOutputStream, tmpdir, hardLinkFileMap, null, onlySstFile); - } - private static void cleanupCheckpoint(DBCheckpoint checkpoint) { if (checkpoint != null) { try { @@ -384,36 +389,55 @@ private OzoneConfiguration getConf() { } /** - * Collects paths to all snapshot databases. 
+ * Collects paths to all snapshot databases from the OM DB. * - * @param omMetadataManager OMMetadataManager instance + * @param activeOMMetadataManager OMMetadataManager instance * @return Set of paths to snapshot databases * @throws IOException if an I/O error occurs */ - Set getSnapshotDirs(OMMetadataManager omMetadataManager) throws IOException { + Set getSnapshotDirsFromDB(OMMetadataManager activeOMMetadataManager, OMMetadataManager omMetadataManager, + OmSnapshotLocalDataManager localDataManager) throws IOException { Set snapshotPaths = new HashSet<>(); - SnapshotChainManager snapshotChainManager = new SnapshotChainManager(omMetadataManager); - for (SnapshotChainInfo snapInfo : snapshotChainManager.getGlobalSnapshotChain().values()) { - String snapshotDir = - OmSnapshotManager.getSnapshotPath(getConf(), SnapshotInfo.getCheckpointDirName(snapInfo.getSnapshotId())); - Path path = Paths.get(snapshotDir); - snapshotPaths.add(path); + try (TableIterator> iter = + omMetadataManager.getSnapshotInfoTable().iterator()) { + while (iter.hasNext()) { + Table.KeyValue kv = iter.next(); + SnapshotInfo snapshotInfo = kv.getValue(); + try (OmSnapshotLocalDataManager.ReadableOmSnapshotLocalDataMetaProvider snapLocalMeta = + localDataManager.getOmSnapshotLocalDataMeta(snapshotInfo.getSnapshotId())) { + Path snapshotDir = getSnapshotPath(activeOMMetadataManager, snapshotInfo.getSnapshotId(), + snapLocalMeta.getMeta().getVersion()); + snapshotPaths.add(snapshotDir); + } + } } return snapshotPaths; } + @VisibleForTesting + boolean writeDBToArchive(Set sstFilesToExclude, Path dbDir, AtomicLong maxTotalSstSize, + ArchiveOutputStream archiveOutputStream, Path tmpDir, + Map hardLinkFileMap, boolean onlySstFile) throws IOException { + if (!Files.exists(dbDir)) { + LOG.warn("DB directory {} does not exist. 
Skipping.", dbDir); + return true; + } + Stream files = Files.list(dbDir); + return writeDBToArchive(sstFilesToExclude, files, + maxTotalSstSize, archiveOutputStream, tmpDir, hardLinkFileMap, onlySstFile); + } + /** * Writes database files to the archive, handling deduplication based on inode IDs. * Here the dbDir could either be a snapshot db directory, the active om.db, * compaction log dir, sst backup dir. * * @param sstFilesToExclude Set of SST file IDs to exclude from the archive - * @param dbDir Directory containing database files to archive + * @param files Stream of files to archive * @param maxTotalSstSize Maximum total size of SST files to include * @param archiveOutputStream Archive output stream * @param tmpDir Temporary directory for processing * @param hardLinkFileMap Map of hardlink file paths to their unique identifiers for deduplication - * @param destDir Destination directory for the archived files. If null, * the archived files are not moved to this directory. * @param onlySstFile If true, only SST files are processed. If false, all files are processed. *

@@ -424,49 +448,40 @@ Set getSnapshotDirs(OMMetadataManager omMetadataManager) throws IOExceptio * @throws IOException if an I/O error occurs */ @SuppressWarnings("checkstyle:ParameterNumber") - private boolean writeDBToArchive(Set sstFilesToExclude, Path dbDir, AtomicLong maxTotalSstSize, + private boolean writeDBToArchive(Set sstFilesToExclude, Stream files, AtomicLong maxTotalSstSize, ArchiveOutputStream archiveOutputStream, Path tmpDir, - Map hardLinkFileMap, Path destDir, boolean onlySstFile) throws IOException { - if (!Files.exists(dbDir)) { - LOG.warn("DB directory {} does not exist. Skipping.", dbDir); - return true; - } + Map hardLinkFileMap, boolean onlySstFile) throws IOException { long bytesWritten = 0L; int filesWritten = 0; long lastLoggedTime = Time.monotonicNow(); - try (Stream files = Files.list(dbDir)) { - Iterable iterable = files::iterator; - for (Path dbFile : iterable) { - if (!Files.isDirectory(dbFile)) { - if (onlySstFile && !dbFile.toString().endsWith(ROCKSDB_SST_SUFFIX)) { - continue; + Iterable iterable = files::iterator; + for (Path dbFile : iterable) { + if (!Files.isDirectory(dbFile)) { + if (onlySstFile && !dbFile.toString().endsWith(ROCKSDB_SST_SUFFIX)) { + continue; + } + String fileId = OmSnapshotUtils.getFileInodeAndLastModifiedTimeString(dbFile); + if (hardLinkFileMap != null) { + String path = dbFile.toFile().getAbsolutePath(); + // if the file is in the om checkpoint dir, then we need to change the path to point to the OM DB. + if (path.contains(OM_CHECKPOINT_DIR)) { + path = getDbStore().getDbLocation().toPath().resolve(dbFile.getFileName()).toAbsolutePath().toString(); } - String fileId = OmSnapshotUtils.getFileInodeAndLastModifiedTimeString(dbFile); - if (hardLinkFileMap != null) { - String path = dbFile.toFile().getAbsolutePath(); - if (destDir != null) { - path = destDir.resolve(dbFile.getFileName()).toString(); - } - // if the file is in the om checkpoint dir, then we need to change the path to point to the OM DB. 
- if (path.contains(OM_CHECKPOINT_DIR)) { - path = getDbStore().getDbLocation().toPath().resolve(dbFile.getFileName()).toAbsolutePath().toString(); - } - hardLinkFileMap.put(path, fileId); + hardLinkFileMap.put(path, fileId); + } + if (!sstFilesToExclude.contains(fileId)) { + long fileSize = Files.size(dbFile); + if (maxTotalSstSize.get() - fileSize <= 0) { + return false; } - if (!sstFilesToExclude.contains(fileId)) { - long fileSize = Files.size(dbFile); - if (maxTotalSstSize.get() - fileSize <= 0) { - return false; - } - bytesWritten += linkAndIncludeFile(dbFile.toFile(), fileId, archiveOutputStream, tmpDir); - filesWritten++; - maxTotalSstSize.addAndGet(-fileSize); - sstFilesToExclude.add(fileId); - if (Time.monotonicNow() - lastLoggedTime >= 30000) { - LOG.info("Transferred {} KB, #files {} to checkpoint tarball stream...", - bytesWritten / (1024), filesWritten); - lastLoggedTime = Time.monotonicNow(); - } + bytesWritten += linkAndIncludeFile(dbFile.toFile(), fileId, archiveOutputStream, tmpDir); + filesWritten++; + maxTotalSstSize.addAndGet(-fileSize); + sstFilesToExclude.add(fileId); + if (Time.monotonicNow() - lastLoggedTime >= 30000) { + LOG.info("Transferred {} KB, #files {} to checkpoint tarball stream...", + bytesWritten / (1024), filesWritten); + lastLoggedTime = Time.monotonicNow(); } } } @@ -480,21 +495,33 @@ private boolean writeDBToArchive(Set sstFilesToExclude, Path dbDir, Atom * The copy to the temporary directory for compaction log and SST backup files * is done to maintain a consistent view of the files in these directories. * - * @param tmpdir Temporary directory for storing checkpoint-related files. * @param flush If true, flushes in-memory data to disk before checkpointing. - * @return The created database checkpoint. * @throws IOException If an error occurs during checkpoint creation or file copying. 
*/ - private DBCheckpoint createAndPrepareCheckpoint(Path tmpdir, boolean flush) throws IOException { - // make tmp directories to contain the copies - Path tmpCompactionLogDir = tmpdir.resolve(getCompactionLogDir().getFileName()); - Path tmpSstBackupDir = tmpdir.resolve(getSstBackupDir().getFileName()); + DBCheckpoint createAndPrepareCheckpoint(boolean flush) throws IOException { + // Create & return the checkpoint. + return getDbStore().getCheckpoint(flush); + } + + private List extractSSTFilesFromCompactionLog(DBCheckpoint dbCheckpoint) throws IOException { + List sstFiles = new ArrayList<>(); + try (OmMetadataManagerImpl checkpointMetadataManager = + OmMetadataManagerImpl.createCheckpointMetadataManager(getConf(), dbCheckpoint)) { + try (Table.KeyValueIterator + iterator = checkpointMetadataManager.getCompactionLogTable().iterator()) { + iterator.seekToFirst(); - // Create checkpoint and then copy the files so that it has all the compaction entries and files. - DBCheckpoint dbCheckpoint = getDbStore().getCheckpoint(flush); - FileUtils.copyDirectory(getCompactionLogDir().toFile(), tmpCompactionLogDir.toFile()); - OmSnapshotUtils.linkFiles(getSstBackupDir().toFile(), tmpSstBackupDir.toFile()); + Path sstBackupDir = getSstBackupDir(); - return dbCheckpoint; + while (iterator.hasNext()) { + CompactionLogEntry logEntry = iterator.next().getValue(); + logEntry.getInputFileInfoList().forEach(f -> + sstFiles.add(sstBackupDir.resolve(f.getFileName() + ROCKSDB_SST_SUFFIX))); + } + } + } catch (Exception e) { + throw new IOException("Error reading compaction log from checkpoint", e); + } + return sstFiles; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java index c7b071a6e8d9..4ca647e4ea6d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java +++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java @@ -80,6 +80,7 @@ import org.apache.hadoop.ozone.OmUtils; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.common.BlockGroup; +import org.apache.hadoop.ozone.common.DeletedBlock; import org.apache.hadoop.ozone.om.codec.OMDBDefinition; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes; @@ -1814,12 +1815,13 @@ public List getBlocksForKeyDelete(String deletedKey) for (OmKeyInfo info : omKeyInfo.cloneOmKeyInfoList()) { for (OmKeyLocationInfoGroup keyLocations : info.getKeyLocationVersions()) { - List item = keyLocations.getLocationList().stream() - .map(b -> new BlockID(b.getContainerID(), b.getLocalID())) + List item = keyLocations.getLocationList().stream() + .map(b -> new DeletedBlock( + new BlockID(b.getContainerID(), b.getLocalID()), info.getDataSize(), info.getReplicatedSize())) .collect(Collectors.toList()); BlockGroup keyBlocks = BlockGroup.newBuilder() .setKeyName(deletedKey) - .addAllBlockIDs(item) + .addAllDeletedBlocks(item) .build(); result.add(keyBlocks); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataReader.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataReader.java index cbcb7e2dc065..c413c96956f7 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataReader.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataReader.java @@ -275,9 +275,13 @@ public OzoneFileStatus getFileStatus(OmKeyArgs args) throws IOException { args = bucket.update(args); try { + if (isAclEnabled) { + checkAcls(getResourceType(args), StoreType.OZONE, ACLType.READ, + bucket, args.getKeyName()); + } metrics.incNumGetFileStatus(); return keyManager.getFileStatus(args, getClientAddress()); - } catch (IOException ex) { + } catch (Exception 
ex) { metrics.incNumGetFileStatusFails(); auditSuccess = false; audit.logReadFailure( diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotLocalData.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotLocalData.java index 83ad02fb14bc..91ec8b673a89 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotLocalData.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotLocalData.java @@ -30,9 +30,10 @@ import java.util.UUID; import java.util.stream.Collectors; import org.apache.commons.codec.digest.DigestUtils; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.CopyObject; import org.apache.hadoop.ozone.util.WithChecksum; -import org.apache.ozone.compaction.log.SstFileInfo; +import org.apache.ozone.rocksdb.util.SstFileInfo; import org.rocksdb.LiveFileMetaData; import org.yaml.snakeyaml.Yaml; @@ -63,6 +64,9 @@ public class OmSnapshotLocalData implements WithChecksum { // Previous snapshotId based on which the snapshot local data is built. private UUID previousSnapshotId; + // Stores the transactionInfo corresponding to OM when the snaphot is purged. + private TransactionInfo transactionInfo; + // Map of version to VersionMeta, using linkedHashMap since the order of the map needs to be deterministic for // checksum computation. private final LinkedHashMap versionSstFileInfos; @@ -73,7 +77,8 @@ public class OmSnapshotLocalData implements WithChecksum { /** * Creates a OmSnapshotLocalData object with default values. 
*/ - public OmSnapshotLocalData(UUID snapshotId, List notDefraggedSSTFileList, UUID previousSnapshotId) { + public OmSnapshotLocalData(UUID snapshotId, List notDefraggedSSTFileList, UUID previousSnapshotId, + TransactionInfo transactionInfo) { this.snapshotId = snapshotId; this.isSSTFiltered = false; this.lastDefragTime = 0L; @@ -83,6 +88,7 @@ public OmSnapshotLocalData(UUID snapshotId, List notDefraggedS new VersionMeta(0, notDefraggedSSTFileList.stream().map(SstFileInfo::new).collect(Collectors.toList()))); this.version = 0; this.previousSnapshotId = previousSnapshotId; + this.transactionInfo = transactionInfo; setChecksumTo0ByteArray(); } @@ -101,6 +107,15 @@ public OmSnapshotLocalData(OmSnapshotLocalData source) { this.previousSnapshotId = source.previousSnapshotId; this.versionSstFileInfos = new LinkedHashMap<>(); setVersionSstFileInfos(source.versionSstFileInfos); + this.transactionInfo = source.transactionInfo; + } + + public TransactionInfo getTransactionInfo() { + return transactionInfo; + } + + public void setTransactionInfo(TransactionInfo transactionInfo) { + this.transactionInfo = transactionInfo; } /** @@ -163,7 +178,7 @@ public Map getVersionSstFileInfos() { * Sets the defragged SST file list. * @param versionSstFileInfos Map of version to defragged SST file list */ - public void setVersionSstFileInfos(Map versionSstFileInfos) { + void setVersionSstFileInfos(Map versionSstFileInfos) { this.versionSstFileInfos.clear(); this.versionSstFileInfos.putAll(versionSstFileInfos); } @@ -184,9 +199,14 @@ public void setPreviousSnapshotId(UUID previousSnapshotId) { * Adds an entry to the defragged SST file list. 
* @param sstFiles SST file name */ - public void addVersionSSTFileInfos(List sstFiles, int previousSnapshotVersion) { + public void addVersionSSTFileInfos(List sstFiles, int previousSnapshotVersion) { version++; - this.versionSstFileInfos.put(version, new VersionMeta(previousSnapshotVersion, sstFiles)); + this.versionSstFileInfos.put(version, new VersionMeta(previousSnapshotVersion, sstFiles.stream() + .map(SstFileInfo::new).collect(Collectors.toList()))); + } + + public void removeVersionSSTFileInfos(int snapshotVersion) { + this.versionSstFileInfos.remove(snapshotVersion); } /** @@ -274,7 +294,7 @@ public OmSnapshotLocalData copyObject() { * maintain immutability. */ public static class VersionMeta implements CopyObject { - private final int previousSnapshotVersion; + private int previousSnapshotVersion; private final List sstFiles; public VersionMeta(int previousSnapshotVersion, List sstFiles) { @@ -286,6 +306,10 @@ public int getPreviousSnapshotVersion() { return previousSnapshotVersion; } + public void setPreviousSnapshotVersion(int previousSnapshotVersion) { + this.previousSnapshotVersion = previousSnapshotVersion; + } + public List getSstFiles() { return sstFiles; } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotLocalDataYaml.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotLocalDataYaml.java index c376e9a332c0..ad8046d719e0 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotLocalDataYaml.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotLocalDataYaml.java @@ -24,9 +24,10 @@ import org.apache.commons.pool2.BasePooledObjectFactory; import org.apache.commons.pool2.PooledObject; import org.apache.commons.pool2.impl.DefaultPooledObject; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.om.OmSnapshotLocalData.VersionMeta; -import 
org.apache.ozone.compaction.log.SstFileInfo; +import org.apache.ozone.rocksdb.util.SstFileInfo; import org.yaml.snakeyaml.DumperOptions; import org.yaml.snakeyaml.LoaderOptions; import org.yaml.snakeyaml.TypeDescription; @@ -71,6 +72,8 @@ private static class OmSnapshotLocalDataRepresenter extends Representer { this.addClassTag(SstFileInfo.class, SST_FILE_INFO_TAG); representers.put(SstFileInfo.class, new RepresentSstFileInfo()); representers.put(VersionMeta.class, new RepresentVersionMeta()); + representers.put(TransactionInfo.class, data -> new ScalarNode(Tag.STR, data.toString(), null, null, + DumperOptions.ScalarStyle.PLAIN)); representers.put(UUID.class, data -> new ScalarNode(Tag.STR, data.toString(), null, null, DumperOptions.ScalarStyle.PLAIN)); } @@ -168,7 +171,10 @@ public Object construct(Node node) { UUID snapId = UUID.fromString(snapIdStr); final String prevSnapIdStr = (String) nodes.get(OzoneConsts.OM_SLD_PREV_SNAP_ID); UUID prevSnapId = prevSnapIdStr != null ? UUID.fromString(prevSnapIdStr) : null; - OmSnapshotLocalData snapshotLocalData = new OmSnapshotLocalData(snapId, Collections.emptyList(), prevSnapId); + final String purgeTxInfoStr = (String) nodes.get(OzoneConsts.OM_SLD_TXN_INFO); + TransactionInfo transactionInfo = purgeTxInfoStr != null ? 
TransactionInfo.valueOf(purgeTxInfoStr) : null; + OmSnapshotLocalData snapshotLocalData = new OmSnapshotLocalData(snapId, Collections.emptyList(), prevSnapId, + transactionInfo); // Set version from YAML Integer version = (Integer) nodes.get(OzoneConsts.OM_SLD_VERSION); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index 7b9beb80cf6f..0954b029ab67 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -415,8 +415,12 @@ public OmSnapshot load(@Nonnull UUID snapshotId) throws IOException { "' with txnId : '" + TransactionInfo.fromByteString(snapshotInfo.getCreateTransactionInfo()) + "' has not been flushed yet. Please wait a few more seconds before retrying", TIMEOUT); } - snapshotMetadataManager = new OmMetadataManagerImpl(conf, - snapshotInfo.getCheckpointDirName(), maxOpenSstFilesInSnapshotDb); + try (OmSnapshotLocalDataManager.ReadableOmSnapshotLocalDataMetaProvider snapshotLocalDataProvider = + snapshotLocalDataManager.getOmSnapshotLocalDataMeta(snapshotInfo)) { + snapshotMetadataManager = new OmMetadataManagerImpl(conf, + snapshotInfo.getCheckpointDirName(snapshotLocalDataProvider.getMeta().getVersion()), + maxOpenSstFilesInSnapshotDb); + } } catch (IOException e) { LOG.error("Failed to retrieve snapshot: {}", snapshotTableKey, e); throw e; @@ -505,14 +509,12 @@ public static DBCheckpoint createOmSnapshotCheckpoint( boolean snapshotDirExist = false; // Create DB checkpoint for snapshot - String checkpointPrefix = store.getDbLocation().getName(); - Path snapshotDirPath = Paths.get(store.getSnapshotsParentDir(), - checkpointPrefix + snapshotInfo.getCheckpointDir()); + Path snapshotDirPath = getSnapshotPath(omMetadataManager, snapshotInfo, 0); if 
(Files.exists(snapshotDirPath)) { snapshotDirExist = true; dbCheckpoint = new RocksDBCheckpoint(snapshotDirPath); } else { - dbCheckpoint = store.getSnapshot(snapshotInfo.getCheckpointDirName()); + dbCheckpoint = store.getSnapshot(snapshotInfo.getCheckpointDirName(0)); } OmSnapshotManager omSnapshotManager = ((OmMetadataManagerImpl) omMetadataManager).getOzoneManager().getOmSnapshotManager(); @@ -796,27 +798,23 @@ public static String getSnapshotPrefix(String snapshotName) { snapshotName + OM_KEY_PREFIX; } - public static Path getSnapshotPath(OMMetadataManager omMetadataManager, SnapshotInfo snapshotInfo) { - RDBStore store = (RDBStore) omMetadataManager.getStore(); - String checkpointPrefix = store.getDbLocation().getName(); - return Paths.get(store.getSnapshotsParentDir(), - checkpointPrefix + snapshotInfo.getCheckpointDir()); + public static Path getSnapshotPath(OMMetadataManager omMetadataManager, SnapshotInfo snapshotInfo, int version) { + return getSnapshotPath(omMetadataManager, snapshotInfo.getSnapshotId(), version); } - public static Path getSnapshotPath(OMMetadataManager omMetadataManager, UUID snapshotId) { + public static Path getSnapshotPath(OMMetadataManager omMetadataManager, UUID snapshotId, int version) { RDBStore store = (RDBStore) omMetadataManager.getStore(); String checkpointPrefix = store.getDbLocation().getName(); return Paths.get(store.getSnapshotsParentDir(), - checkpointPrefix + SnapshotInfo.getCheckpointDirName(snapshotId)); + checkpointPrefix + SnapshotInfo.getCheckpointDirName(snapshotId, version)); } public static String getSnapshotPath(OzoneConfiguration conf, - SnapshotInfo snapshotInfo) { - return getSnapshotPath(conf, snapshotInfo.getCheckpointDirName()); + SnapshotInfo snapshotInfo, int version) { + return getSnapshotPath(conf, snapshotInfo.getCheckpointDirName(version)); } - public static String getSnapshotPath(OzoneConfiguration conf, - String checkpointDirName) { + private static String getSnapshotPath(OzoneConfiguration conf, 
String checkpointDirName) { return OMStorage.getOmDbDir(conf) + OM_KEY_PREFIX + OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + OM_DB_NAME + checkpointDirName; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 56e51cf4026e..3cf263e50135 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -3301,12 +3301,24 @@ public List getServiceList() throws IOException { .build()); } - // Since this OM is processing the request, we can assume it to be the - // leader OM - + RaftPeerRole selfRole; + RaftPeerId leaderId = null; + if (omRatisServer == null) { + selfRole = RaftPeerRole.LEADER; + } else { + leaderId = omRatisServer.getLeaderId(); + RaftPeerId selfPeerId = omRatisServer.getRaftPeerId(); + if (leaderId != null && leaderId.equals(selfPeerId)) { + selfRole = RaftPeerRole.LEADER; + } else if (omNodeDetails.isRatisListener()) { + selfRole = RaftPeerRole.LISTENER; + } else { + selfRole = RaftPeerRole.FOLLOWER; + } + } OMRoleInfo omRole = OMRoleInfo.newBuilder() .setNodeId(getOMNodeId()) - .setServerRole(RaftPeerRole.LEADER.name()) + .setServerRole(selfRole.name()) .build(); omServiceInfoBuilder.setOmRoleInfo(omRole); @@ -3330,10 +3342,17 @@ public List getServiceList() throws IOException { .setValue(peerNode.getRpcPort()) .build()); - String role = peerNode.isRatisListener() ? 
RaftPeerRole.LISTENER.name() : RaftPeerRole.FOLLOWER.name(); + RaftPeerRole roleForPeer; + if (leaderId != null && peerNode.getNodeId().equals(leaderId.toString())) { + roleForPeer = RaftPeerRole.LEADER; + } else if (peerNode.isRatisListener()) { + roleForPeer = RaftPeerRole.LISTENER; + } else { + roleForPeer = RaftPeerRole.FOLLOWER; + } OMRoleInfo peerOmRole = OMRoleInfo.newBuilder() .setNodeId(peerNode.getNodeId()) - .setServerRole(role) + .setServerRole(roleForPeer.name()) .build(); peerOmServiceInfoBuilder.setOmRoleInfo(peerOmRole); @@ -3554,6 +3573,43 @@ public boolean triggerRangerBGSync(boolean noWait) throws IOException { } } + public boolean triggerSnapshotDefrag(boolean noWait) throws IOException { + + // Note: Any OM (leader or follower) can run snapshot defrag + + final UserGroupInformation ugi = getRemoteUser(); + // Check Ozone admin privilege + if (!isAdmin(ugi)) { + throw new OMException("Only Ozone admins are allowed to trigger " + + "snapshot defragmentation manually", PERMISSION_DENIED); + } + + // Get the SnapshotDefragService from KeyManager + final SnapshotDefragService defragService = keyManager.getSnapshotDefragService(); + if (defragService == null) { + throw new OMException("Snapshot defragmentation service is not initialized", + FEATURE_NOT_ENABLED); + } + + // Trigger Snapshot Defragmentation + if (noWait) { + final Thread t = new Thread(() -> { + try { + defragService.triggerSnapshotDefragOnce(); + } catch (Exception e) { + LOG.error("Error during snapshot defragmentation", e); + } + }, threadPrefix + "SnapshotDefragTrigger-" + System.currentTimeMillis()); + t.start(); + LOG.info("User '{}' manually triggered Snapshot Defragmentation without waiting" + + " in a new thread, tid = {}", ugi, t.getId()); + return true; + } else { + LOG.info("User '{}' manually triggered Snapshot Defragmentation and is waiting", ugi); + return defragService.triggerSnapshotDefragOnce(); + } + } + @Override public StatusAndMessages finalizeUpgrade(String 
upgradeClientID) throws IOException { @@ -4716,7 +4772,7 @@ private void addS3GVolumeToDB() throws IOException { // Add to cache. metadataManager.getVolumeTable().addCacheEntry( new CacheKey<>(dbVolumeKey), - CacheValue.get(transactionID, omVolumeArgs)); + CacheValue.get(DEFAULT_OM_UPDATE_ID, omVolumeArgs)); metadataManager.getUserTable().addCacheEntry( new CacheKey<>(dbUserKey), CacheValue.get(transactionID, userVolumeInfo)); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotDefragService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotDefragService.java index 9747bb7c8942..3eb1bfadf259 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotDefragService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotDefragService.java @@ -23,6 +23,7 @@ import com.google.common.annotations.VisibleForTesting; import java.io.IOException; +import java.nio.file.Path; import java.util.Collections; import java.util.Iterator; import java.util.Optional; @@ -43,6 +44,7 @@ import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; import org.apache.hadoop.ozone.om.snapshot.MultiSnapshotLocks; +import org.apache.hadoop.ozone.om.snapshot.OmSnapshotLocalDataManager; import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,6 +80,7 @@ public class SnapshotDefragService extends BackgroundService private final AtomicBoolean running; private final MultiSnapshotLocks snapshotIdLocks; + private final OzoneConfiguration conf; private final BootstrapStateHandler.Lock lock = new BootstrapStateHandler.Lock(); @@ -89,10 +92,11 @@ public SnapshotDefragService(long interval, TimeUnit unit, long serviceTimeout, this.snapshotLimitPerTask = configuration .getLong(SNAPSHOT_DEFRAG_LIMIT_PER_TASK, SNAPSHOT_DEFRAG_LIMIT_PER_TASK_DEFAULT); 
+ this.conf = configuration; snapshotsDefraggedCount = new AtomicLong(0); running = new AtomicBoolean(false); IOzoneManagerLock omLock = ozoneManager.getMetadataManager().getLock(); - this.snapshotIdLocks = new MultiSnapshotLocks(omLock, SNAPSHOT_GC_LOCK, true); + this.snapshotIdLocks = new MultiSnapshotLocks(omLock, SNAPSHOT_GC_LOCK, true, 1); } @Override @@ -127,19 +131,18 @@ private boolean isRocksToolsNativeLibAvailable() { * Checks if a snapshot needs defragmentation by examining its YAML metadata. */ private boolean needsDefragmentation(SnapshotInfo snapshotInfo) { - String snapshotPath = OmSnapshotManager.getSnapshotPath( - ozoneManager.getConfiguration(), snapshotInfo); - - try { + if (!SstFilteringService.isSstFiltered(conf, snapshotInfo)) { + return false; + } + try (OmSnapshotLocalDataManager.ReadableOmSnapshotLocalDataProvider readableOmSnapshotLocalDataProvider = + ozoneManager.getOmSnapshotManager().getSnapshotLocalDataManager().getOmSnapshotLocalData(snapshotInfo)) { + Path snapshotPath = OmSnapshotManager.getSnapshotPath( + ozoneManager.getMetadataManager(), snapshotInfo, + readableOmSnapshotLocalDataProvider.getSnapshotLocalData().getVersion()); // Read snapshot local metadata from YAML - OmSnapshotLocalData snapshotLocalData = ozoneManager.getOmSnapshotManager() - .getSnapshotLocalDataManager() - .getOmSnapshotLocalData(snapshotInfo); - // Check if snapshot needs compaction (defragmentation) - boolean needsDefrag = snapshotLocalData.getNeedsDefrag(); - LOG.debug("Snapshot {} needsDefragmentation field value: {}", - snapshotInfo.getName(), needsDefrag); + boolean needsDefrag = readableOmSnapshotLocalDataProvider.needsDefrag(); + LOG.debug("Snapshot {} needsDefragmentation field value: {}", snapshotInfo.getName(), needsDefrag); return needsDefrag; } catch (IOException e) { @@ -175,10 +178,6 @@ private final class SnapshotDefragTask implements BackgroundTask { public BackgroundTaskResult call() throws Exception { // Check OM leader and readiness if 
(shouldRun()) { - final long count = runCount.incrementAndGet(); - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating Snapshot Defragmentation Task: run # {}", count); - } triggerSnapshotDefragOnce(); } @@ -187,6 +186,12 @@ public BackgroundTaskResult call() throws Exception { } public synchronized boolean triggerSnapshotDefragOnce() throws IOException { + + final long count = runCount.incrementAndGet(); + if (LOG.isDebugEnabled()) { + LOG.debug("Initiating Snapshot Defragmentation Task: run # {}", count); + } + // Check if rocks-tools native lib is available if (!isRocksToolsNativeLibAvailable()) { LOG.warn("Rocks-tools native library is not available. " + diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SstFilteringService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SstFilteringService.java index 522ea7df6de5..4b5002eb6c4a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SstFilteringService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SstFilteringService.java @@ -87,7 +87,7 @@ public class SstFilteringService extends BackgroundService public static boolean isSstFiltered(OzoneConfiguration ozoneConfiguration, SnapshotInfo snapshotInfo) { Path sstFilteredFile = Paths.get(OmSnapshotManager.getSnapshotPath(ozoneConfiguration, - snapshotInfo), SST_FILTERED_FILE); + snapshotInfo, 0), SST_FILTERED_FILE); return snapshotInfo.isSstFiltered() || sstFilteredFile.toFile().exists(); } @@ -138,7 +138,8 @@ private void markSSTFilteredFlagForSnapshot(SnapshotInfo snapshotInfo) throws IO .acquireReadLock(SNAPSHOT_DB_LOCK, snapshotInfo.getSnapshotId().toString()); boolean acquiredSnapshotLock = omLockDetails.isLockAcquired(); if (acquiredSnapshotLock) { - String snapshotDir = OmSnapshotManager.getSnapshotPath(ozoneManager.getConfiguration(), snapshotInfo); + // Ensure snapshot is sstFiltered before defrag. 
+ String snapshotDir = OmSnapshotManager.getSnapshotPath(ozoneManager.getConfiguration(), snapshotInfo, 0); try { // mark the snapshot as filtered by creating a file. if (Files.exists(Paths.get(snapshotDir))) { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeySetTimesRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeySetTimesRequest.java index 353a17757025..e137847eb396 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeySetTimesRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeySetTimesRequest.java @@ -78,6 +78,27 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { OzoneManagerProtocolProtos.KeyArgs newKeyArgs = resolveBucketLink(ozoneManager, keyArgs); + // ACL check during preExecute + if (ozoneManager.getAclsEnabled()) { + try { + checkAcls(ozoneManager, OzoneObj.ResourceType.KEY, + OzoneObj.StoreType.OZONE, IAccessAuthorizer.ACLType.WRITE_ACL, + newKeyArgs.getVolumeName(), newKeyArgs.getBucketName(), newKeyArgs.getKeyName()); + } catch (IOException ex) { + // Ensure audit log captures preExecute failures + Map auditMap = new LinkedHashMap<>(); + auditMap.put(OzoneConsts.VOLUME, newKeyArgs.getVolumeName()); + auditMap.put(OzoneConsts.BUCKET, newKeyArgs.getBucketName()); + auditMap.put(OzoneConsts.KEY, newKeyArgs.getKeyName()); + auditMap.put(OzoneConsts.MODIFICATION_TIME, + String.valueOf(getModificationTime())); + markForAudit(ozoneManager.getAuditLogger(), + buildAuditMessage(OMAction.SET_TIMES, auditMap, ex, + getOmRequest().getUserInfo())); + throw ex; + } + } + return request.toBuilder() .setSetTimesRequest( setTimesRequest.toBuilder() @@ -194,12 +215,6 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut bucket = getBucketName(); key = getKeyName(); - // check Acl - if (ozoneManager.getAclsEnabled()) { - 
checkAcls(ozoneManager, OzoneObj.ResourceType.KEY, - OzoneObj.StoreType.OZONE, IAccessAuthorizer.ACLType.WRITE_ACL, - volume, bucket, key); - } mergeOmLockDetails( omMetadataManager.getLock().acquireWriteLock(BUCKET_LOCK, volume, bucket)); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeySetTimesRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeySetTimesRequestWithFSO.java index 009bcd1662c1..6cc68b6f718a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeySetTimesRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeySetTimesRequestWithFSO.java @@ -42,8 +42,6 @@ import org.apache.hadoop.ozone.om.response.key.OMKeySetTimesResponseWithFSO; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; -import org.apache.hadoop.ozone.security.acl.IAccessAuthorizer; -import org.apache.hadoop.ozone.security.acl.OzoneObj; /** * Handle set times request for bucket for prefix layout. 
@@ -53,6 +51,7 @@ public class OMKeySetTimesRequestWithFSO extends OMKeySetTimesRequest { @Override public OzoneManagerProtocolProtos.OMRequest preExecute( OzoneManager ozoneManager) throws IOException { + // The parent class handles ACL checks in preExecute, so just call super return super.preExecute(ozoneManager); } @@ -82,12 +81,6 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut bucket = getBucketName(); key = getKeyName(); - // check Acl - if (ozoneManager.getAclsEnabled()) { - checkAcls(ozoneManager, OzoneObj.ResourceType.KEY, - OzoneObj.StoreType.OZONE, IAccessAuthorizer.ACLType.WRITE_ACL, - volume, bucket, key); - } mergeOmLockDetails(omMetadataManager.getLock() .acquireWriteLock(BUCKET_LOCK, volume, bucket)); lockAcquired = getOmLockDetails().isLockAcquired(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java index 5524371bf1e2..a1a1d306c238 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java @@ -91,6 +91,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut List snapshotDbKeys = snapshotPurgeRequest .getSnapshotDBKeysList(); + TransactionInfo transactionInfo = TransactionInfo.valueOf(context.getTermIndex()); try { // Each snapshot purge operation does three things: @@ -123,12 +124,13 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut } // Update the snapshotInfo lastTransactionInfo. 
for (SnapshotInfo snapshotInfo : updatedSnapshotInfos.values()) { - snapshotInfo.setLastTransactionInfo(TransactionInfo.valueOf(context.getTermIndex()).toByteString()); + snapshotInfo.setLastTransactionInfo(transactionInfo.toByteString()); omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(snapshotInfo.getTableKey()), CacheValue.get(context.getIndex(), snapshotInfo)); } - omClientResponse = new OMSnapshotPurgeResponse(omResponse.build(), snapshotDbKeys, updatedSnapshotInfos); + omClientResponse = new OMSnapshotPurgeResponse(omResponse.build(), snapshotDbKeys, updatedSnapshotInfos, + transactionInfo); omSnapshotIntMetrics.incNumSnapshotPurges(); LOG.info("Successfully executed snapshotPurgeRequest: {{}} along with updating snapshots:{}.", diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java index 1cf078bca0a8..1beddd253130 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java @@ -22,12 +22,14 @@ import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.DIRECTORY_TABLE; import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.FILE_TABLE; import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.SNAPSHOT_INFO_TABLE; +import static org.apache.hadoop.ozone.om.lock.FlatResource.SNAPSHOT_DB_CONTENT_LOCK; import com.google.common.annotations.VisibleForTesting; import jakarta.annotation.Nonnull; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.UUID; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.hdds.utils.db.DBStore; @@ 
-36,11 +38,14 @@ import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; +import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; +import org.apache.hadoop.ozone.om.lock.OMLockDetails; import org.apache.hadoop.ozone.om.request.key.OMDirectoriesPurgeRequestWithFSO; import org.apache.hadoop.ozone.om.response.CleanupTableInfo; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; @@ -86,9 +91,15 @@ public void addToDBBatch(OMMetadataManager metadataManager, OmSnapshotManager omSnapshotManager = ((OmMetadataManagerImpl) metadataManager) .getOzoneManager().getOmSnapshotManager(); - + IOzoneManagerLock lock = metadataManager.getLock(); + UUID fromSnapshotId = fromSnapshotInfo.getSnapshotId(); + OMLockDetails lockDetails = lock.acquireReadLock(SNAPSHOT_DB_CONTENT_LOCK, fromSnapshotId.toString()); + if (!lockDetails.isLockAcquired()) { + throw new OMException("Unable to acquire read lock on " + SNAPSHOT_DB_CONTENT_LOCK + " for snapshot: " + + fromSnapshotId, OMException.ResultCodes.INTERNAL_ERROR); + } try (UncheckedAutoCloseableSupplier - rcFromSnapshotInfo = omSnapshotManager.getSnapshot(fromSnapshotInfo.getSnapshotId())) { + rcFromSnapshotInfo = omSnapshotManager.getSnapshot(fromSnapshotId)) { OmSnapshot fromSnapshot = rcFromSnapshotInfo.get(); DBStore fromSnapshotStore = fromSnapshot.getMetadataManager() .getStore(); @@ -98,6 +109,8 @@ public void addToDBBatch(OMMetadataManager metadataManager, processPaths(metadataManager, fromSnapshot.getMetadataManager(), batchOp, writeBatch); 
fromSnapshotStore.commitBatchOperation(writeBatch); } + } finally { + lock.releaseReadLock(SNAPSHOT_DB_CONTENT_LOCK, fromSnapshotId.toString()); } metadataManager.getSnapshotInfoTable().putWithBatch(batchOp, fromSnapshotInfo.getTableKey(), fromSnapshotInfo); } else { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyPurgeResponse.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyPurgeResponse.java index 38ce0a6266c2..b9ba768f6cb6 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyPurgeResponse.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyPurgeResponse.java @@ -19,21 +19,26 @@ import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.DELETED_TABLE; import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.SNAPSHOT_INFO_TABLE; +import static org.apache.hadoop.ozone.om.lock.FlatResource.SNAPSHOT_DB_CONTENT_LOCK; import static org.apache.hadoop.ozone.om.response.snapshot.OMSnapshotMoveDeletedKeysResponse.createRepeatedOmKeyInfo; import jakarta.annotation.Nonnull; import java.io.IOException; import java.util.Collections; import java.util.List; +import java.util.UUID; import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; +import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; +import org.apache.hadoop.ozone.om.lock.OMLockDetails; import 
org.apache.hadoop.ozone.om.request.key.OMKeyPurgeRequest; import org.apache.hadoop.ozone.om.response.CleanupTableInfo; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.KeyInfo; @@ -82,10 +87,15 @@ public void addToDBBatch(OMMetadataManager omMetadataManager, if (fromSnapshot != null) { OmSnapshotManager omSnapshotManager = ((OmMetadataManagerImpl) omMetadataManager).getOzoneManager().getOmSnapshotManager(); - + IOzoneManagerLock lock = omMetadataManager.getLock(); + UUID fromSnapshotId = fromSnapshot.getSnapshotId(); + OMLockDetails lockDetails = lock.acquireReadLock(SNAPSHOT_DB_CONTENT_LOCK, fromSnapshotId.toString()); + if (!lockDetails.isLockAcquired()) { + throw new OMException("Unable to acquire read lock on " + SNAPSHOT_DB_CONTENT_LOCK + " for snapshot: " + + fromSnapshotId, OMException.ResultCodes.INTERNAL_ERROR); + } try (UncheckedAutoCloseableSupplier rcOmFromSnapshot = - omSnapshotManager.getSnapshot(fromSnapshot.getSnapshotId())) { - + omSnapshotManager.getSnapshot(fromSnapshotId)) { OmSnapshot fromOmSnapshot = rcOmFromSnapshot.get(); DBStore fromSnapshotStore = fromOmSnapshot.getMetadataManager().getStore(); // Init Batch Operation for snapshot db. 
@@ -95,6 +105,8 @@ public void addToDBBatch(OMMetadataManager omMetadataManager, processKeysToUpdate(writeBatch, fromOmSnapshot.getMetadataManager()); fromSnapshotStore.commitBatchOperation(writeBatch); } + } finally { + lock.releaseReadLock(SNAPSHOT_DB_CONTENT_LOCK, fromSnapshotId.toString()); } omMetadataManager.getSnapshotInfoTable().putWithBatch(batchOperation, fromSnapshot.getTableKey(), fromSnapshot); } else { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotMoveTableKeysResponse.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotMoveTableKeysResponse.java index 3c40bafd0b06..1d85ca0f22a2 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotMoveTableKeysResponse.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotMoveTableKeysResponse.java @@ -18,8 +18,10 @@ package org.apache.hadoop.ozone.om.response.snapshot; import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.SNAPSHOT_INFO_TABLE; +import static org.apache.hadoop.ozone.om.lock.FlatResource.SNAPSHOT_DB_CONTENT_LOCK; import static org.apache.hadoop.ozone.om.snapshot.SnapshotUtils.createMergedRepeatedOmKeyInfoFromDeletedTableEntry; +import com.google.common.collect.Lists; import jakarta.annotation.Nonnull; import java.io.IOException; import java.util.List; @@ -30,9 +32,12 @@ import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; +import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; +import org.apache.hadoop.ozone.om.lock.OMLockDetails; import 
org.apache.hadoop.ozone.om.response.CleanupTableInfo; import org.apache.hadoop.ozone.om.response.OMClientResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; @@ -80,7 +85,15 @@ public OMSnapshotMoveTableKeysResponse(@Nonnull OMResponse omResponse) { protected void addToDBBatch(OMMetadataManager omMetadataManager, BatchOperation batchOperation) throws IOException { OmSnapshotManager omSnapshotManager = ((OmMetadataManagerImpl) omMetadataManager) .getOzoneManager().getOmSnapshotManager(); - + IOzoneManagerLock lock = omMetadataManager.getLock(); + String[] fromSnapshotId = new String[] {fromSnapshot.getSnapshotId().toString()}; + String[] nextSnapshotId = nextSnapshot == null ? null : new String[] {nextSnapshot.getSnapshotId().toString()}; + List snapshotIds = Lists.newArrayList(fromSnapshotId, nextSnapshotId); + OMLockDetails lockDetails = lock.acquireReadLocks(SNAPSHOT_DB_CONTENT_LOCK, snapshotIds); + if (!lockDetails.isLockAcquired()) { + throw new OMException("Unable to acquire read lock on " + SNAPSHOT_DB_CONTENT_LOCK + " for snapshot: " + + snapshotIds, OMException.ResultCodes.INTERNAL_ERROR); + } try (UncheckedAutoCloseableSupplier rcOmFromSnapshot = omSnapshotManager.getSnapshot(fromSnapshot.getSnapshotId())) { @@ -113,6 +126,8 @@ protected void addToDBBatch(OMMetadataManager omMetadataManager, BatchOperation fromSnapshotStore.getDb().flushWal(true); fromSnapshotStore.getDb().flush(); } + } finally { + lock.releaseReadLocks(SNAPSHOT_DB_CONTENT_LOCK, snapshotIds); } // Flush snapshot info to rocksDB. 
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java index 3797b3fcf2eb..3bc8a8dc27bf 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; @@ -36,6 +37,7 @@ import org.apache.hadoop.ozone.om.response.CleanupTableInfo; import org.apache.hadoop.ozone.om.response.OMClientResponse; import org.apache.hadoop.ozone.om.snapshot.OmSnapshotLocalDataManager; +import org.apache.hadoop.ozone.om.snapshot.OmSnapshotLocalDataManager.WritableOmSnapshotLocalDataProvider; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,15 +51,18 @@ public class OMSnapshotPurgeResponse extends OMClientResponse { LoggerFactory.getLogger(OMSnapshotPurgeResponse.class); private final List snapshotDbKeys; private final Map updatedSnapInfos; + private final TransactionInfo transactionInfo; public OMSnapshotPurgeResponse( @Nonnull OMResponse omResponse, @Nonnull List snapshotDbKeys, - Map updatedSnapInfos + Map updatedSnapInfos, + TransactionInfo transactionInfo ) { super(omResponse); this.snapshotDbKeys = snapshotDbKeys; this.updatedSnapInfos = updatedSnapInfos; + this.transactionInfo = transactionInfo; } /** @@ -69,6 +74,7 @@ public OMSnapshotPurgeResponse(@Nonnull OMResponse omResponse) { checkStatusNotOK(); 
this.snapshotDbKeys = null; this.updatedSnapInfos = null; + this.transactionInfo = null; } @Override @@ -96,9 +102,13 @@ protected void addToDBBatch(OMMetadataManager omMetadataManager, // Remove the snapshot from snapshotId to snapshotTableKey map. ((OmMetadataManagerImpl) omMetadataManager).getSnapshotChainManager() .removeFromSnapshotIdToTable(snapshotInfo.getSnapshotId()); - // Delete Snapshot checkpoint directory. + OmSnapshotLocalDataManager snapshotLocalDataManager = ((OmMetadataManagerImpl) omMetadataManager) .getOzoneManager().getOmSnapshotManager().getSnapshotLocalDataManager(); + // Update snapshot local data to update purge transaction info. This would be used to check whether the + // snapshot purged txn is flushed to rocksdb. + updateLocalData(snapshotLocalDataManager, snapshotInfo); + // Delete Snapshot checkpoint directory. deleteCheckpointDirectory(snapshotLocalDataManager, omMetadataManager, snapshotInfo); // Delete snapshotInfo from the table. omMetadataManager.getSnapshotInfoTable().deleteWithBatch(batchOperation, dbKey); @@ -115,11 +125,19 @@ private void updateSnapInfo(OmMetadataManagerImpl metadataManager, } } + private void updateLocalData(OmSnapshotLocalDataManager localDataManager, SnapshotInfo snapshotInfo) + throws IOException { + try (WritableOmSnapshotLocalDataProvider snap = localDataManager.getWritableOmSnapshotLocalData(snapshotInfo)) { + snap.setTransactionInfo(this.transactionInfo); + snap.commit(); + } + } + /** * Deletes the checkpoint directory for a snapshot. */ private void deleteCheckpointDirectory(OmSnapshotLocalDataManager snapshotLocalDataManager, - OMMetadataManager omMetadataManager, SnapshotInfo snapshotInfo) { + OMMetadataManager omMetadataManager, SnapshotInfo snapshotInfo) throws IOException { // Acquiring write lock to avoid race condition with sst filtering service which creates a sst filtered file // inside the snapshot directory. 
Any operation apart which doesn't create/delete files under this snapshot // directory can run in parallel along with this operation. @@ -127,14 +145,18 @@ private void deleteCheckpointDirectory(OmSnapshotLocalDataManager snapshotLocalD .acquireWriteLock(SNAPSHOT_DB_LOCK, snapshotInfo.getSnapshotId().toString()); boolean acquiredSnapshotLock = omLockDetails.isLockAcquired(); if (acquiredSnapshotLock) { - Path snapshotDirPath = OmSnapshotManager.getSnapshotPath(omMetadataManager, snapshotInfo); - try { - FileUtils.deleteDirectory(snapshotDirPath.toFile()); - } catch (IOException ex) { - LOG.error("Failed to delete snapshot directory {} for snapshot {}", - snapshotDirPath, snapshotInfo.getTableKey(), ex); - } finally { - omMetadataManager.getLock().releaseWriteLock(SNAPSHOT_DB_LOCK, snapshotInfo.getSnapshotId().toString()); + try (OmSnapshotLocalDataManager.ReadableOmSnapshotLocalDataMetaProvider snapMetaProvider = + snapshotLocalDataManager.getOmSnapshotLocalDataMeta(snapshotInfo)) { + Path snapshotDirPath = OmSnapshotManager.getSnapshotPath(omMetadataManager, snapshotInfo, + snapMetaProvider.getMeta().getVersion()); + try { + FileUtils.deleteDirectory(snapshotDirPath.toFile()); + } catch (IOException ex) { + LOG.error("Failed to delete snapshot directory {} for snapshot {}", + snapshotDirPath, snapshotInfo.getTableKey(), ex); + } finally { + omMetadataManager.getLock().releaseWriteLock(SNAPSHOT_DB_LOCK, snapshotInfo.getSnapshotId().toString()); + } } } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java index db44337ee411..ab40a0530fce 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java @@ -117,7 +117,7 @@ public 
SnapshotDeletingService(long interval, long serviceTimeout, OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK, OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK_DEFAULT); IOzoneManagerLock lock = getOzoneManager().getMetadataManager().getLock(); - this.snapshotIdLocks = new MultiSnapshotLocks(lock, SNAPSHOT_GC_LOCK, true); + this.snapshotIdLocks = new MultiSnapshotLocks(lock, SNAPSHOT_GC_LOCK, true, 2); this.lockIds = new ArrayList<>(2); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/MultiSnapshotLocks.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/MultiSnapshotLocks.java index 525877306965..bb8161f0faeb 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/MultiSnapshotLocks.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/MultiSnapshotLocks.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone.om.snapshot; +import com.google.common.annotations.VisibleForTesting; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -39,11 +40,16 @@ public class MultiSnapshotLocks { private final boolean writeLock; private OMLockDetails lockDetails; + @VisibleForTesting public MultiSnapshotLocks(IOzoneManagerLock lock, Resource resource, boolean writeLock) { + this(lock, resource, writeLock, 0); + } + + public MultiSnapshotLocks(IOzoneManagerLock lock, Resource resource, boolean writeLock, int maxNumberOfLocks) { this.writeLock = writeLock; this.resource = resource; this.lock = lock; - this.objectLocks = new ArrayList<>(); + this.objectLocks = new ArrayList<>(maxNumberOfLocks); this.lockDetails = OMLockDetails.EMPTY_DETAILS_LOCK_NOT_ACQUIRED; } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotLocalDataManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotLocalDataManager.java index 3c529abaf3c8..33caddc92327 
100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotLocalDataManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotLocalDataManager.java @@ -27,16 +27,25 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.Stack; import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.RDBStore; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmSnapshotLocalData; @@ -44,8 +53,15 @@ import org.apache.hadoop.ozone.om.OmSnapshotLocalDataYaml; import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.lock.FlatResource; +import org.apache.hadoop.ozone.om.lock.HierarchicalResourceLockManager; +import org.apache.hadoop.ozone.om.lock.HierarchicalResourceLockManager.HierarchicalResourceLock; +import org.apache.hadoop.ozone.om.lock.OMLockDetails; import org.apache.hadoop.ozone.util.ObjectSerializer; import org.apache.hadoop.ozone.util.YamlSerializer; +import org.apache.ratis.util.function.CheckedSupplier; +import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; +import org.rocksdb.LiveFileMetaData; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.yaml.snakeyaml.Yaml; @@ -59,9 +75,32 @@ public class 
OmSnapshotLocalDataManager implements AutoCloseable { private static final Logger LOG = LoggerFactory.getLogger(OmSnapshotLocalDataManager.class); private final ObjectSerializer snapshotLocalDataSerializer; + // In-memory DAG of snapshot-version dependencies. Each node represents a + // specific (snapshotId, version) pair, and a directed edge points to the + // corresponding (previousSnapshotId, previousSnapshotVersion) it depends on. + // The durable state is stored in each snapshot's YAML (previousSnapshotId and + // VersionMeta.previousSnapshotVersion). This graph mirrors that persisted + // structure to validate adds/removes and to resolve versions across chains. + // This graph is maintained only in memory and is not persisted to disk. + // Example (linear chain, arrows point to previous): + // (S0, v1) <- (S1, v4) <- (S2, v5) <- (S3, v7) + // where each node is (snapshotId, version) and each arrow points to its + // corresponding (previousSnapshotId, previousSnapshotVersion) dependency. + // + // Example (multiple versions for a single snapshotId S2): + // (S1, v4) <- (S2, v6) <- (S3, v8) + // (S1, v3) <- (S2, v5) + // Here S2 has two distinct versions (v6 and v5), each represented as its own + // node, and each version can depend on a different previousSnapshotVersion on S1. private final MutableGraph localDataGraph; private final Map versionNodeMap; private final OMMetadataManager omMetadataManager; + // Used for acquiring locks on the entire data structure. + private final ReadWriteLock fullLock; + // Used for taking a lock on internal data structure Map and Graph to ensure thread safety; + private final ReadWriteLock internalLock; + // Locks should be always acquired by iterating through the snapshot chain to avoid deadlocks. 
+ private HierarchicalResourceLockManager locks; public OmSnapshotLocalDataManager(OMMetadataManager omMetadataManager) throws IOException { this.localDataGraph = GraphBuilder.directed().build(); @@ -74,7 +113,9 @@ public void computeAndSetChecksum(Yaml yaml, OmSnapshotLocalData data) throws IO data.computeAndSetChecksum(yaml); } }; - this.versionNodeMap = new HashMap<>(); + this.versionNodeMap = new ConcurrentHashMap<>(); + this.fullLock = new ReentrantReadWriteLock(); + this.internalLock = new ReentrantReadWriteLock(); init(); } @@ -99,12 +140,14 @@ public static String getSnapshotLocalPropertyYamlPath(Path snapshotPath) { * @param snapshotInfo snapshot metadata * @return the path to the snapshot's local property YAML file */ + @VisibleForTesting public String getSnapshotLocalPropertyYamlPath(SnapshotInfo snapshotInfo) { return getSnapshotLocalPropertyYamlPath(snapshotInfo.getSnapshotId()); } + @VisibleForTesting public String getSnapshotLocalPropertyYamlPath(UUID snapshotId) { - Path snapshotPath = OmSnapshotManager.getSnapshotPath(omMetadataManager, snapshotId); + Path snapshotPath = OmSnapshotManager.getSnapshotPath(omMetadataManager, snapshotId, 0); return getSnapshotLocalPropertyYamlPath(snapshotPath); } @@ -114,34 +157,58 @@ public String getSnapshotLocalPropertyYamlPath(UUID snapshotId) { * @param snapshotInfo snapshot info instance corresponding to snapshot. 
*/ public void createNewOmSnapshotLocalDataFile(RDBStore snapshotStore, SnapshotInfo snapshotInfo) throws IOException { - Path snapshotLocalDataPath = Paths.get( - getSnapshotLocalPropertyYamlPath(snapshotStore.getDbLocation().toPath())); - Files.deleteIfExists(snapshotLocalDataPath); - OmSnapshotLocalData snapshotLocalDataYaml = new OmSnapshotLocalData(snapshotInfo.getSnapshotId(), - OmSnapshotManager.getSnapshotSSTFileList(snapshotStore), snapshotInfo.getPathPreviousSnapshotId()); - snapshotLocalDataSerializer.save(snapshotLocalDataPath.toFile(), snapshotLocalDataYaml); + try (WritableOmSnapshotLocalDataProvider snapshotLocalData = + new WritableOmSnapshotLocalDataProvider(snapshotInfo.getSnapshotId(), + () -> Pair.of(new OmSnapshotLocalData(snapshotInfo.getSnapshotId(), + OmSnapshotManager.getSnapshotSSTFileList(snapshotStore), + snapshotInfo.getPathPreviousSnapshotId(), null), + null))) { + snapshotLocalData.commit(); + } } - public OmSnapshotLocalData getOmSnapshotLocalData(SnapshotInfo snapshotInfo) throws IOException { + public ReadableOmSnapshotLocalDataMetaProvider getOmSnapshotLocalDataMeta(SnapshotInfo snapInfo) throws IOException { + return getOmSnapshotLocalDataMeta(snapInfo.getSnapshotId()); + } + + public ReadableOmSnapshotLocalDataMetaProvider getOmSnapshotLocalDataMeta(UUID snapshotId) throws IOException { + return new ReadableOmSnapshotLocalDataMetaProvider(snapshotId); + } + + public ReadableOmSnapshotLocalDataProvider getOmSnapshotLocalData(SnapshotInfo snapshotInfo) throws IOException { return getOmSnapshotLocalData(snapshotInfo.getSnapshotId()); } - public OmSnapshotLocalData getOmSnapshotLocalData(UUID snapshotId) throws IOException { - Path snapshotLocalDataPath = Paths.get(getSnapshotLocalPropertyYamlPath(snapshotId)); - OmSnapshotLocalData snapshotLocalData = snapshotLocalDataSerializer.load(snapshotLocalDataPath.toFile()); - if (!Objects.equals(snapshotLocalData.getSnapshotId(), snapshotId)) { - throw new IOException("SnapshotId in path : 
" + snapshotLocalDataPath + " contains snapshotLocalData " + - "corresponding to snapshotId " + snapshotLocalData.getSnapshotId() + ". Expected snapshotId " + snapshotId); - } - return snapshotLocalData; + public ReadableOmSnapshotLocalDataProvider getOmSnapshotLocalData(UUID snapshotId) throws IOException { + return new ReadableOmSnapshotLocalDataProvider(snapshotId); + } + + public ReadableOmSnapshotLocalDataProvider getOmSnapshotLocalData(UUID snapshotId, UUID previousSnapshotID) + throws IOException { + return new ReadableOmSnapshotLocalDataProvider(snapshotId, previousSnapshotID); } - public OmSnapshotLocalData getOmSnapshotLocalData(File snapshotDataPath) throws IOException { + public WritableOmSnapshotLocalDataProvider getWritableOmSnapshotLocalData(SnapshotInfo snapshotInfo) + throws IOException { + return getWritableOmSnapshotLocalData(snapshotInfo.getSnapshotId(), snapshotInfo.getPathPreviousSnapshotId()); + } + + public WritableOmSnapshotLocalDataProvider getWritableOmSnapshotLocalData(UUID snapshotId, UUID previousSnapshotId) + throws IOException { + return new WritableOmSnapshotLocalDataProvider(snapshotId, previousSnapshotId); + } + + public WritableOmSnapshotLocalDataProvider getWritableOmSnapshotLocalData(UUID snapshotId) + throws IOException { + return new WritableOmSnapshotLocalDataProvider(snapshotId); + } + + OmSnapshotLocalData getOmSnapshotLocalData(File snapshotDataPath) throws IOException { return snapshotLocalDataSerializer.load(snapshotDataPath); } private LocalDataVersionNode getVersionNode(UUID snapshotId, int version) { - if (!versionNodeMap.containsKey(snapshotId)) { + if (snapshotId == null || !versionNodeMap.containsKey(snapshotId)) { return null; } return versionNodeMap.get(snapshotId).getVersionNode(version); @@ -151,15 +218,9 @@ private void addSnapshotVersionMeta(UUID snapshotId, SnapshotVersionsMeta snapsh throws IOException { if (!versionNodeMap.containsKey(snapshotId)) { for (LocalDataVersionNode versionNode : 
snapshotVersionsMeta.getSnapshotVersions().values()) { - if (getVersionNode(versionNode.snapshotId, versionNode.version) != null) { - throw new IOException("Unable to add " + versionNode + " since it already exists"); - } - LocalDataVersionNode previousVersionNode = versionNode.previousSnapshotId == null ? null : + validateVersionAddition(versionNode); + LocalDataVersionNode previousVersionNode = getVersionNode(versionNode.previousSnapshotId, versionNode.previousSnapshotVersion); - if (versionNode.previousSnapshotId != null && previousVersionNode == null) { - throw new IOException("Unable to add " + versionNode + " since previous snapshot with version hasn't been " + - "loaded"); - } localDataGraph.addNode(versionNode); if (previousVersionNode != null) { localDataGraph.putEdge(versionNode, previousVersionNode); @@ -186,7 +247,13 @@ void addVersionNodeWithDependents(OmSnapshotLocalData snapshotLocalData) throws } else { UUID prevSnapId = snapshotVersionsMeta.getPreviousSnapshotId(); if (prevSnapId != null && !versionNodeMap.containsKey(prevSnapId)) { - OmSnapshotLocalData prevSnapshotLocalData = getOmSnapshotLocalData(prevSnapId); + File previousSnapshotLocalDataFile = new File(getSnapshotLocalPropertyYamlPath(prevSnapId)); + OmSnapshotLocalData prevSnapshotLocalData = snapshotLocalDataSerializer.load(previousSnapshotLocalDataFile); + if (!prevSnapId.equals(prevSnapshotLocalData.getSnapshotId())) { + throw new IOException("SnapshotId mismatch: expected " + prevSnapId + + " but found " + prevSnapshotLocalData.getSnapshotId() + + " in file " + previousSnapshotLocalDataFile.getAbsolutePath()); + } stack.push(Pair.of(prevSnapshotLocalData.getSnapshotId(), new SnapshotVersionsMeta(prevSnapshotLocalData))); } visitedSnapshotIds.add(snapId); @@ -195,6 +262,7 @@ void addVersionNodeWithDependents(OmSnapshotLocalData snapshotLocalData) throws } private void init() throws IOException { + this.locks = omMetadataManager.getHierarchicalLockManager(); RDBStore store = (RDBStore) 
omMetadataManager.getStore(); String checkpointPrefix = store.getDbLocation().getName(); File snapshotDir = new File(store.getSnapshotsParentDir()); @@ -217,6 +285,46 @@ private void init() throws IOException { } } + /** + * Acquires a write lock and provides an auto-closeable supplier for specifying details + * of the lock acquisition. The lock is released when the returned supplier is closed. + * + * @return an instance of {@code UncheckedAutoCloseableSupplier} representing + * the acquired lock details, where the lock will automatically be released on close. + */ + public UncheckedAutoCloseableSupplier lock() { + this.fullLock.writeLock().lock(); + return new UncheckedAutoCloseableSupplier() { + @Override + public OMLockDetails get() { + return OMLockDetails.EMPTY_DETAILS_LOCK_ACQUIRED; + } + + @Override + public void close() { + fullLock.writeLock().unlock(); + } + }; + } + + private void validateVersionRemoval(UUID snapshotId, int version) throws IOException { + LocalDataVersionNode versionNode = getVersionNode(snapshotId, version); + if (versionNode != null && localDataGraph.inDegree(versionNode) != 0) { + Set versionNodes = localDataGraph.predecessors(versionNode); + throw new IOException(String.format("Cannot remove Snapshot %s with version : %d since it still has " + + "predecessors : %s", snapshotId, version, versionNodes)); + } + } + + private void validateVersionAddition(LocalDataVersionNode versionNode) throws IOException { + LocalDataVersionNode previousVersionNode = getVersionNode(versionNode.previousSnapshotId, + versionNode.previousSnapshotVersion); + if (versionNode.previousSnapshotId != null && previousVersionNode == null) { + throw new IOException("Unable to add " + versionNode + " since previous snapshot with version hasn't been " + + "loaded"); + } + } + @Override public void close() { if (snapshotLocalDataSerializer != null) { @@ -228,6 +336,465 @@ public void close() { } } + private HierarchicalResourceLock acquireLock(UUID snapId, boolean 
readLock) throws IOException { + HierarchicalResourceLock acquiredLock = readLock ? locks.acquireReadLock(FlatResource.SNAPSHOT_LOCAL_DATA_LOCK, + snapId.toString()) : locks.acquireWriteLock(FlatResource.SNAPSHOT_LOCAL_DATA_LOCK, snapId.toString()); + if (!acquiredLock.isLockAcquired()) { + throw new IOException("Unable to acquire lock for snapshotId: " + snapId); + } + return acquiredLock; + } + + private static final class LockDataProviderInitResult { + private final OmSnapshotLocalData snapshotLocalData; + private final HierarchicalResourceLock lock; + private final HierarchicalResourceLock previousLock; + private final UUID previousSnapshotId; + + private LockDataProviderInitResult(HierarchicalResourceLock lock, OmSnapshotLocalData snapshotLocalData, + HierarchicalResourceLock previousLock, UUID previousSnapshotId) { + this.lock = lock; + this.snapshotLocalData = snapshotLocalData; + this.previousLock = previousLock; + this.previousSnapshotId = previousSnapshotId; + } + + private HierarchicalResourceLock getLock() { + return lock; + } + + private HierarchicalResourceLock getPreviousLock() { + return previousLock; + } + + private UUID getPreviousSnapshotId() { + return previousSnapshotId; + } + + private OmSnapshotLocalData getSnapshotLocalData() { + return snapshotLocalData; + } + } + + /** + * Provides LocalData's metadata stored in memory for a snapshot after acquiring a read lock on this. 
+ */ + public final class ReadableOmSnapshotLocalDataMetaProvider implements AutoCloseable { + private final SnapshotVersionsMeta meta; + private final HierarchicalResourceLock lock; + private boolean closed; + + private ReadableOmSnapshotLocalDataMetaProvider(UUID snapshotId) throws IOException { + this.lock = acquireLock(snapshotId, true); + this.meta = versionNodeMap.get(snapshotId); + this.closed = false; + } + + public synchronized SnapshotVersionsMeta getMeta() throws IOException { + if (closed) { + throw new IOException("Resource has already been closed."); + } + return meta; + } + + @Override + public synchronized void close() throws IOException { + closed = true; + lock.close(); + } + } + + /** + * The ReadableOmSnapshotLocalDataProvider class is responsible for managing the + * access and initialization of local snapshot data in a thread-safe manner. + * It provides mechanisms to handle snapshot data, retrieve associated previous + * snapshot data, and manage lock synchronization for safe concurrent operations. + * + * This class works with snapshot identifiers and ensures that the appropriate + * local data for a given snapshot is loaded and accessible. Additionally, it + * maintains locking mechanisms to ensure thread-safe initialization and access + * to both the current and previous snapshot local data. The implementation also + * supports handling errors in the snapshot data initialization process. + * + * Key Functionalities: + * - Initializes and provides access to snapshot local data associated with a + * given snapshot identifier. + * - Resolves and retrieves data for the previous snapshot if applicable. + * - Ensures safe concurrent read operations using locking mechanisms. + * - Validates the integrity and consistency of snapshot data during initialization. + * - Ensures that appropriate locks are released upon closing. 
+ * + * Thread-Safety: + * This class utilizes locks to guarantee thread-safe operations when accessing + * or modifying snapshot data. State variables relating to snapshot data are + * properly synchronized to ensure consistency during concurrent operations. + * + * Usage Considerations: + * - Ensure proper handling of exceptions while interacting with this class, + * particularly during initialization and cleanup. + * - Always invoke the {@code close()} method after usage to release acquired locks + * and avoid potential deadlocks. + */ + public class ReadableOmSnapshotLocalDataProvider implements AutoCloseable { + + private final UUID snapshotId; + private final HierarchicalResourceLock lock; + private final HierarchicalResourceLock previousLock; + private final OmSnapshotLocalData snapshotLocalData; + private OmSnapshotLocalData previousSnapshotLocalData; + private volatile boolean isPreviousSnapshotLoaded = false; + private final UUID resolvedPreviousSnapshotId; + + protected ReadableOmSnapshotLocalDataProvider(UUID snapshotId) throws IOException { + this(snapshotId, true); + } + + protected ReadableOmSnapshotLocalDataProvider(UUID snapshotId, UUID snapIdToResolve) throws IOException { + this(snapshotId, true, null, snapIdToResolve, true); + } + + protected ReadableOmSnapshotLocalDataProvider(UUID snapshotId, boolean readLock) throws IOException { + this(snapshotId, readLock, null, null, false); + } + + protected ReadableOmSnapshotLocalDataProvider(UUID snapshotId, boolean readLock, + CheckedSupplier, IOException> snapshotLocalDataSupplier, + UUID snapshotIdToBeResolved, boolean isSnapshotToBeResolvedNullable) throws IOException { + this.snapshotId = snapshotId; + LockDataProviderInitResult result = initialize(readLock, snapshotId, snapshotIdToBeResolved, + isSnapshotToBeResolvedNullable, snapshotLocalDataSupplier); + this.snapshotLocalData = result.getSnapshotLocalData(); + this.lock = result.getLock(); + this.previousLock = result.getPreviousLock(); + 
this.resolvedPreviousSnapshotId = result.getPreviousSnapshotId(); + this.previousSnapshotLocalData = null; + this.isPreviousSnapshotLoaded = false; + } + + public OmSnapshotLocalData getSnapshotLocalData() { + return snapshotLocalData; + } + + public synchronized OmSnapshotLocalData getPreviousSnapshotLocalData() throws IOException { + if (!isPreviousSnapshotLoaded) { + if (resolvedPreviousSnapshotId != null) { + File previousSnapshotLocalDataFile = new File(getSnapshotLocalPropertyYamlPath(resolvedPreviousSnapshotId)); + this.previousSnapshotLocalData = snapshotLocalDataSerializer.load(previousSnapshotLocalDataFile); + } + this.isPreviousSnapshotLoaded = true; + } + return previousSnapshotLocalData; + } + + /** + * Intializes the snapshot local data by acquiring the lock on the snapshot and also acquires a read lock on the + * snapshotId to be resolved by iterating through the chain of previous snapshot ids. + */ + private LockDataProviderInitResult initialize( + boolean readLock, UUID snapId, UUID toResolveSnapshotId, boolean isSnapshotToBeResolvedNullable, + CheckedSupplier, IOException> snapshotLocalDataSupplier) throws IOException { + HierarchicalResourceLock snapIdLock = null; + HierarchicalResourceLock previousReadLockAcquired = null; + try { + snapIdLock = acquireLock(snapId, readLock); + snapshotLocalDataSupplier = snapshotLocalDataSupplier == null ? () -> { + File snapshotLocalDataFile = new File(getSnapshotLocalPropertyYamlPath(snapId)); + return Pair.of(snapshotLocalDataSerializer.load(snapshotLocalDataFile), snapshotLocalDataFile); + } : snapshotLocalDataSupplier; + Pair pair = snapshotLocalDataSupplier.get(); + OmSnapshotLocalData ssLocalData = pair.getKey(); + if (!Objects.equals(ssLocalData.getSnapshotId(), snapId)) { + String loadPath = pair.getValue() == null ? 
null : pair.getValue().getAbsolutePath(); + throw new IOException("SnapshotId in path : " + loadPath + " contains snapshotLocalData corresponding " + + "to snapshotId " + ssLocalData.getSnapshotId() + ". Expected snapshotId " + snapId); + } + // Get previous snapshotId and acquire read lock on the id. We need to do this outside the loop instead of a + // do while loop since the nodes that need be added may not be present in the graph so it may not be possible + // to iterate through the chain. + UUID previousSnapshotId = ssLocalData.getPreviousSnapshotId(); + // if flag toResolveSnapshotIdIsNull is true or toResolveSnapshotId is not null, then we resolve snapshot + // with previous snapshot id as null, which would mean if the snapshot local data is committed the snapshot + // local data would become first snapshot in the chain with no previous snapshot id. + toResolveSnapshotId = (isSnapshotToBeResolvedNullable || toResolveSnapshotId != null) ? toResolveSnapshotId : + ssLocalData.getPreviousSnapshotId(); + if (toResolveSnapshotId != null && previousSnapshotId != null) { + previousReadLockAcquired = acquireLock(previousSnapshotId, true); + if (!versionNodeMap.containsKey(previousSnapshotId)) { + throw new IOException(String.format("Operating on snapshot id : %s with previousSnapshotId: %s invalid " + + "since previousSnapshotId is not loaded.", snapId, previousSnapshotId)); + } + // Create a copy of the previous versionMap to get the previous versions corresponding to the previous + // snapshot. This map would mutated to resolve the previous snapshot's version corresponding to the + // toResolveSnapshotId by iterating through the chain of previous snapshot ids. + Map previousVersionNodeMap = + new HashMap<>(versionNodeMap.get(previousSnapshotId).getSnapshotVersions()); + UUID currentIteratedSnapshotId = previousSnapshotId; + // Iterate through the chain of previous snapshot ids until the snapshot id to be resolved is found. 
+ while (!Objects.equals(currentIteratedSnapshotId, toResolveSnapshotId)) { + // All versions for the snapshot should point to the same previous snapshot id. Otherwise this is a sign + // of corruption. + Set previousIds = + previousVersionNodeMap.values().stream().map(LocalDataVersionNode::getPreviousSnapshotId) + .collect(Collectors.toSet()); + if (previousIds.size() > 1) { + throw new IOException(String.format("Snapshot %s versions has multiple previous snapshotIds %s", + currentIteratedSnapshotId, previousIds)); + } + if (previousIds.isEmpty()) { + throw new IOException(String.format("Snapshot %s versions doesn't have previous Id thus snapshot " + + "%s cannot be resolved against id %s", + currentIteratedSnapshotId, snapId, toResolveSnapshotId)); + } + UUID previousId = previousIds.iterator().next(); + HierarchicalResourceLock previousToPreviousReadLockAcquired = acquireLock(previousId, true); + try { + // Get the version node for the snapshot and update the version node to the successor to point to the + // previous node. + for (Map.Entry entry : previousVersionNodeMap.entrySet()) { + internalLock.readLock().lock(); + try { + Set versionNode = localDataGraph.successors(entry.getValue()); + if (versionNode.size() > 1) { + throw new IOException(String.format("Snapshot %s version %d has multiple successors %s", + currentIteratedSnapshotId, entry.getValue().getVersion(), versionNode)); + } + if (versionNode.isEmpty()) { + throw new IOException(String.format("Snapshot %s version %d doesn't have successor", + currentIteratedSnapshotId, entry.getValue().getVersion())); + } + // Set the version node for iterated version to the successor corresponding to the previous snapshot + // id. + entry.setValue(versionNode.iterator().next()); + } finally { + internalLock.readLock().unlock(); + } + } + } finally { + // Release the read lock acquired on the previous snapshot id acquired. 
Now that the instance + // is no longer needed we can release the read lock for the snapshot iterated in the previous snapshot. + // Make previousToPrevious previous for next iteration. + previousReadLockAcquired.close(); + previousReadLockAcquired = previousToPreviousReadLockAcquired; + currentIteratedSnapshotId = previousId; + } + } + ssLocalData.setPreviousSnapshotId(toResolveSnapshotId); + Map versionMetaMap = ssLocalData.getVersionSstFileInfos(); + for (Map.Entry entry : versionMetaMap.entrySet()) { + OmSnapshotLocalData.VersionMeta versionMeta = entry.getValue(); + // Get the relative version node which corresponds to the toResolveSnapshotId corresponding to the + // versionMeta which points to a particular version in the previous snapshot + LocalDataVersionNode relativePreviousVersionNode = + previousVersionNodeMap.get(versionMeta.getPreviousSnapshotVersion()); + if (relativePreviousVersionNode == null) { + throw new IOException(String.format("Unable to resolve previous version node for snapshot: %s" + + " with version : %d against previous snapshot %s previous version : %d", + snapId, entry.getKey(), toResolveSnapshotId, versionMeta.getPreviousSnapshotVersion())); + } + // Set the previous snapshot version to the relativePreviousVersionNode which was captured. + versionMeta.setPreviousSnapshotVersion(relativePreviousVersionNode.getVersion()); + } + } else { + toResolveSnapshotId = null; + ssLocalData.setPreviousSnapshotId(null); + } + return new LockDataProviderInitResult(snapIdLock, ssLocalData, previousReadLockAcquired, toResolveSnapshotId); + } catch (IOException e) { + // Release all the locks in case of an exception and rethrow the exception. 
+ if (previousReadLockAcquired != null) { + previousReadLockAcquired.close(); + } + if (snapIdLock != null) { + snapIdLock.close(); + } + throw e; + } + } + + public boolean needsDefrag() { + if (snapshotLocalData.getNeedsDefrag()) { + return true; + } + if (resolvedPreviousSnapshotId != null) { + int snapshotVersion = snapshotLocalData.getVersion(); + int previousResolvedSnapshotVersion = snapshotLocalData.getVersionSstFileInfos().get(snapshotVersion) + .getPreviousSnapshotVersion(); + return previousResolvedSnapshotVersion < getVersionNodeMap().get(resolvedPreviousSnapshotId).getVersion(); + } + return false; + } + + @Override + public void close() throws IOException { + if (previousLock != null) { + previousLock.close(); + } + if (lock != null) { + lock.close(); + } + } + } + + /** + * This class represents a writable provider for managing local data of + * OmSnapshot. It extends the functionality of {@code ReadableOmSnapshotLocalDataProvider} + * and provides support for write operations, such as committing changes. + * + * The writable snapshot data provider interacts with version nodes and + * facilitates atomic updates to snapshot properties and files. + * + * This class is designed to ensure thread-safe operations and uses locks to + * guarantee consistent state across concurrent activities. + * + * The default usage includes creating an instance of this provider with + * specific snapshot identifiers and optionally handling additional parameters + * such as data resolution or a supplier for snapshot data. 
+ */ + public final class WritableOmSnapshotLocalDataProvider extends ReadableOmSnapshotLocalDataProvider { + + private boolean dirty; + + private WritableOmSnapshotLocalDataProvider(UUID snapshotId) throws IOException { + super(snapshotId, false); + fullLock.readLock().lock(); + } + + private WritableOmSnapshotLocalDataProvider(UUID snapshotId, UUID snapshotIdToBeResolved) throws IOException { + super(snapshotId, false, null, snapshotIdToBeResolved, true); + fullLock.readLock().lock(); + } + + private WritableOmSnapshotLocalDataProvider(UUID snapshotId, + CheckedSupplier, IOException> snapshotLocalDataSupplier) throws IOException { + super(snapshotId, false, snapshotLocalDataSupplier, null, false); + fullLock.readLock().lock(); + } + + private SnapshotVersionsMeta validateModification(OmSnapshotLocalData snapshotLocalData) + throws IOException { + internalLock.readLock().lock(); + try { + SnapshotVersionsMeta versionsToBeAdded = new SnapshotVersionsMeta(snapshotLocalData); + SnapshotVersionsMeta existingVersionsMeta = getVersionNodeMap().get(snapshotLocalData.getSnapshotId()); + for (LocalDataVersionNode node : versionsToBeAdded.getSnapshotVersions().values()) { + validateVersionAddition(node); + } + UUID snapshotId = snapshotLocalData.getSnapshotId(); + Map existingVersions = getVersionNodeMap().containsKey(snapshotId) ? + getVersionNodeMap().get(snapshotId).getSnapshotVersions() : Collections.emptyMap(); + for (Map.Entry entry : existingVersions.entrySet()) { + if (!versionsToBeAdded.getSnapshotVersions().containsKey(entry.getKey())) { + validateVersionRemoval(snapshotId, entry.getKey()); + } + } + // Set Dirty if the snapshot doesn't exist or previousSnapshotId has changed. 
+ if (existingVersionsMeta == null || !Objects.equals(versionsToBeAdded.getPreviousSnapshotId(), + existingVersionsMeta.getPreviousSnapshotId())) { + setDirty(); + // Set the needsDefrag if the new previous snapshotId is different from the existing one or if this is a new + // snapshot yaml file. + snapshotLocalData.setNeedsDefrag(true); + } + return versionsToBeAdded; + } finally { + internalLock.readLock().unlock(); + } + } + + public void addSnapshotVersion(RDBStore snapshotStore) throws IOException { + List sstFiles = OmSnapshotManager.getSnapshotSSTFileList(snapshotStore); + OmSnapshotLocalData previousSnapshotLocalData = getPreviousSnapshotLocalData(); + this.getSnapshotLocalData().addVersionSSTFileInfos(sstFiles, previousSnapshotLocalData == null ? 0 : + previousSnapshotLocalData.getVersion()); + // Adding a new snapshot version means it has been defragged thus the flag needs to be reset. + this.getSnapshotLocalData().setNeedsDefrag(false); + // Set Dirty if a version is added. + setDirty(); + } + + public void removeVersion(int version) { + this.getSnapshotLocalData().removeVersionSSTFileInfos(version); + // Set Dirty if a version is removed. + setDirty(); + } + + public void setTransactionInfo(TransactionInfo transactionInfo) { + this.getSnapshotLocalData().setTransactionInfo(transactionInfo); + // Set Dirty when the transactionInfo is set. + setDirty(); + } + + public synchronized void commit() throws IOException { + // Validate modification and commit the changes. + SnapshotVersionsMeta localDataVersionNodes = validateModification(super.snapshotLocalData); + // Need to update the disk state if and only if the dirty bit is set. 
+ if (isDirty()) { + String filePath = getSnapshotLocalPropertyYamlPath(super.snapshotId); + String tmpFilePath = filePath + ".tmp"; + File tmpFile = new File(tmpFilePath); + boolean tmpFileExists = tmpFile.exists(); + if (tmpFileExists) { + tmpFileExists = !tmpFile.delete(); + } + if (tmpFileExists) { + throw new IOException("Unable to delete tmp file " + tmpFilePath); + } + snapshotLocalDataSerializer.save(new File(tmpFilePath), super.snapshotLocalData); + Files.move(tmpFile.toPath(), Paths.get(filePath), StandardCopyOption.ATOMIC_MOVE, + StandardCopyOption.REPLACE_EXISTING); + upsertNode(super.snapshotId, localDataVersionNodes); + // Reset dirty bit + resetDirty(); + } + } + + private void upsertNode(UUID snapshotId, SnapshotVersionsMeta snapshotVersions) throws IOException { + internalLock.writeLock().lock(); + try { + SnapshotVersionsMeta existingSnapVersions = getVersionNodeMap().remove(snapshotId); + Map existingVersions = existingSnapVersions == null ? Collections.emptyMap() : + existingSnapVersions.getSnapshotVersions(); + Map> predecessors = new HashMap<>(); + // Track all predecessors of the existing versions and remove the node from the graph. + for (Map.Entry existingVersion : existingVersions.entrySet()) { + LocalDataVersionNode existingVersionNode = existingVersion.getValue(); + // Create a copy of predecessors since the list of nodes returned would be a mutable set and it changes as the + // nodes in the graph would change. + predecessors.put(existingVersion.getKey(), new ArrayList<>(localDataGraph.predecessors(existingVersionNode))); + localDataGraph.removeNode(existingVersionNode); + } + // Add the nodes to be added in the graph and map. + addSnapshotVersionMeta(snapshotId, snapshotVersions); + // Reconnect all the predecessors for existing nodes. 
+ for (Map.Entry entry : snapshotVersions.getSnapshotVersions().entrySet()) { + for (LocalDataVersionNode predecessor : predecessors.getOrDefault(entry.getKey(), Collections.emptyList())) { + localDataGraph.putEdge(predecessor, entry.getValue()); + } + } + } finally { + internalLock.writeLock().unlock(); + } + } + + private void setDirty() { + dirty = true; + } + + private void resetDirty() { + dirty = false; + } + + private boolean isDirty() { + return dirty; + } + + @Override + public void close() throws IOException { + super.close(); + fullLock.readLock().unlock(); + } + } + static final class LocalDataVersionNode { private final UUID snapshotId; private final int version; @@ -241,6 +808,14 @@ private LocalDataVersionNode(UUID snapshotId, int version, UUID previousSnapshot this.version = version; } + private UUID getPreviousSnapshotId() { + return previousSnapshotId; + } + + private int getVersion() { + return version; + } + @Override public boolean equals(Object o) { if (!(o instanceof LocalDataVersionNode)) { @@ -255,9 +830,22 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(snapshotId, version, previousSnapshotId, previousSnapshotVersion); } + + @Override + public String toString() { + return "LocalDataVersionNode{" + + "snapshotId=" + snapshotId + + ", version=" + version + + ", previousSnapshotId=" + previousSnapshotId + + ", previousSnapshotVersion=" + previousSnapshotVersion + + '}'; + } } - static final class SnapshotVersionsMeta { + /** + * Class that encapsulates the metadata corresponding to a snapshot's local data. 
+ */ + public static final class SnapshotVersionsMeta { private final UUID previousSnapshotId; private final Map snapshotVersions; private int version; @@ -279,16 +867,16 @@ private Map getVersionNodes(OmSnapshotLocalData s return versionNodes; } - UUID getPreviousSnapshotId() { + public UUID getPreviousSnapshotId() { return previousSnapshotId; } - int getVersion() { + public int getVersion() { return version; } - Map getSnapshotVersions() { - return snapshotVersions; + private Map getSnapshotVersions() { + return Collections.unmodifiableMap(snapshotVersions); } LocalDataVersionNode getVersionNode(int snapshotVersion) { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index 6867f819b9c3..ce79c32fc4ee 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -288,14 +288,26 @@ public void release(UUID key) { */ public UncheckedAutoCloseableSupplier lock() { return lock(() -> lock.acquireResourceWriteLock(SNAPSHOT_DB_LOCK), - () -> lock.releaseResourceWriteLock(SNAPSHOT_DB_LOCK)); + () -> lock.releaseResourceWriteLock(SNAPSHOT_DB_LOCK), () -> cleanup(true)); } - private UncheckedAutoCloseableSupplier lock( - Supplier lockFunction, Supplier unlockFunction) { + /** + * Acquires a write lock on a specific snapshot database and returns an auto-closeable supplier for lock details. + * The lock ensures that the operations accessing the snapshot database are performed in a thread safe manner. The + * returned supplier automatically releases the lock acquired when closed, preventing potential resource + * contention or deadlocks. 
+ */ + public UncheckedAutoCloseableSupplier lock(UUID snapshotId) { + return lock(() -> lock.acquireWriteLock(SNAPSHOT_DB_LOCK, snapshotId.toString()), + () -> lock.releaseWriteLock(SNAPSHOT_DB_LOCK, snapshotId.toString()), + () -> cleanup(snapshotId)); + } + + private UncheckedAutoCloseableSupplier lock(Supplier lockFunction, + Supplier unlockFunction, Supplier cleanupFunction) { AtomicReference lockDetails = new AtomicReference<>(lockFunction.get()); if (lockDetails.get().isLockAcquired()) { - cleanup(true); + cleanupFunction.get(); if (!dbMap.isEmpty()) { lockDetails.set(unlockFunction.get()); } @@ -324,43 +336,49 @@ public OMLockDetails get() { * If cache size exceeds soft limit, attempt to clean up and close the instances that has zero reference count. */ - private synchronized void cleanup(boolean force) { + private synchronized Void cleanup(boolean force) { if (force || dbMap.size() > cacheSizeLimit) { for (UUID evictionKey : pendingEvictionQueue) { - ReferenceCounted snapshot = dbMap.get(evictionKey); - if (snapshot != null && snapshot.getTotalRefCount() == 0) { - try { - compactSnapshotDB(snapshot.get()); - } catch (IOException e) { - LOG.warn("Failed to compact snapshot DB for snapshotId {}: {}", - evictionKey, e.getMessage()); - } - } - - dbMap.compute(evictionKey, (k, v) -> { - pendingEvictionQueue.remove(k); - if (v == null) { - throw new IllegalStateException("SnapshotId '" + k + "' does not exist in cache. The RocksDB " + - "instance of the Snapshot may not be closed properly."); - } + cleanup(evictionKey); + } + } + return null; + } - if (v.getTotalRefCount() > 0) { - LOG.debug("SnapshotId {} is still being referenced ({}), skipping its clean up.", k, v.getTotalRefCount()); - return v; - } else { - LOG.debug("Closing SnapshotId {}. It is not being referenced anymore.", k); - // Close the instance, which also closes its DB handle. 
- try { - v.get().close(); - } catch (IOException ex) { - throw new IllegalStateException("Error while closing snapshot DB.", ex); - } - omMetrics.decNumSnapshotCacheSize(); - return null; - } - }); + private synchronized Void cleanup(UUID evictionKey) { + ReferenceCounted snapshot = dbMap.get(evictionKey); + if (snapshot != null && snapshot.getTotalRefCount() == 0) { + try { + compactSnapshotDB(snapshot.get()); + } catch (IOException e) { + LOG.warn("Failed to compact snapshot DB for snapshotId {}: {}", + evictionKey, e.getMessage()); } } + + dbMap.compute(evictionKey, (k, v) -> { + pendingEvictionQueue.remove(k); + if (v == null) { + throw new IllegalStateException("SnapshotId '" + k + "' does not exist in cache. The RocksDB " + + "instance of the Snapshot may not be closed properly."); + } + + if (v.getTotalRefCount() > 0) { + LOG.debug("SnapshotId {} is still being referenced ({}), skipping its clean up.", k, v.getTotalRefCount()); + return v; + } else { + LOG.debug("Closing SnapshotId {}. It is not being referenced anymore.", k); + // Close the instance, which also closes its DB handle. 
+ try { + v.get().close(); + } catch (IOException ex) { + throw new IllegalStateException("Error while closing snapshot DB.", ex); + } + omMetrics.decNumSnapshotCacheSize(); + return null; + } + }); + return null; } /** diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/filter/ReclaimableFilter.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/filter/ReclaimableFilter.java index 7d227dfb641c..89c0e4c46e20 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/filter/ReclaimableFilter.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/filter/ReclaimableFilter.java @@ -90,7 +90,8 @@ public ReclaimableFilter( this.omSnapshotManager = omSnapshotManager; this.currentSnapshotInfo = currentSnapshotInfo; this.snapshotChainManager = snapshotChainManager; - this.snapshotIdLocks = new MultiSnapshotLocks(lock, SNAPSHOT_GC_LOCK, false); + this.snapshotIdLocks = new MultiSnapshotLocks(lock, SNAPSHOT_GC_LOCK, false, + numberOfPreviousSnapshotsFromChain + 1); this.keyManager = keyManager; this.numberOfPreviousSnapshotsFromChain = numberOfPreviousSnapshotsFromChain; this.previousOmSnapshots = new ArrayList<>(numberOfPreviousSnapshotsFromChain); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java index 42ca2113f40f..8184b39642e4 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java @@ -38,6 +38,8 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMConfigurationRequest; import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMConfigurationResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMNodeInfo; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.TriggerSnapshotDefragRequest; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.TriggerSnapshotDefragResponse; /** * This class is the server-side translator that forwards requests received on @@ -128,4 +130,22 @@ public CompactResponse compactDB(RpcController controller, CompactRequest compac return CompactResponse.newBuilder() .setSuccess(true).build(); } + + @Override + public TriggerSnapshotDefragResponse triggerSnapshotDefrag( + RpcController controller, TriggerSnapshotDefragRequest request) + throws ServiceException { + try { + boolean result = ozoneManager.triggerSnapshotDefrag(request.getNoWait()); + return TriggerSnapshotDefragResponse.newBuilder() + .setSuccess(true) + .setResult(result) + .build(); + } catch (Exception ex) { + return TriggerSnapshotDefragResponse.newBuilder() + .setSuccess(false) + .setErrorMsg(ex.getMessage()) + .build(); + } + } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java index 5edd683a43d2..823a64052570 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestingClient.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; import org.apache.hadoop.ozone.common.BlockGroup; import org.apache.hadoop.ozone.common.DeleteBlockGroupResult; +import org.apache.hadoop.ozone.common.DeletedBlock; import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ 
-148,35 +149,39 @@ private Pipeline createPipeline(DatanodeDetails datanode) { public List deleteKeyBlocks( List keyBlocksInfoList) throws IOException { List results = new ArrayList<>(); - List blockResultList = new ArrayList<>(); - Result result; for (BlockGroup keyBlocks : keyBlocksInfoList) { - for (BlockID blockKey : keyBlocks.getBlockIDList()) { - currentCall++; - switch (this.failCallsFrequency) { - case 0: - result = success; - numBlocksDeleted++; - break; - case 1: - result = unknownFailure; - break; - default: - if (currentCall % this.failCallsFrequency == 0) { - result = unknownFailure; - } else { - result = success; - numBlocksDeleted++; - } - } - blockResultList.add(new DeleteBlockResult(blockKey, result)); + List blockResultList = new ArrayList<>(); + // Process BlockIDs directly if present + for (DeletedBlock deletedBlock : keyBlocks.getDeletedBlocks()) { + blockResultList.add(processBlock(deletedBlock.getBlockID())); } - results.add(new DeleteBlockGroupResult(keyBlocks.getGroupID(), - blockResultList)); + results.add(new DeleteBlockGroupResult(keyBlocks.getGroupID(), blockResultList)); } return results; } + private DeleteBlockResult processBlock(BlockID blockID) { + currentCall++; + Result result; + switch (failCallsFrequency) { + case 0: + result = success; + numBlocksDeleted++; + break; + case 1: + result = unknownFailure; + break; + default: + if (currentCall % failCallsFrequency == 0) { + result = unknownFailure; + } else { + result = success; + numBlocksDeleted++; + } + } + return new DeleteBlockResult(blockID, result); + } + @Override public ScmInfo getScmInfo() throws IOException { ScmInfo.Builder builder = diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotLocalDataYaml.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotLocalDataYaml.java index 23d332ae75b9..81f111e8464b 100644 --- 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotLocalDataYaml.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotLocalDataYaml.java @@ -37,16 +37,18 @@ import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.concurrent.ThreadLocalRandom; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.fs.FileSystemTestHelper; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.StringUtils; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.om.OmSnapshotLocalData.VersionMeta; import org.apache.hadoop.ozone.util.ObjectSerializer; import org.apache.hadoop.ozone.util.YamlSerializer; -import org.apache.ozone.compaction.log.SstFileInfo; +import org.apache.ozone.rocksdb.util.SstFileInfo; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; @@ -106,7 +108,8 @@ private LiveFileMetaData createLiveFileMetaData(String fileName, String table, S /** * Creates a snapshot local data YAML file. 
*/ - private Pair writeToYaml(UUID snapshotId, String snapshotName) throws IOException { + private Pair writeToYaml(UUID snapshotId, String snapshotName, TransactionInfo transactionInfo) + throws IOException { String yamlFilePath = snapshotName + ".yaml"; UUID previousSnapshotId = UUID.randomUUID(); // Create snapshot data with not defragged SST files @@ -115,7 +118,7 @@ private Pair writeToYaml(UUID snapshotId, String snapshotName) throw createLiveFileMetaData("sst2", "table1", "k3", "k4"), createLiveFileMetaData("sst3", "table2", "k4", "k5")); OmSnapshotLocalData dataYaml = new OmSnapshotLocalData(snapshotId, notDefraggedSSTFileList, - previousSnapshotId); + previousSnapshotId, transactionInfo); // Set version dataYaml.setVersion(42); @@ -130,11 +133,11 @@ private Pair writeToYaml(UUID snapshotId, String snapshotName) throw // Add some defragged SST files dataYaml.addVersionSSTFileInfos(ImmutableList.of( - new SstFileInfo("defragged-sst1", "k1", "k2", "table1"), - new SstFileInfo("defragged-sst2", "k3", "k4", "table2")), + createLiveFileMetaData("defragged-sst1", "table1", "k1", "k2"), + createLiveFileMetaData("defragged-sst2", "table2", "k3", "k4")), 1); dataYaml.addVersionSSTFileInfos(Collections.singletonList( - new SstFileInfo("defragged-sst3", "k4", "k5", "table1")), 3); + createLiveFileMetaData("defragged-sst3", "table1", "k4", "k5")), 3); File yamlFile = new File(testRoot, yamlFilePath); @@ -150,7 +153,9 @@ private Pair writeToYaml(UUID snapshotId, String snapshotName) throw @Test public void testWriteToYaml() throws IOException { UUID snapshotId = UUID.randomUUID(); - Pair yamlFilePrevIdPair = writeToYaml(snapshotId, "snapshot1"); + TransactionInfo transactionInfo = TransactionInfo.valueOf(ThreadLocalRandom.current().nextLong(), + ThreadLocalRandom.current().nextLong()); + Pair yamlFilePrevIdPair = writeToYaml(snapshotId, "snapshot1", transactionInfo); File yamlFile = yamlFilePrevIdPair.getLeft(); UUID prevSnapId = yamlFilePrevIdPair.getRight(); @@ -160,6 
+165,7 @@ public void testWriteToYaml() throws IOException { // Verify fields assertEquals(44, snapshotData.getVersion()); assertTrue(snapshotData.getSstFiltered()); + assertEquals(transactionInfo, snapshotData.getTransactionInfo()); VersionMeta notDefraggedSSTFiles = snapshotData.getVersionSstFileInfos().get(0); assertEquals(new VersionMeta(0, @@ -192,17 +198,19 @@ public void testWriteToYaml() throws IOException { @Test public void testUpdateSnapshotDataFile() throws IOException { UUID snapshotId = UUID.randomUUID(); - Pair yamlFilePrevIdPair = writeToYaml(snapshotId, "snapshot2"); + Pair yamlFilePrevIdPair = writeToYaml(snapshotId, "snapshot2", null); File yamlFile = yamlFilePrevIdPair.getLeft(); // Read from YAML file OmSnapshotLocalData dataYaml = omSnapshotLocalDataSerializer.load(yamlFile); - + TransactionInfo transactionInfo = TransactionInfo.valueOf(ThreadLocalRandom.current().nextLong(), + ThreadLocalRandom.current().nextLong()); // Update snapshot data dataYaml.setSstFiltered(false); dataYaml.setNeedsDefrag(false); dataYaml.addVersionSSTFileInfos( - singletonList(new SstFileInfo("defragged-sst4", "k5", "k6", "table3")), 5); + singletonList(createLiveFileMetaData("defragged-sst4", "table3", "k5", "k6")), 5); + dataYaml.setTransactionInfo(transactionInfo); // Write updated data back to file omSnapshotLocalDataSerializer.save(yamlFile, dataYaml); @@ -213,6 +221,7 @@ public void testUpdateSnapshotDataFile() throws IOException { // Verify updated data assertThat(dataYaml.getSstFiltered()).isFalse(); assertThat(dataYaml.getNeedsDefrag()).isFalse(); + assertEquals(transactionInfo, dataYaml.getTransactionInfo()); Map defraggedFiles = dataYaml.getVersionSstFileInfos(); assertEquals(4, defraggedFiles.size()); @@ -234,7 +243,9 @@ public void testEmptyFile() throws IOException { @Test public void testChecksum() throws IOException { UUID snapshotId = UUID.randomUUID(); - Pair yamlFilePrevIdPair = writeToYaml(snapshotId, "snapshot3"); + TransactionInfo transactionInfo 
= TransactionInfo.valueOf(ThreadLocalRandom.current().nextLong(), + ThreadLocalRandom.current().nextLong()); + Pair yamlFilePrevIdPair = writeToYaml(snapshotId, "snapshot3", transactionInfo); File yamlFile = yamlFilePrevIdPair.getLeft(); // Read from YAML file OmSnapshotLocalData snapshotData = omSnapshotLocalDataSerializer.load(yamlFile); @@ -251,7 +262,9 @@ public void testChecksum() throws IOException { @Test public void testYamlContainsAllFields() throws IOException { UUID snapshotId = UUID.randomUUID(); - Pair yamlFilePrevIdPair = writeToYaml(snapshotId, "snapshot4"); + TransactionInfo transactionInfo = TransactionInfo.valueOf(ThreadLocalRandom.current().nextLong(), + ThreadLocalRandom.current().nextLong()); + Pair yamlFilePrevIdPair = writeToYaml(snapshotId, "snapshot4", transactionInfo); File yamlFile = yamlFilePrevIdPair.getLeft(); String content = FileUtils.readFileToString(yamlFile, Charset.defaultCharset()); @@ -264,5 +277,6 @@ public void testYamlContainsAllFields() throws IOException { assertThat(content).contains(OzoneConsts.OM_SLD_VERSION_SST_FILE_INFO); assertThat(content).contains(OzoneConsts.OM_SLD_SNAP_ID); assertThat(content).contains(OzoneConsts.OM_SLD_PREV_SNAP_ID); + assertThat(content).contains(OzoneConsts.OM_SLD_TXN_INFO); } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index 7f808df3f978..8c5ec7e5ab45 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -18,7 +18,6 @@ package org.apache.hadoop.ozone.om; import static org.apache.commons.io.file.PathUtils.copyDirectory; -import static org.apache.hadoop.hdds.StringUtils.string2Bytes; import static org.apache.hadoop.hdds.utils.HAUtils.getExistingFiles; import static 
org.apache.hadoop.ozone.OzoneConsts.OM_CHECKPOINT_DIR; import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME; @@ -28,11 +27,7 @@ import static org.apache.hadoop.ozone.OzoneConsts.SNAPSHOT_INFO_TABLE; import static org.apache.hadoop.ozone.om.OMDBCheckpointServlet.processFile; import static org.apache.hadoop.ozone.om.OmSnapshotManager.OM_HARDLINK_FILE; -import static org.apache.hadoop.ozone.om.OmSnapshotManager.getSnapshotPath; import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.BUCKET_TABLE; -import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.DIRECTORY_TABLE; -import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.FILE_TABLE; -import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.KEY_TABLE; import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.VOLUME_TABLE; import static org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils.getINode; import static org.assertj.core.api.Assertions.assertThat; @@ -42,13 +37,14 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.timeout; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.io.File; import java.io.IOException; @@ -56,7 +52,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; @@ -64,18 +59,14 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.TreeMap; import java.util.UUID; import java.util.concurrent.TimeoutException; -import 
java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.HddsWhiteboxTestUtils; import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.RDBBatchOperation; import org.apache.hadoop.hdds.utils.db.RDBStore; -import org.apache.hadoop.hdds.utils.db.RocksDatabase; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TypedTable; import org.apache.hadoop.ozone.om.exceptions.OMException; @@ -85,7 +76,6 @@ import org.apache.hadoop.ozone.om.snapshot.OmSnapshotLocalDataManager; import org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils; import org.apache.hadoop.util.Time; -import org.apache.ozone.compaction.log.SstFileInfo; import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.GenericTestUtils.LogCapturer; import org.junit.jupiter.api.AfterAll; @@ -95,7 +85,9 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.io.TempDir; -import org.rocksdb.LiveFileMetaData; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.MockedStatic; import org.slf4j.event.Level; /** @@ -272,71 +264,6 @@ public void testCloseOnEviction() throws IOException, }, 100, 30_000); } - private LiveFileMetaData createMockLiveFileMetadata(String cfname, String fileName) { - LiveFileMetaData lfm = mock(LiveFileMetaData.class); - when(lfm.columnFamilyName()).thenReturn(cfname.getBytes(StandardCharsets.UTF_8)); - when(lfm.fileName()).thenReturn(fileName); - when(lfm.smallestKey()).thenReturn(string2Bytes("k1")); - when(lfm.largestKey()).thenReturn(string2Bytes("k2")); - return lfm; - } - - @Test - public void testCreateNewSnapshotLocalYaml() throws IOException { - SnapshotInfo snapshotInfo = createSnapshotInfo("vol1", "buck1"); - - Map> expNotDefraggedSSTFileList = 
new TreeMap<>(); - OmSnapshotLocalData.VersionMeta notDefraggedVersionMeta = new OmSnapshotLocalData.VersionMeta(0, - ImmutableList.of(new SstFileInfo("dt1.sst", "k1", "k2", DIRECTORY_TABLE), - new SstFileInfo("dt2.sst", "k1", "k2", DIRECTORY_TABLE), - new SstFileInfo("ft1.sst", "k1", "k2", FILE_TABLE), - new SstFileInfo("ft2.sst", "k1", "k2", FILE_TABLE), - new SstFileInfo("kt1.sst", "k1", "k2", KEY_TABLE), - new SstFileInfo("kt2.sst", "k1", "k2", KEY_TABLE))); - expNotDefraggedSSTFileList.put(KEY_TABLE, Stream.of("kt1.sst", "kt2.sst").collect(Collectors.toList())); - expNotDefraggedSSTFileList.put(FILE_TABLE, Stream.of("ft1.sst", "ft2.sst").collect(Collectors.toList())); - expNotDefraggedSSTFileList.put(DIRECTORY_TABLE, Stream.of("dt1.sst", "dt2.sst").collect(Collectors.toList())); - - List mockedLiveFiles = new ArrayList<>(); - for (Map.Entry> entry : expNotDefraggedSSTFileList.entrySet()) { - String cfname = entry.getKey(); - for (String fname : entry.getValue()) { - mockedLiveFiles.add(createMockLiveFileMetadata(cfname, fname)); - } - } - // Add some other column families and files that should be ignored - mockedLiveFiles.add(createMockLiveFileMetadata("otherTable", "ot1.sst")); - mockedLiveFiles.add(createMockLiveFileMetadata("otherTable", "ot2.sst")); - - RDBStore mockedStore = mock(RDBStore.class); - RocksDatabase mockedDb = mock(RocksDatabase.class); - when(mockedStore.getDb()).thenReturn(mockedDb); - when(mockedDb.getLiveFilesMetaData()).thenReturn(mockedLiveFiles); - - Path snapshotYaml = Paths.get(snapshotLocalDataManager.getSnapshotLocalPropertyYamlPath(snapshotInfo)); - when(mockedStore.getDbLocation()).thenReturn(getSnapshotPath(omMetadataManager, snapshotInfo).toFile()); - // Create an existing YAML file for the snapshot - assertTrue(snapshotYaml.toFile().createNewFile()); - assertEquals(0, Files.size(snapshotYaml)); - // Create a new YAML file for the snapshot - snapshotLocalDataManager.createNewOmSnapshotLocalDataFile(mockedStore, snapshotInfo); - 
// Verify that previous file was overwritten - assertTrue(Files.exists(snapshotYaml)); - assertTrue(Files.size(snapshotYaml) > 0); - // Verify the contents of the YAML file - OmSnapshotLocalData localData = snapshotLocalDataManager.getOmSnapshotLocalData(snapshotYaml.toFile()); - assertNotNull(localData); - assertEquals(0, localData.getVersion()); - assertEquals(notDefraggedVersionMeta, localData.getVersionSstFileInfos().get(0)); - assertFalse(localData.getSstFiltered()); - assertEquals(0L, localData.getLastDefragTime()); - assertFalse(localData.getNeedsDefrag()); - assertEquals(1, localData.getVersionSstFileInfos().size()); - - // Cleanup - Files.delete(snapshotYaml); - } - @Test public void testValidateSnapshotLimit() throws IOException { TypedTable snapshotInfoTable = mock(TypedTable.class); @@ -741,6 +668,43 @@ void testProcessFileWithDestDirParameter(@TempDir File testDir) throws IOExcepti destAddNonSstToCopiedFiles); } + @ParameterizedTest + @ValueSource(ints = {0, 1, 10, 100}) + public void testGetSnapshotPath(int version) { + OMMetadataManager metadataManager = mock(OMMetadataManager.class); + RDBStore store = mock(RDBStore.class); + when(metadataManager.getStore()).thenReturn(store); + File file = new File("test-db"); + when(store.getDbLocation()).thenReturn(file); + String path = "dir1/dir2"; + when(store.getSnapshotsParentDir()).thenReturn(path); + UUID snapshotId = UUID.randomUUID(); + String snapshotPath = OmSnapshotManager.getSnapshotPath(metadataManager, snapshotId, version).toString(); + String expectedPath = "dir1/dir2/test-db-" + snapshotId; + if (version != 0) { + expectedPath = expectedPath + "-" + version; + } + assertEquals(expectedPath, snapshotPath); + } + + @ParameterizedTest + @ValueSource(ints = {0, 1, 10, 100}) + public void testGetSnapshotPathFromConf(int version) { + try (MockedStatic mocked = mockStatic(OMStorage.class)) { + String omDir = "dir1/dir2"; + mocked.when(() -> OMStorage.getOmDbDir(any())).thenReturn(new File(omDir)); + 
OzoneConfiguration conf = mock(OzoneConfiguration.class); + SnapshotInfo snapshotInfo = createSnapshotInfo("volumeName", "bucketname"); + String snapshotPath = OmSnapshotManager.getSnapshotPath(conf, snapshotInfo, version); + String expectedPath = omDir + OM_KEY_PREFIX + OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + + OM_DB_NAME + "-" + snapshotInfo.getSnapshotId(); + if (version != 0) { + expectedPath = expectedPath + "-" + version; + } + assertEquals(expectedPath, snapshotPath); + } + } + @Test public void testCreateSnapshotIdempotent() throws Exception { // set up db tables @@ -774,6 +738,7 @@ public void testCreateSnapshotIdempotent() throws Exception { when(bucketTable.get(dbBucketKey)).thenReturn(omBucketInfo); SnapshotInfo first = createSnapshotInfo(volumeName, bucketName); + first.setPathPreviousSnapshotId(null); when(snapshotInfoTable.get(first.getTableKey())).thenReturn(first); // Create first checkpoint for the snapshot checkpoint @@ -797,10 +762,13 @@ public void testCreateSnapshotIdempotent() throws Exception { private SnapshotInfo createSnapshotInfo(String volumeName, String bucketName) { - return SnapshotInfo.newInstance(volumeName, + SnapshotInfo snapshotInfo = SnapshotInfo.newInstance(volumeName, bucketName, UUID.randomUUID().toString(), UUID.randomUUID(), Time.now()); + snapshotInfo.setPathPreviousSnapshotId(null); + snapshotInfo.setGlobalPreviousSnapshotId(null); + return snapshotInfo; } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java index 54087fa64dc1..881a4dff939d 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java +++ 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.om.request.key; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; +import static org.apache.hadoop.ozone.om.lock.FlatResource.SNAPSHOT_DB_CONTENT_LOCK; import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.LeveledResource.BUCKET_LOCK; import static org.apache.hadoop.ozone.om.request.file.OMFileRequest.getOmKeyInfo; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -32,6 +33,7 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; +import com.google.common.collect.Lists; import jakarta.annotation.Nonnull; import java.io.IOException; import java.util.ArrayList; @@ -433,7 +435,24 @@ public void testDirectoryPurge(boolean fromSnapshot, boolean purgeDirectory, int OMDirectoriesPurgeRequestWithFSO omKeyPurgeRequest = new OMDirectoriesPurgeRequestWithFSO(preExecutedRequest); OMDirectoriesPurgeResponseWithFSO omClientResponse = (OMDirectoriesPurgeResponseWithFSO) omKeyPurgeRequest .validateAndUpdateCache(ozoneManager, 100L); + + IOzoneManagerLock lock = spy(omMetadataManager.getLock()); + when(omMetadataManager.getLock()).thenReturn(lock); + List locks = Lists.newArrayList(); + doAnswer(i -> { + locks.add(i.getArgument(1)); + return i.callRealMethod(); + }).when(lock).acquireReadLock(eq(SNAPSHOT_DB_CONTENT_LOCK), anyString()); + + List snapshotIds; + if (fromSnapshot) { + snapshotIds = Collections.singletonList(snapshotInfo.getSnapshotId().toString()); + } else { + snapshotIds = Collections.emptyList(); + } + performBatchOperationCommit(omClientResponse); + assertEquals(snapshotIds, locks); OmBucketInfo updatedBucketInfo = purgeDirectory || numberOfSubEntries > 0 ? 
omMetadataManager.getBucketTable().getSkipCache(bucketKey) : omMetadataManager.getBucketTable().get(bucketKey); long currentSnapshotUsedNamespace = updatedBucketInfo.getSnapshotUsedNamespace(); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyPurgeRequestAndResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyPurgeRequestAndResponse.java index aa566859cb46..a7a738ba0000 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyPurgeRequestAndResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyPurgeRequestAndResponse.java @@ -17,14 +17,21 @@ package org.apache.hadoop.ozone.om.request.key; +import static org.apache.hadoop.ozone.om.lock.FlatResource.SNAPSHOT_DB_CONTENT_LOCK; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; +import com.google.common.collect.Lists; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.UUID; import org.apache.commons.lang3.tuple.Pair; @@ -35,6 +42,7 @@ import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; import org.apache.hadoop.ozone.om.request.OMRequestTestUtils; import org.apache.hadoop.ozone.om.response.key.OMKeyPurgeResponse; import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DeletedKeys; @@ -186,7 +194,6 @@ public void testKeyPurgeInSnapshot() throws Exception { .thenReturn(RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE)); // Create and Delete keys. The keys should be moved to DeletedKeys table Pair, List> deleteKeysAndRenamedEntry = createAndDeleteKeysAndRenamedEntry(1, null); - SnapshotInfo snapInfo = createSnapshot("snap1"); assertEquals(snapInfo.getLastTransactionInfo(), TransactionInfo.valueOf(TransactionInfo.getTermIndex(1L)).toByteString()); @@ -235,6 +242,14 @@ public void testKeyPurgeInSnapshot() throws Exception { .setStatus(Status.OK) .build(); + IOzoneManagerLock lock = spy(omMetadataManager.getLock()); + when(omMetadataManager.getLock()).thenReturn(lock); + List locks = Lists.newArrayList(); + doAnswer(i -> { + locks.add(i.getArgument(1)); + return i.callRealMethod(); + }).when(lock).acquireReadLock(eq(SNAPSHOT_DB_CONTENT_LOCK), anyString()); + List snapshotIds = Collections.singletonList(snapInfo.getSnapshotId().toString()); try (BatchOperation batchOperation = omMetadataManager.getStore().initBatchOperation()) { @@ -245,6 +260,7 @@ public void testKeyPurgeInSnapshot() throws Exception { // Do manual commit and see whether addToBatch is successful or not. 
omMetadataManager.getStore().commitBatchOperation(batchOperation); } + assertEquals(snapshotIds, locks); snapshotInfoOnDisk = omMetadataManager.getSnapshotInfoTable().getSkipCache(snapInfo.getTableKey()); assertEquals(snapshotInfoOnDisk, snapInfo); // The keys should not exist in the DeletedKeys table diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java index f0e32ac405ba..b84294370c58 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java @@ -349,5 +349,4 @@ protected SnapshotInfo createSnapshot(String volume, String bucket, String snaps assertNotNull(snapshotInfo); return snapshotInfo; } - } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotPurgeRequestAndResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotPurgeRequestAndResponse.java index 35053882eeda..b78975ef0816 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotPurgeRequestAndResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotPurgeRequestAndResponse.java @@ -52,6 +52,7 @@ import org.apache.hadoop.ozone.om.request.OMRequestTestUtils; import org.apache.hadoop.ozone.om.response.snapshot.OMSnapshotPurgeResponse; import org.apache.hadoop.ozone.om.snapshot.OmSnapshotLocalDataManager; +import org.apache.hadoop.ozone.om.snapshot.OmSnapshotLocalDataManager.ReadableOmSnapshotLocalDataProvider; import org.apache.hadoop.ozone.om.snapshot.TestSnapshotRequestAndResponse; import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotPurgeRequest; @@ -159,6 +160,10 @@ public void testValidateAndUpdateCache() throws Exception { List snapshotDbKeysToPurge = createSnapshots(10); assertFalse(getOmMetadataManager().getSnapshotInfoTable().isEmpty()); + List snapshotInfos = new ArrayList<>(); + for (String snapshotKey : snapshotDbKeysToPurge) { + snapshotInfos.add(getOmMetadataManager().getSnapshotInfoTable().get(snapshotKey)); + } // Check if all the checkpoints are created. for (Path checkpoint : checkpointPaths) { @@ -171,9 +176,9 @@ public void testValidateAndUpdateCache() throws Exception { snapshotDbKeysToPurge); OMSnapshotPurgeRequest omSnapshotPurgeRequest = preExecute(snapshotPurgeRequest); - + TransactionInfo transactionInfo = TransactionInfo.valueOf(TransactionInfo.getTermIndex(200L)); OMSnapshotPurgeResponse omSnapshotPurgeResponse = (OMSnapshotPurgeResponse) - omSnapshotPurgeRequest.validateAndUpdateCache(getOzoneManager(), 200L); + omSnapshotPurgeRequest.validateAndUpdateCache(getOzoneManager(), transactionInfo.getTransactionIndex()); for (String snapshotTableKey: snapshotDbKeysToPurge) { assertNull(getOmMetadataManager().getSnapshotInfoTable().get(snapshotTableKey)); @@ -191,6 +196,15 @@ public void testValidateAndUpdateCache() throws Exception { for (Path checkpoint : checkpointPaths) { assertFalse(Files.exists(checkpoint)); } + OmSnapshotLocalDataManager snapshotLocalDataManager = + getOzoneManager().getOmSnapshotManager().getSnapshotLocalDataManager(); + for (SnapshotInfo snapshotInfo : snapshotInfos) { + try (ReadableOmSnapshotLocalDataProvider snapProvider = + snapshotLocalDataManager.getOmSnapshotLocalData(snapshotInfo)) { + assertEquals(transactionInfo, snapProvider.getSnapshotLocalData().getTransactionInfo()); + } + } + assertEquals(initialSnapshotPurgeCount + 1, getOmSnapshotIntMetrics().getNumSnapshotPurges()); 
assertEquals(initialSnapshotPurgeFailCount, getOmSnapshotIntMetrics().getNumSnapshotPurgeFails()); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java index 2cafae138fd4..6bef4b84247b 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java @@ -131,7 +131,7 @@ public void testAddToDBBatch(int numberOfKeys) throws Exception { omMetadataManager.getStore().commitBatchOperation(batchOperation); // Confirm snapshot directory was created - String snapshotDir = getSnapshotPath(ozoneConfiguration, snapshotInfo); + String snapshotDir = getSnapshotPath(ozoneConfiguration, snapshotInfo, 0); assertTrue((new File(snapshotDir)).exists()); // Confirm table has 1 entry diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java index 2d5d7b2870f7..bdb23b65f2c8 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java @@ -117,7 +117,7 @@ public void testAddToDBBatch() throws Exception { // Confirm snapshot directory was created String snapshotDir = OmSnapshotManager.getSnapshotPath(ozoneConfiguration, - snapshotInfo); + snapshotInfo, 0); assertTrue((new File(snapshotDir)).exists()); // Confirm table has 1 entry diff --git 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotMoveTableKeysResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotMoveTableKeysResponse.java index 0425dd84546f..db72781f753c 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotMoveTableKeysResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotMoveTableKeysResponse.java @@ -17,8 +17,19 @@ package org.apache.hadoop.ozone.om.response.snapshot; +import static org.apache.hadoop.ozone.om.lock.FlatResource.SNAPSHOT_DB_CONTENT_LOCK; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -37,6 +48,7 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; import org.apache.hadoop.ozone.om.request.key.OMKeyRequest; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.hadoop.ozone.om.snapshot.TestSnapshotRequestAndResponse; @@ -107,13 +119,24 @@ private void addDataToTable(Table table, List> va @ParameterizedTest @ValueSource(booleans = {true, false}) public void testMoveTableKeysToNextSnapshot(boolean nextSnapshotExists) throws Exception { + IOzoneManagerLock lock = spy(getOmMetadataManager().getLock()); + 
when(getOmMetadataManager().getLock()).thenReturn(lock); OmBucketInfo omBucketInfo = OMKeyRequest.getBucketInfo(getOmMetadataManager(), getVolumeName(), getBucketName()); createSnapshots(nextSnapshotExists, omBucketInfo.getObjectID()); try (UncheckedAutoCloseableSupplier snapshot1 = getOmSnapshotManager().getSnapshot( getVolumeName(), getBucketName(), snapshotName1); UncheckedAutoCloseableSupplier snapshot2 = nextSnapshotExists ? getOmSnapshotManager().getSnapshot( getVolumeName(), getBucketName(), snapshotName2) : null) { - + List> expectedSnapshotIdLocks = + Arrays.asList(Collections.singletonList(snapshot1.get().getSnapshotID().toString()), + nextSnapshotExists ? Collections.singletonList(snapshot2.get().getSnapshotID().toString()) : null); + List> locks = new ArrayList<>(); + doAnswer(i -> { + for (String[] id : (Collection)i.getArgument(1)) { + locks.add(id == null ? null : Arrays.stream(id).collect(Collectors.toList())); + } + return i.callRealMethod(); + }).when(lock).acquireReadLocks(eq(SNAPSHOT_DB_CONTENT_LOCK), anyList()); OmSnapshot snapshot = snapshot1.get(); List deletedTable = new ArrayList<>(); List deletedDirTable = new ArrayList<>(); @@ -144,6 +167,7 @@ public void testMoveTableKeysToNextSnapshot(boolean nextSnapshotExists) throws E response.addToDBBatch(getOmMetadataManager(), batchOperation); getOmMetadataManager().getStore().commitBatchOperation(batchOperation); } + assertEquals(expectedSnapshotIdLocks, locks); Assertions.assertTrue(snapshot.getMetadataManager().getDeletedTable().isEmpty()); Assertions.assertTrue(snapshot.getMetadataManager().getDeletedDirTable().isEmpty()); Assertions.assertTrue(snapshot.getMetadataManager().getSnapshotRenamedTable().isEmpty()); @@ -153,7 +177,7 @@ public void testMoveTableKeysToNextSnapshot(boolean nextSnapshotExists) throws E nextMetadataManager.getDeletedTable().iterator().forEachRemaining(entry -> { count.getAndIncrement(); int maxCount = count.get() >= 6 && count.get() <= 8 ? 
20 : 10; - Assertions.assertEquals(maxCount, entry.getValue().getOmKeyInfoList().size()); + assertEquals(maxCount, entry.getValue().getOmKeyInfoList().size()); List versions = entry.getValue().getOmKeyInfoList().stream().map(OmKeyInfo::getKeyLocationVersions) .map(omKeyInfo -> omKeyInfo.get(0).getVersion()).collect(Collectors.toList()); List expectedVersions = new ArrayList<>(); @@ -161,20 +185,20 @@ public void testMoveTableKeysToNextSnapshot(boolean nextSnapshotExists) throws E expectedVersions.addAll(LongStream.range(10, 20).boxed().collect(Collectors.toList())); } expectedVersions.addAll(LongStream.range(0, 10).boxed().collect(Collectors.toList())); - Assertions.assertEquals(expectedVersions, versions); + assertEquals(expectedVersions, versions); }); - Assertions.assertEquals(15, count.get()); + assertEquals(15, count.get()); count.set(0); nextMetadataManager.getDeletedDirTable().iterator().forEachRemaining(entry -> count.getAndIncrement()); - Assertions.assertEquals(15, count.get()); + assertEquals(15, count.get()); count.set(0); nextMetadataManager.getSnapshotRenamedTable().iterator().forEachRemaining(entry -> { String expectedValue = renameEntries.getOrDefault(entry.getKey(), entry.getValue()); - Assertions.assertEquals(expectedValue, entry.getValue()); + assertEquals(expectedValue, entry.getValue()); count.getAndIncrement(); }); - Assertions.assertEquals(15, count.get()); + assertEquals(15, count.get()); } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java index 8c51527b10d4..05cfca2fe55b 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java @@ -75,6 +75,7 @@ import org.apache.hadoop.hdds.utils.db.DBConfigFromFile; 
import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.ozone.common.BlockGroup; +import org.apache.hadoop.ozone.common.DeletedBlock; import org.apache.hadoop.ozone.om.DeletingServiceMetrics; import org.apache.hadoop.ozone.om.KeyManager; import org.apache.hadoop.ozone.om.KeyManagerImpl; @@ -826,7 +827,8 @@ public void testFailingModifiedKeyPurge() throws IOException, InterruptedExcepti .setStatus(OzoneManagerProtocolProtos.Status.TIMEOUT).build(); }); BlockGroup blockGroup = BlockGroup.newBuilder().setKeyName("key1/1") - .addAllBlockIDs(Collections.singletonList(new BlockID(1, 1))).build(); + .addAllDeletedBlocks(Collections.singletonList(new DeletedBlock( + new BlockID(1, 1), 1, 3))).build(); Map blockGroups = Collections.singletonMap(blockGroup.getGroupID(), new PurgedKey("vol", "buck", 1, blockGroup, "key1", 30, true)); List renameEntriesToBeDeleted = Collections.singletonList("key2"); @@ -1393,7 +1395,7 @@ private long countBlocksPendingDeletion() { .getPurgedKeys().values() .stream() .map(PurgedKey::getBlockGroup) - .map(BlockGroup::getBlockIDList) + .map(BlockGroup::getDeletedBlocks) .mapToLong(Collection::size) .sum(); } catch (IOException e) { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotLocalDataManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotLocalDataManager.java index 34bde4814a6e..df26fa742e84 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotLocalDataManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotLocalDataManager.java @@ -17,48 +17,76 @@ package org.apache.hadoop.ozone.om.snapshot; +import static org.apache.hadoop.hdds.StringUtils.bytes2String; import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_SEPARATOR; import static org.apache.hadoop.ozone.om.OmSnapshotLocalDataYaml.YAML_FILE_EXTENSION; import 
static org.apache.hadoop.ozone.om.codec.OMDBDefinition.DIRECTORY_TABLE; import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.FILE_TABLE; import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.KEY_TABLE; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; import static org.mockito.Mockito.when; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import java.io.File; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.TreeMap; import java.util.UUID; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; +import org.apache.commons.compress.utils.Sets; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.StringUtils; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.RDBStore; import org.apache.hadoop.hdds.utils.db.RocksDatabase; +import org.apache.hadoop.hdds.utils.db.RocksDatabaseException; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmSnapshotLocalData; import 
org.apache.hadoop.ozone.om.OmSnapshotLocalDataYaml; import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.lock.FlatResource; +import org.apache.hadoop.ozone.om.lock.HierarchicalResourceLockManager; +import org.apache.hadoop.ozone.om.lock.HierarchicalResourceLockManager.HierarchicalResourceLock; +import org.apache.hadoop.ozone.om.snapshot.OmSnapshotLocalDataManager.ReadableOmSnapshotLocalDataProvider; +import org.apache.hadoop.ozone.om.snapshot.OmSnapshotLocalDataManager.WritableOmSnapshotLocalDataProvider; import org.apache.hadoop.ozone.util.YamlSerializer; -import org.apache.ozone.compaction.log.SstFileInfo; +import org.apache.ozone.rocksdb.util.SstFileInfo; +import org.assertj.core.util.Lists; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import org.mockito.Mock; import org.mockito.MockitoAnnotations; import org.rocksdb.LiveFileMetaData; @@ -67,13 +95,18 @@ /** * Test class for OmSnapshotLocalDataManager. 
*/ +@Timeout(value = 30, unit = TimeUnit.SECONDS) public class TestOmSnapshotLocalDataManager { private static YamlSerializer snapshotLocalDataYamlSerializer; + private static List lockCapturor; @Mock private OMMetadataManager omMetadataManager; + @Mock + private HierarchicalResourceLockManager lockManager; + @Mock private RDBStore rdbStore; @@ -88,6 +121,11 @@ public class TestOmSnapshotLocalDataManager { private File snapshotsDir; + private static final String READ_LOCK_MESSAGE_ACQUIRE = "readLock acquire"; + private static final String READ_LOCK_MESSAGE_UNLOCK = "readLock unlock"; + private static final String WRITE_LOCK_MESSAGE_ACQUIRE = "writeLock acquire"; + private static final String WRITE_LOCK_MESSAGE_UNLOCK = "writeLock unlock"; + @BeforeAll public static void setupClass() { snapshotLocalDataYamlSerializer = new YamlSerializer( @@ -98,10 +136,11 @@ public void computeAndSetChecksum(Yaml yaml, OmSnapshotLocalData data) throws IO data.computeAndSetChecksum(yaml); } }; + lockCapturor = new ArrayList<>(); } @AfterAll - public static void teardownClass() throws IOException { + public static void teardownClass() { snapshotLocalDataYamlSerializer.close(); snapshotLocalDataYamlSerializer = null; } @@ -112,15 +151,15 @@ public void setUp() throws IOException { // Setup mock behavior when(omMetadataManager.getStore()).thenReturn(rdbStore); - + when(omMetadataManager.getHierarchicalLockManager()).thenReturn(lockManager); this.snapshotsDir = tempDir.resolve("snapshots").toFile(); FileUtils.deleteDirectory(snapshotsDir); assertTrue(snapshotsDir.exists() || snapshotsDir.mkdirs()); File dbLocation = tempDir.resolve("db").toFile(); FileUtils.deleteDirectory(dbLocation); assertTrue(dbLocation.exists() || dbLocation.mkdirs()); + mockLockManager(); - when(rdbStore.getSnapshotsParentDir()).thenReturn(snapshotsDir.getAbsolutePath()); when(rdbStore.getDbLocation()).thenReturn(dbLocation); } @@ -135,6 +174,457 @@ public void tearDown() throws Exception { } } + private String 
getReadLockMessageAcquire(UUID snapshotId) { + return READ_LOCK_MESSAGE_ACQUIRE + " " + FlatResource.SNAPSHOT_LOCAL_DATA_LOCK + " " + snapshotId; + } + + private String getReadLockMessageRelease(UUID snapshotId) { + return READ_LOCK_MESSAGE_UNLOCK + " " + FlatResource.SNAPSHOT_LOCAL_DATA_LOCK + " " + snapshotId; + } + + private String getWriteLockMessageAcquire(UUID snapshotId) { + return WRITE_LOCK_MESSAGE_ACQUIRE + " " + FlatResource.SNAPSHOT_LOCAL_DATA_LOCK + " " + snapshotId; + } + + private String getWriteLockMessageRelease(UUID snapshotId) { + return WRITE_LOCK_MESSAGE_UNLOCK + " " + FlatResource.SNAPSHOT_LOCAL_DATA_LOCK + " " + snapshotId; + } + + private HierarchicalResourceLock getHierarchicalResourceLock(FlatResource resource, String key, boolean isWriteLock) { + return new HierarchicalResourceLock() { + @Override + public boolean isLockAcquired() { + return true; + } + + @Override + public void close() { + if (isWriteLock) { + lockCapturor.add(WRITE_LOCK_MESSAGE_UNLOCK + " " + resource + " " + key); + } else { + lockCapturor.add(READ_LOCK_MESSAGE_UNLOCK + " " + resource + " " + key); + } + } + }; + } + + private void mockLockManager() throws IOException { + lockCapturor.clear(); + reset(lockManager); + when(lockManager.acquireReadLock(any(FlatResource.class), anyString())) + .thenAnswer(i -> { + lockCapturor.add(READ_LOCK_MESSAGE_ACQUIRE + " " + i.getArgument(0) + " " + i.getArgument(1)); + return getHierarchicalResourceLock(i.getArgument(0), i.getArgument(1), false); + }); + when(lockManager.acquireWriteLock(any(FlatResource.class), anyString())) + .thenAnswer(i -> { + lockCapturor.add(WRITE_LOCK_MESSAGE_ACQUIRE + " " + i.getArgument(0) + " " + i.getArgument(1)); + return getHierarchicalResourceLock(i.getArgument(0), i.getArgument(1), true); + }); + } + + private List createSnapshotLocalData(OmSnapshotLocalDataManager snapshotLocalDataManager, + int numberOfSnapshots) throws IOException { + SnapshotInfo previousSnapshotInfo = null; + int counter = 0; + 
Map> liveFileMetaDataMap = new HashMap<>(); + liveFileMetaDataMap.put(KEY_TABLE, + Lists.newArrayList(createMockLiveFileMetaData("file1.sst", KEY_TABLE, "key1", "key2"))); + liveFileMetaDataMap.put(FILE_TABLE, Lists.newArrayList(createMockLiveFileMetaData("file2.sst", FILE_TABLE, "key1", + "key2"))); + liveFileMetaDataMap.put(DIRECTORY_TABLE, Lists.newArrayList(createMockLiveFileMetaData("file2.sst", + DIRECTORY_TABLE, "key1", "key2"))); + liveFileMetaDataMap.put("col1", Lists.newArrayList(createMockLiveFileMetaData("file2.sst", "col1", "key1", + "key2"))); + List snapshotIds = new ArrayList<>(); + for (int i = 0; i < numberOfSnapshots; i++) { + UUID snapshotId = UUID.randomUUID(); + SnapshotInfo snapshotInfo = createMockSnapshotInfo(snapshotId, previousSnapshotInfo == null ? null + : previousSnapshotInfo.getSnapshotId()); + mockSnapshotStore(snapshotId, liveFileMetaDataMap.values().stream() + .flatMap(Collection::stream).collect(Collectors.toList())); + snapshotLocalDataManager.createNewOmSnapshotLocalDataFile(snapshotStore, snapshotInfo); + previousSnapshotInfo = snapshotInfo; + for (Map.Entry> tableEntry : liveFileMetaDataMap.entrySet()) { + String table = tableEntry.getKey(); + tableEntry.getValue().add(createMockLiveFileMetaData("file" + counter++ + ".sst", table, "key1", "key4")); + } + snapshotIds.add(snapshotId); + } + return snapshotIds; + } + + private void mockSnapshotStore(UUID snapshotId, List sstFiles) throws RocksDatabaseException { + // Setup snapshot store mock + File snapshotDbLocation = OmSnapshotManager.getSnapshotPath(omMetadataManager, snapshotId, 0).toFile(); + assertTrue(snapshotDbLocation.exists() || snapshotDbLocation.mkdirs()); + + when(snapshotStore.getDbLocation()).thenReturn(snapshotDbLocation); + RocksDatabase rocksDatabase = mock(RocksDatabase.class); + when(snapshotStore.getDb()).thenReturn(rocksDatabase); + when(rocksDatabase.getLiveFilesMetaData()).thenReturn(sstFiles); + } + + /** + * Checks lock orders taken i.e. 
while reading a snapshot against the previous snapshot. + * Depending on read or write locks are acquired on the snapshotId and read lock is acquired on the previous + * snapshot. Once the instance is closed the read lock on previous snapshot is released followed by releasing the + * lock on the snapshotId. + * @param read + * @throws IOException + */ + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testLockOrderingAgainstAnotherSnapshot(boolean read) throws IOException { + localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); + List snapshotIds = new ArrayList<>(); + snapshotIds.add(null); + snapshotIds.addAll(createSnapshotLocalData(localDataManager, 20)); + for (int start = 0; start < snapshotIds.size(); start++) { + for (int end = start + 1; end < snapshotIds.size(); end++) { + UUID startSnapshotId = snapshotIds.get(start); + UUID endSnapshotId = snapshotIds.get(end); + lockCapturor.clear(); + int logCaptorIdx = 0; + try (ReadableOmSnapshotLocalDataProvider omSnapshotLocalDataProvider = + read ? 
localDataManager.getOmSnapshotLocalData(endSnapshotId, startSnapshotId) : + localDataManager.getWritableOmSnapshotLocalData(endSnapshotId, startSnapshotId)) { + OmSnapshotLocalData snapshotLocalData = omSnapshotLocalDataProvider.getSnapshotLocalData(); + OmSnapshotLocalData previousSnapshot = omSnapshotLocalDataProvider.getPreviousSnapshotLocalData(); + assertEquals(endSnapshotId, snapshotLocalData.getSnapshotId()); + if (startSnapshotId == null) { + assertNull(previousSnapshot); + assertNull(snapshotLocalData.getPreviousSnapshotId()); + continue; + } + assertEquals(startSnapshotId, previousSnapshot.getSnapshotId()); + assertEquals(startSnapshotId, snapshotLocalData.getPreviousSnapshotId()); + if (read) { + assertEquals(getReadLockMessageAcquire(endSnapshotId), lockCapturor.get(logCaptorIdx++)); + } else { + assertEquals(getWriteLockMessageAcquire(endSnapshotId), lockCapturor.get(logCaptorIdx++)); + } + int idx = end - 1; + UUID previousSnapId = snapshotIds.get(idx--); + assertEquals(getReadLockMessageAcquire(previousSnapId), lockCapturor.get(logCaptorIdx++)); + while (idx >= start) { + UUID prevPrevSnapId = snapshotIds.get(idx--); + assertEquals(getReadLockMessageAcquire(prevPrevSnapId), lockCapturor.get(logCaptorIdx++)); + assertEquals(getReadLockMessageRelease(previousSnapId), lockCapturor.get(logCaptorIdx++)); + previousSnapId = prevPrevSnapId; + } + } + assertEquals(getReadLockMessageRelease(startSnapshotId), lockCapturor.get(logCaptorIdx++)); + if (read) { + assertEquals(getReadLockMessageRelease(endSnapshotId), lockCapturor.get(logCaptorIdx++)); + } else { + assertEquals(getWriteLockMessageRelease(endSnapshotId), lockCapturor.get(logCaptorIdx++)); + } + assertEquals(lockCapturor.size(), logCaptorIdx); + } + } + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testVersionLockResolution(boolean read) throws IOException { + localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); + List snapshotIds = 
createSnapshotLocalData(localDataManager, 5); + for (int snapIdx = 0; snapIdx < snapshotIds.size(); snapIdx++) { + UUID snapId = snapshotIds.get(snapIdx); + UUID expectedPreviousSnapId = snapIdx - 1 >= 0 ? snapshotIds.get(snapIdx - 1) : null; + lockCapturor.clear(); + int logCaptorIdx = 0; + try (ReadableOmSnapshotLocalDataProvider omSnapshotLocalDataProvider = + read ? localDataManager.getOmSnapshotLocalData(snapId) : + localDataManager.getWritableOmSnapshotLocalData(snapId)) { + OmSnapshotLocalData snapshotLocalData = omSnapshotLocalDataProvider.getSnapshotLocalData(); + OmSnapshotLocalData previousSnapshot = omSnapshotLocalDataProvider.getPreviousSnapshotLocalData(); + assertEquals(snapId, snapshotLocalData.getSnapshotId()); + assertEquals(expectedPreviousSnapId, previousSnapshot == null ? null : + previousSnapshot.getSnapshotId()); + if (read) { + assertEquals(getReadLockMessageAcquire(snapId), lockCapturor.get(logCaptorIdx++)); + } else { + assertEquals(getWriteLockMessageAcquire(snapId), lockCapturor.get(logCaptorIdx++)); + } + if (expectedPreviousSnapId != null) { + assertEquals(getReadLockMessageAcquire(expectedPreviousSnapId), lockCapturor.get(logCaptorIdx++)); + } + } + if (expectedPreviousSnapId != null) { + assertEquals(getReadLockMessageRelease(expectedPreviousSnapId), lockCapturor.get(logCaptorIdx++)); + } + if (read) { + assertEquals(getReadLockMessageRelease(snapId), lockCapturor.get(logCaptorIdx++)); + } else { + assertEquals(getWriteLockMessageRelease(snapId), lockCapturor.get(logCaptorIdx++)); + } + assertEquals(lockCapturor.size(), logCaptorIdx); + } + } + + @Test + public void testWriteVersionAdditionValidationWithoutPreviousSnapshotVersionExisting() throws IOException { + localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); + List snapshotIds = createSnapshotLocalData(localDataManager, 2); + UUID snapId = snapshotIds.get(1); + try (WritableOmSnapshotLocalDataProvider omSnapshotLocalDataProvider = + 
localDataManager.getWritableOmSnapshotLocalData(snapId)) { + OmSnapshotLocalData snapshotLocalData = omSnapshotLocalDataProvider.getSnapshotLocalData(); + snapshotLocalData.addVersionSSTFileInfos(Lists.newArrayList(createMockLiveFileMetaData("file1.sst", KEY_TABLE, + "key1", "key2")), 3); + + IOException ex = assertThrows(IOException.class, omSnapshotLocalDataProvider::commit); + assertTrue(ex.getMessage().contains("since previous snapshot with version hasn't been loaded")); + } + } + + @Test + public void testUpdateTransactionInfo() throws IOException { + localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); + TransactionInfo transactionInfo = TransactionInfo.valueOf(ThreadLocalRandom.current().nextLong(), + ThreadLocalRandom.current().nextLong()); + UUID snapshotId = createSnapshotLocalData(localDataManager, 1).get(0); + try (WritableOmSnapshotLocalDataProvider snap = localDataManager.getWritableOmSnapshotLocalData(snapshotId)) { + OmSnapshotLocalData snapshotLocalData = snap.getSnapshotLocalData(); + assertNull(snapshotLocalData.getTransactionInfo()); + snap.setTransactionInfo(transactionInfo); + snap.commit(); + } + + try (ReadableOmSnapshotLocalDataProvider snap = localDataManager.getOmSnapshotLocalData(snapshotId)) { + OmSnapshotLocalData snapshotLocalData = snap.getSnapshotLocalData(); + assertEquals(transactionInfo, snapshotLocalData.getTransactionInfo()); + } + } + + @Test + public void testAddVersionFromRDB() throws IOException { + localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); + List snapshotIds = createSnapshotLocalData(localDataManager, 2); + addVersionsToLocalData(localDataManager, snapshotIds.get(0), ImmutableMap.of(4, 5, 6, 8)); + UUID snapId = snapshotIds.get(1); + List newVersionSstFiles = + Lists.newArrayList(createMockLiveFileMetaData("file5.sst", KEY_TABLE, "key1", "key2"), + createMockLiveFileMetaData("file6.sst", FILE_TABLE, "key1", "key2"), + createMockLiveFileMetaData("file7.sst", KEY_TABLE, "key1", 
"key2"), + createMockLiveFileMetaData("file1.sst", "col1", "key1", "key2")); + try (WritableOmSnapshotLocalDataProvider snap = + localDataManager.getWritableOmSnapshotLocalData(snapId)) { + mockSnapshotStore(snapId, newVersionSstFiles); + snap.addSnapshotVersion(snapshotStore); + snap.commit(); + } + validateVersions(localDataManager, snapId, 1, Sets.newHashSet(0, 1)); + try (ReadableOmSnapshotLocalDataProvider snap = localDataManager.getOmSnapshotLocalData(snapId)) { + OmSnapshotLocalData snapshotLocalData = snap.getSnapshotLocalData(); + OmSnapshotLocalData.VersionMeta versionMeta = snapshotLocalData.getVersionSstFileInfos().get(1); + assertEquals(6, versionMeta.getPreviousSnapshotVersion()); + List expectedLiveFileMetaData = + newVersionSstFiles.subList(0, 3).stream().map(SstFileInfo::new).collect(Collectors.toList()); + assertEquals(expectedLiveFileMetaData, versionMeta.getSstFiles()); + } + } + + private void validateVersions(OmSnapshotLocalDataManager snapshotLocalDataManager, UUID snapId, int expectedVersion, + Set expectedVersions) throws IOException { + try (ReadableOmSnapshotLocalDataProvider snap = snapshotLocalDataManager.getOmSnapshotLocalData(snapId)) { + assertEquals(expectedVersion, snap.getSnapshotLocalData().getVersion()); + assertEquals(expectedVersions, snap.getSnapshotLocalData().getVersionSstFileInfos().keySet()); + } + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testWriteWithChainUpdate(boolean previousSnapshotExisting) throws IOException { + localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); + List snapshotIds = createSnapshotLocalData(localDataManager, 3 + (previousSnapshotExisting ? 1 : 0)); + int snapshotIdx = 1 + (previousSnapshotExisting ? 1 : 0); + for (UUID snapshotId : snapshotIds) { + addVersionsToLocalData(localDataManager, snapshotId, ImmutableMap.of(1, 1)); + } + + UUID snapshotId = snapshotIds.get(snapshotIdx); + UUID toUpdatePreviousSnapshotId = snapshotIdx - 2 >= 0 ? 
snapshotIds.get(snapshotIdx - 2) : null; + + try (WritableOmSnapshotLocalDataProvider snap = + localDataManager.getWritableOmSnapshotLocalData(snapshotId, toUpdatePreviousSnapshotId)) { + assertFalse(snap.needsDefrag()); + snap.commit(); + assertTrue(snap.needsDefrag()); + } + try (ReadableOmSnapshotLocalDataProvider snap = + localDataManager.getOmSnapshotLocalData(snapshotId)) { + assertEquals(toUpdatePreviousSnapshotId, snap.getSnapshotLocalData().getPreviousSnapshotId()); + assertTrue(snap.needsDefrag()); + } + } + + /** + * Validates write-time version propagation and removal rules when the previous + * snapshot already has a concrete version recorded. + * + * Test flow: + * 1) Create two snapshots in a chain: {@code prevSnapId -> snapId}. + * 2) For {@code prevSnapId}: set {@code version=3} and add SST metadata for version {@code 0}; commit. + * 3) For {@code snapId}: set {@code version=4} and add SST metadata for version {@code 4}; commit. + * After commit, versions resolve to {@code prev.version=4} and {@code snap.version=5}, and their + * version maps are {@code {0,4}} and {@code {0,5}} respectively (base version 0 plus the current one). + * 4) If {@code nextVersionExisting} is {@code true}: + * - Attempt to remove version {@code 4} from {@code prevSnapId}; expect {@link IOException} because + * the successor snapshot still exists at version {@code 5} and depends on {@code prevSnapId}. + * - Validate that versions and version maps remain unchanged. + * Else ({@code false}): + * - Remove version {@code 5} from {@code snapId} and commit, then remove version {@code 4} from + * {@code prevSnapId} and commit. + * - Validate that both snapshots now only contain the base version {@code 0}. + * + * This ensures a snapshot cannot drop a version that still has a dependent successor, and that removals + * are allowed only after dependents are cleared. 
+ * + * @param nextVersionExisting whether the successor snapshot's version still exists ({@code true}) or is + * removed first ({@code false}) + * @throws IOException if commit validation fails as expected in the protected case + */ + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testWriteVersionValidation(boolean nextVersionExisting) throws IOException { + localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); + List snapshotIds = createSnapshotLocalData(localDataManager, 3); + UUID prevSnapId = snapshotIds.get(0); + UUID snapId = snapshotIds.get(1); + UUID nextSnapId = snapshotIds.get(2); + addVersionsToLocalData(localDataManager, prevSnapId, ImmutableMap.of(4, 1)); + addVersionsToLocalData(localDataManager, snapId, ImmutableMap.of(5, 4)); + addVersionsToLocalData(localDataManager, nextSnapId, ImmutableMap.of(6, 0)); + + validateVersions(localDataManager, snapId, 5, Sets.newHashSet(0, 5)); + validateVersions(localDataManager, prevSnapId, 4, Sets.newHashSet(0, 4)); + + if (nextVersionExisting) { + try (WritableOmSnapshotLocalDataProvider prevSnap = localDataManager.getWritableOmSnapshotLocalData(prevSnapId)) { + prevSnap.removeVersion(4); + IOException ex = assertThrows(IOException.class, prevSnap::commit); + assertTrue(ex.getMessage().contains("Cannot remove Snapshot " + prevSnapId + " with version : 4 since it " + + "still has predecessors")); + } + validateVersions(localDataManager, snapId, 5, Sets.newHashSet(0, 5)); + validateVersions(localDataManager, prevSnapId, 4, Sets.newHashSet(0, 4)); + } else { + try (WritableOmSnapshotLocalDataProvider snap = localDataManager.getWritableOmSnapshotLocalData(snapId)) { + snap.removeVersion(5); + snap.commit(); + } + + try (WritableOmSnapshotLocalDataProvider prevSnap = localDataManager.getWritableOmSnapshotLocalData(prevSnapId)) { + prevSnap.removeVersion(4); + prevSnap.commit(); + } + validateVersions(localDataManager, snapId, 5, Sets.newHashSet(0)); + 
validateVersions(localDataManager, prevSnapId, 4, Sets.newHashSet(0)); + // Check next snapshot is able to resolve to previous snapshot. + try (ReadableOmSnapshotLocalDataProvider nextSnap = localDataManager.getOmSnapshotLocalData(nextSnapId, + prevSnapId)) { + OmSnapshotLocalData snapshotLocalData = nextSnap.getSnapshotLocalData(); + assertEquals(prevSnapId, snapshotLocalData.getPreviousSnapshotId()); + snapshotLocalData.getVersionSstFileInfos() + .forEach((version, versionMeta) -> { + assertEquals(0, versionMeta.getPreviousSnapshotVersion()); + }); + } + } + } + + private void addVersionsToLocalData(OmSnapshotLocalDataManager snapshotLocalDataManager, + UUID snapId, Map versionMap) throws IOException { + try (WritableOmSnapshotLocalDataProvider snap = snapshotLocalDataManager.getWritableOmSnapshotLocalData(snapId)) { + OmSnapshotLocalData snapshotLocalData = snap.getSnapshotLocalData(); + for (Map.Entry version : versionMap.entrySet().stream() + .sorted(Map.Entry.comparingByKey()).collect(Collectors.toList())) { + snapshotLocalData.setVersion(version.getKey() - 1); + snapshotLocalData.addVersionSSTFileInfos(ImmutableList.of(createMockLiveFileMetaData("file" + version + + ".sst", KEY_TABLE, "key1", "key2")), version.getValue()); + } + mockSnapshotStore(snapId, ImmutableList.of(createMockLiveFileMetaData("file" + + snapshotLocalData.getVersion() + 1 + ".sst", KEY_TABLE, "key1", "key2"))); + snap.addSnapshotVersion(snapshotStore); + snap.removeVersion(snapshotLocalData.getVersion()); + snapshotLocalData.setVersion(snapshotLocalData.getVersion() - 1); + snap.commit(); + } + try (ReadableOmSnapshotLocalDataProvider snap = snapshotLocalDataManager.getOmSnapshotLocalData(snapId)) { + OmSnapshotLocalData snapshotLocalData = snap.getSnapshotLocalData(); + for (int version : versionMap.keySet()) { + assertTrue(snapshotLocalData.getVersionSstFileInfos().containsKey(version)); + } + } + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3}) + public void 
testNeedsDefrag(int previousVersion) throws IOException { + localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); + List snapshotIds = createSnapshotLocalData(localDataManager, 2); + for (UUID snapshotId : snapshotIds) { + try (ReadableOmSnapshotLocalDataProvider snap = localDataManager.getOmSnapshotLocalData(snapshotId)) { + assertTrue(snap.needsDefrag()); + } + } + addVersionsToLocalData(localDataManager, snapshotIds.get(0), ImmutableMap.of(1, 1, 2, 2, 3, 3)); + try (ReadableOmSnapshotLocalDataProvider snap = localDataManager.getOmSnapshotLocalData(snapshotIds.get(0))) { + assertFalse(snap.needsDefrag()); + } + addVersionsToLocalData(localDataManager, snapshotIds.get(1), ImmutableMap.of(1, 3, 2, previousVersion)); + try (ReadableOmSnapshotLocalDataProvider snap = localDataManager.getOmSnapshotLocalData(snapshotIds.get(1))) { + assertEquals(previousVersion < snap.getPreviousSnapshotLocalData().getVersion(), snap.needsDefrag()); + } + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testVersionResolution(boolean read) throws IOException { + localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); + List snapshotIds = createSnapshotLocalData(localDataManager, 5); + List> versionMaps = Arrays.asList( + ImmutableMap.of(4, 1, 6, 3, 8, 9, 11, 15), + ImmutableMap.of(5, 4, 6, 8, 10, 11), + ImmutableMap.of(1, 5, 3, 5, 8, 10), + ImmutableMap.of(1, 1, 2, 3, 5, 8), + ImmutableMap.of(1, 1, 11, 2, 20, 5, 30, 2) + ); + for (int i = 0; i < snapshotIds.size(); i++) { + addVersionsToLocalData(localDataManager, snapshotIds.get(i), versionMaps.get(i)); + } + for (int start = 0; start < snapshotIds.size(); start++) { + for (int end = start + 1; end < snapshotIds.size(); end++) { + UUID prevSnapId = snapshotIds.get(start); + UUID snapId = snapshotIds.get(end); + Map versionMap = new HashMap<>(versionMaps.get(end)); + versionMap.put(0, 0); + for (int idx = end - 1; idx > start; idx--) { + for (Map.Entry version : 
versionMap.entrySet()) { + version.setValue(versionMaps.get(idx).getOrDefault(version.getValue(), 0)); + } + } + try (ReadableOmSnapshotLocalDataProvider snap = read ? + localDataManager.getOmSnapshotLocalData(snapId, prevSnapId) : + localDataManager.getWritableOmSnapshotLocalData(snapId, prevSnapId)) { + OmSnapshotLocalData snapshotLocalData = snap.getSnapshotLocalData(); + OmSnapshotLocalData prevSnapshotLocalData = snap.getPreviousSnapshotLocalData(); + assertEquals(prevSnapshotLocalData.getSnapshotId(), snapshotLocalData.getPreviousSnapshotId()); + assertEquals(prevSnapId, snapshotLocalData.getPreviousSnapshotId()); + assertEquals(snapId, snapshotLocalData.getSnapshotId()); + assertTrue(snapshotLocalData.getVersionSstFileInfos().size() > 1); + snapshotLocalData.getVersionSstFileInfos() + .forEach((version, versionMeta) -> { + assertEquals(versionMap.get(version), versionMeta.getPreviousSnapshotVersion()); + }); + } + } + } + } + @Test public void testConstructor() throws IOException { localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); @@ -147,7 +637,7 @@ public void testGetSnapshotLocalPropertyYamlPathWithSnapshotInfo() throws IOExce SnapshotInfo snapshotInfo = createMockSnapshotInfo(snapshotId, null); localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); - + File yamlPath = new File(localDataManager.getSnapshotLocalPropertyYamlPath(snapshotInfo)); assertNotNull(yamlPath); Path expectedYamlPath = Paths.get(snapshotsDir.getAbsolutePath(), "db" + OM_SNAPSHOT_SEPARATOR + snapshotId @@ -155,14 +645,61 @@ public void testGetSnapshotLocalPropertyYamlPathWithSnapshotInfo() throws IOExce assertEquals(expectedYamlPath.toAbsolutePath().toString(), yamlPath.getAbsolutePath()); } + @Test + public void testCreateNewSnapshotLocalYaml() throws IOException { + UUID snapshotId = UUID.randomUUID(); + SnapshotInfo snapshotInfo = createMockSnapshotInfo(snapshotId, null); + + Map> expNotDefraggedSSTFileList = new TreeMap<>(); + 
OmSnapshotLocalData.VersionMeta notDefraggedVersionMeta = new OmSnapshotLocalData.VersionMeta(0, + ImmutableList.of(new SstFileInfo("dt1", "k1", "k2", DIRECTORY_TABLE), + new SstFileInfo("dt2", "k1", "k2", DIRECTORY_TABLE), + new SstFileInfo("ft1", "k1", "k2", FILE_TABLE), + new SstFileInfo("ft2", "k1", "k2", FILE_TABLE), + new SstFileInfo("kt1", "k1", "k2", KEY_TABLE), + new SstFileInfo("kt2", "k1", "k2", KEY_TABLE))); + expNotDefraggedSSTFileList.put(KEY_TABLE, Stream.of("kt1", "kt2").collect(Collectors.toList())); + expNotDefraggedSSTFileList.put(FILE_TABLE, Stream.of("ft1", "ft2").collect(Collectors.toList())); + expNotDefraggedSSTFileList.put(DIRECTORY_TABLE, Stream.of("dt1", "dt2").collect(Collectors.toList())); + + List mockedLiveFiles = new ArrayList<>(); + for (Map.Entry> entry : expNotDefraggedSSTFileList.entrySet()) { + String cfname = entry.getKey(); + for (String fname : entry.getValue()) { + mockedLiveFiles.add(createMockLiveFileMetaData("/" + fname + ".sst", cfname, "k1", "k2")); + } + } + // Add some other column families and files that should be ignored + mockedLiveFiles.add(createMockLiveFileMetaData("ot1.sst", "otherTable", "k1", "k2")); + mockedLiveFiles.add(createMockLiveFileMetaData("ot2.sst", "otherTable", "k1", "k2")); + + mockSnapshotStore(snapshotId, mockedLiveFiles); + localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); + Path snapshotYaml = Paths.get(localDataManager.getSnapshotLocalPropertyYamlPath(snapshotInfo)); + // Create an existing YAML file for the snapshot + assertTrue(snapshotYaml.toFile().createNewFile()); + assertEquals(0, Files.size(snapshotYaml)); + // Create a new YAML file for the snapshot + localDataManager.createNewOmSnapshotLocalDataFile(snapshotStore, snapshotInfo); + // Verify that previous file was overwritten + assertTrue(Files.exists(snapshotYaml)); + assertTrue(Files.size(snapshotYaml) > 0); + // Verify the contents of the YAML file + OmSnapshotLocalData localData = 
localDataManager.getOmSnapshotLocalData(snapshotYaml.toFile()); + assertNotNull(localData); + assertEquals(0, localData.getVersion()); + assertEquals(notDefraggedVersionMeta, localData.getVersionSstFileInfos().get(0)); + assertFalse(localData.getSstFiltered()); + assertEquals(0L, localData.getLastDefragTime()); + assertTrue(localData.getNeedsDefrag()); + assertEquals(1, localData.getVersionSstFileInfos().size()); + } + @Test public void testCreateNewOmSnapshotLocalDataFile() throws IOException { UUID snapshotId = UUID.randomUUID(); SnapshotInfo snapshotInfo = createMockSnapshotInfo(snapshotId, null); - // Setup snapshot store mock - File snapshotDbLocation = OmSnapshotManager.getSnapshotPath(omMetadataManager, snapshotId).toFile(); - assertTrue(snapshotDbLocation.exists() || snapshotDbLocation.mkdirs()); List sstFiles = new ArrayList<>(); sstFiles.add(createMockLiveFileMetaData("file1.sst", KEY_TABLE, "key1", "key7")); @@ -172,21 +709,27 @@ public void testCreateNewOmSnapshotLocalDataFile() throws IOException { sstFiles.add(createMockLiveFileMetaData("file5.sst", DIRECTORY_TABLE, "key1", "key7")); sstFiles.add(createMockLiveFileMetaData("file6.sst", "colFamily1", "key1", "key7")); List sstFileInfos = IntStream.range(0, sstFiles.size() - 1) - .mapToObj(sstFiles::get).map(SstFileInfo::new).collect(Collectors.toList()); - when(snapshotStore.getDbLocation()).thenReturn(snapshotDbLocation); - RocksDatabase rocksDatabase = mock(RocksDatabase.class); - when(snapshotStore.getDb()).thenReturn(rocksDatabase); - when(rocksDatabase.getLiveFilesMetaData()).thenReturn(sstFiles); + .mapToObj(sstFiles::get).map(lfm -> + new SstFileInfo(lfm.fileName().replace(".sst", ""), + bytes2String(lfm.smallestKey()), + bytes2String(lfm.largestKey()), bytes2String(lfm.columnFamilyName()))).collect(Collectors.toList()); + mockSnapshotStore(snapshotId, sstFiles); + localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); - + 
localDataManager.createNewOmSnapshotLocalDataFile(snapshotStore, snapshotInfo); // Verify file was created - OmSnapshotLocalData snapshotLocalData = localDataManager.getOmSnapshotLocalData(snapshotId); - assertEquals(1, snapshotLocalData.getVersionSstFileInfos().size()); - OmSnapshotLocalData.VersionMeta versionMeta = snapshotLocalData.getVersionSstFileInfos().get(0); - OmSnapshotLocalData.VersionMeta expectedVersionMeta = new OmSnapshotLocalData.VersionMeta(0, sstFileInfos); - assertEquals(expectedVersionMeta, versionMeta); + OmSnapshotLocalData.VersionMeta versionMeta; + try (ReadableOmSnapshotLocalDataProvider snapshotLocalData = localDataManager.getOmSnapshotLocalData(snapshotId)) { + assertEquals(1, snapshotLocalData.getSnapshotLocalData().getVersionSstFileInfos().size()); + versionMeta = snapshotLocalData.getSnapshotLocalData().getVersionSstFileInfos().get(0); + OmSnapshotLocalData.VersionMeta expectedVersionMeta = + new OmSnapshotLocalData.VersionMeta(0, sstFileInfos); + assertEquals(expectedVersionMeta, versionMeta); + // New Snapshot create needs to be defragged always. 
+ assertTrue(snapshotLocalData.needsDefrag()); + } } @Test @@ -198,16 +741,16 @@ public void testGetOmSnapshotLocalDataWithSnapshotInfo() throws IOException { OmSnapshotLocalData localData = createMockLocalData(snapshotId, null); localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); - + // Write the file manually for testing Path yamlPath = Paths.get(localDataManager.getSnapshotLocalPropertyYamlPath(snapshotInfo.getSnapshotId())); writeLocalDataToFile(localData, yamlPath); // Test retrieval - OmSnapshotLocalData retrieved = localDataManager.getOmSnapshotLocalData(snapshotInfo); - - assertNotNull(retrieved); - assertEquals(snapshotId, retrieved.getSnapshotId()); + try (ReadableOmSnapshotLocalDataProvider retrieved = localDataManager.getOmSnapshotLocalData(snapshotInfo)) { + assertNotNull(retrieved.getSnapshotLocalData()); + assertEquals(snapshotId, retrieved.getSnapshotLocalData().getSnapshotId()); + } } @Test @@ -219,7 +762,7 @@ public void testGetOmSnapshotLocalDataWithMismatchedSnapshotId() throws IOExcept OmSnapshotLocalData localData = createMockLocalData(wrongSnapshotId, null); localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); - + Path yamlPath = Paths.get(localDataManager.getSnapshotLocalPropertyYamlPath(snapshotId)); writeLocalDataToFile(localData, yamlPath); // Should throw IOException due to mismatched IDs @@ -235,7 +778,7 @@ public void testGetOmSnapshotLocalDataWithFile() throws IOException { OmSnapshotLocalData localData = createMockLocalData(snapshotId, null); localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); - + Path yamlPath = tempDir.resolve("test-snapshot.yaml"); writeLocalDataToFile(localData, yamlPath); @@ -269,7 +812,7 @@ public void testAddVersionNodeWithDependentsAlreadyExists() throws IOException { createSnapshotLocalDataFile(snapshotId, null); localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); - + OmSnapshotLocalData localData = createMockLocalData(snapshotId, null); 
// First addition @@ -291,7 +834,7 @@ public void testInitWithExistingYamlFiles() throws IOException { // Initialize - should load existing files localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); - + assertNotNull(localDataManager); Map versionMap = localDataManager.getVersionNodeMap(); @@ -317,7 +860,7 @@ public void testInitWithInvalidPathThrowsException() throws IOException { @Test public void testClose() throws IOException { localDataManager = new OmSnapshotLocalDataManager(omMetadataManager); - + // Should not throw exception localDataManager.close(); } @@ -352,7 +895,7 @@ private OmSnapshotLocalData createMockLocalData(UUID snapshotId, UUID previousSn sstFiles.add(createMockLiveFileMetaData("file2.sst", "columnFamily1", "key3", "key10")); sstFiles.add(createMockLiveFileMetaData("file3.sst", "columnFamily2", "key1", "key8")); sstFiles.add(createMockLiveFileMetaData("file4.sst", "columnFamily2", "key0", "key10")); - return new OmSnapshotLocalData(snapshotId, sstFiles, previousSnapshotId); + return new OmSnapshotLocalData(snapshotId, sstFiles, previousSnapshotId, null); } private void createSnapshotLocalDataFile(UUID snapshotId, UUID previousSnapshotId) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java index 9406d74c5ff6..e3de9653f1fd 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java @@ -173,7 +173,7 @@ public void testGetHoldsReadLock(int numberOfLocks) throws IOException { @ParameterizedTest @ValueSource(ints = {0, 1, 5, 10}) @DisplayName("Tests lock() holds a write lock") - public void testGetHoldsWriteLock(int numberOfLocks) { + public void testLockHoldsWriteLock(int numberOfLocks) { clearInvocations(lock); for 
(int i = 0; i < numberOfLocks; i++) { snapshotCache.lock(); @@ -181,6 +181,18 @@ public void testGetHoldsWriteLock(int numberOfLocks) { verify(lock, times(numberOfLocks)).acquireResourceWriteLock(eq(SNAPSHOT_DB_LOCK)); } + @ParameterizedTest + @ValueSource(ints = {0, 1, 5, 10}) + @DisplayName("Tests lock(snapshotId) holds a write lock") + public void testLockHoldsWriteLockSnapshotId(int numberOfLocks) { + clearInvocations(lock); + UUID snapshotId = UUID.randomUUID(); + for (int i = 0; i < numberOfLocks; i++) { + snapshotCache.lock(snapshotId); + } + verify(lock, times(numberOfLocks)).acquireWriteLock(eq(SNAPSHOT_DB_LOCK), eq(snapshotId.toString())); + } + @Test @DisplayName("get() same entry twice yields one cache entry only") void testGetTwice() throws IOException { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotChain.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotChain.java index 3f53a66f4f95..e62b64893254 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotChain.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotChain.java @@ -93,7 +93,6 @@ private SnapshotInfo createSnapshotInfo(UUID snapshotID, .setPathPreviousSnapshotId(pathPrevID) .setGlobalPreviousSnapshotId(globalPrevID) .setSnapshotPath(String.join("/", "vol1", "bucket1")) - .setCheckpointDir("checkpoint.testdir") .build(); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java index 0ea625a0e064..ec896cb3dda3 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java +++ 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java @@ -289,7 +289,6 @@ public void init() throws RocksDBException, IOException, ExecutionException { .setBucketName(BUCKET_NAME) .setName(baseSnapshotName) .setSnapshotPath(snapshotPath) - .setCheckpointDir(snapshotCheckpointDir) .build(); for (JobStatus jobStatus : jobStatuses) { @@ -302,7 +301,6 @@ public void init() throws RocksDBException, IOException, ExecutionException { .setBucketName(BUCKET_NAME) .setName(targetSnapshotName) .setSnapshotPath(snapshotPath) - .setCheckpointDir(snapshotCheckpointDir) .build(); SnapshotDiffJob diffJob = new SnapshotDiffJob(System.currentTimeMillis(), @@ -1395,7 +1393,6 @@ public void testThreadPoolIsFull(String description, .setBucketName(BUCKET_NAME) .setName(snapshotName) .setSnapshotPath("fromSnapshotPath") - .setCheckpointDir("fromSnapshotCheckpointDir") .build(); snapshotInfos.add(snapInfo); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotInfo.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotInfo.java index ca27d9bc8938..a39d907038fb 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotInfo.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotInfo.java @@ -75,7 +75,6 @@ private SnapshotInfo createSnapshotInfo() { .setPathPreviousSnapshotId(EXPECTED_PREVIOUS_SNAPSHOT_ID) .setGlobalPreviousSnapshotId(EXPECTED_PREVIOUS_SNAPSHOT_ID) .setSnapshotPath("test/path") - .setCheckpointDir("checkpoint.testdir") .build(); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java index d9e81693dd8d..9c6f033b907b 100644 --- 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java @@ -26,6 +26,7 @@ import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.framework; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; import java.io.File; @@ -148,8 +149,8 @@ public void baseSetup() throws Exception { testDir.getAbsolutePath()); ozoneConfiguration.set(OzoneConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); - omMetadataManager = new OmMetadataManagerImpl(ozoneConfiguration, - ozoneManager); + omMetadataManager = spy(new OmMetadataManagerImpl(ozoneConfiguration, + ozoneManager)); when(ozoneManager.getConfiguration()).thenReturn(ozoneConfiguration); when(ozoneManager.resolveBucketLink(any(Pair.class), any(OMClientRequest.class))) .thenAnswer(i -> new ResolvedBucket(i.getArgument(0), @@ -225,7 +226,7 @@ protected Path createSnapshotCheckpoint(String volume, String bucket, String sna RDBStore store = (RDBStore) omMetadataManager.getStore(); String checkpointPrefix = store.getDbLocation().getName(); Path snapshotDirPath = Paths.get(store.getSnapshotsParentDir(), - checkpointPrefix + snapshotInfo.getCheckpointDir()); + checkpointPrefix + SnapshotInfo.getCheckpointDirName(snapshotInfo.getSnapshotId(), 0)); // Check the DB is still there assertTrue(Files.exists(snapshotDirPath)); return snapshotDirPath; @@ -276,5 +277,4 @@ protected List>> getDeletedDirKeys(String volume, S }) .collect(Collectors.toList()); } - } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSstFilteringService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSstFilteringService.java index e523f32ef7e2..108dd30c8222 100644 --- 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSstFilteringService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSstFilteringService.java @@ -219,7 +219,7 @@ public void testIrrelevantSstFileDeletion() .get(SnapshotInfo.getTableKey(volumeName, bucketName2, snapshotName1)); String snapshotDirName = - OmSnapshotManager.getSnapshotPath(conf, snapshotInfo); + OmSnapshotManager.getSnapshotPath(conf, snapshotInfo, 0); for (LiveFileMetaData file : allFiles) { //Skipping the previous files from this check even those also works. @@ -294,11 +294,11 @@ public void testActiveAndDeletedSnapshotCleanup() throws Exception { SnapshotInfo snapshot1Info = om.getMetadataManager().getSnapshotInfoTable() .get(SnapshotInfo.getTableKey(volumeName, bucketNames.get(0), "snap1")); File snapshot1Dir = - new File(OmSnapshotManager.getSnapshotPath(conf, snapshot1Info)); + new File(OmSnapshotManager.getSnapshotPath(conf, snapshot1Info, 0)); SnapshotInfo snapshot2Info = om.getMetadataManager().getSnapshotInfoTable() .get(SnapshotInfo.getTableKey(volumeName, bucketNames.get(0), "snap2")); File snapshot2Dir = - new File(OmSnapshotManager.getSnapshotPath(conf, snapshot2Info)); + new File(OmSnapshotManager.getSnapshotPath(conf, snapshot2Info, 0)); File snap1Current = new File(snapshot1Dir, "CURRENT"); File snap2Current = new File(snapshot2Dir, "CURRENT"); diff --git a/hadoop-ozone/ozonefs-common/pom.xml b/hadoop-ozone/ozonefs-common/pom.xml index 8483ec4cfda5..aecaa66cd4c0 100644 --- a/hadoop-ozone/ozonefs-common/pom.xml +++ b/hadoop-ozone/ozonefs-common/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone hdds-hadoop-dependency-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../../hadoop-hdds/hadoop-dependency-client ozone-filesystem-common - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone FileSystem Common diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneFileSystem.java 
b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneFileSystem.java index 67a252e69568..53c87cfe6111 100644 --- a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneFileSystem.java +++ b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneFileSystem.java @@ -139,13 +139,6 @@ public void initialize(URI name, Configuration conf) throws IOException { listingPageSize = OzoneClientUtils.limitValue(listingPageSize, OZONE_FS_LISTING_PAGE_SIZE, OZONE_FS_MAX_LISTING_PAGE_SIZE); - isRatisStreamingEnabled = conf.getBoolean( - OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED, - OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED_DEFAULT); - streamingAutoThreshold = (int) OzoneConfiguration.of(conf).getStorageSize( - OzoneConfigKeys.OZONE_FS_DATASTREAM_AUTO_THRESHOLD, - OzoneConfigKeys.OZONE_FS_DATASTREAM_AUTO_THRESHOLD_DEFAULT, - StorageUnit.BYTES); setConf(conf); Preconditions.checkNotNull(name.getScheme(), "No scheme provided in %s", name); @@ -193,6 +186,13 @@ public void initialize(URI name, Configuration conf) throws IOException { LOG.trace("Ozone URI for ozfs initialization is {}", uri); ConfigurationSource source = getConfSource(); + isRatisStreamingEnabled = source.getBoolean( + OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED, + OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED_DEFAULT); + streamingAutoThreshold = (int) source.getStorageSize( + OzoneConfigKeys.OZONE_FS_DATASTREAM_AUTO_THRESHOLD, + OzoneConfigKeys.OZONE_FS_DATASTREAM_AUTO_THRESHOLD_DEFAULT, + StorageUnit.BYTES); this.hsyncEnabled = OzoneFSUtils.canEnableHsync(source, true); LOG.debug("hsyncEnabled = {}", hsyncEnabled); this.adapter = diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneFileSystem.java b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneFileSystem.java index ca717f7a8765..04d2af5868a7 100644 --- 
a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneFileSystem.java +++ b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneFileSystem.java @@ -145,13 +145,6 @@ public void initialize(URI name, Configuration conf) throws IOException { listingPageSize = OzoneClientUtils.limitValue(listingPageSize, OZONE_FS_LISTING_PAGE_SIZE, OZONE_FS_MAX_LISTING_PAGE_SIZE); - isRatisStreamingEnabled = conf.getBoolean( - OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED, - OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED_DEFAULT); - streamingAutoThreshold = (int) OzoneConfiguration.of(conf).getStorageSize( - OzoneConfigKeys.OZONE_FS_DATASTREAM_AUTO_THRESHOLD, - OzoneConfigKeys.OZONE_FS_DATASTREAM_AUTO_THRESHOLD_DEFAULT, - StorageUnit.BYTES); setConf(conf); Preconditions.checkNotNull(name.getScheme(), "No scheme provided in %s", name); @@ -207,6 +200,13 @@ public void initialize(URI name, Configuration conf) throws IOException { throw new IOException(msg, ue); } ozoneConfiguration = OzoneConfiguration.of(getConfSource()); + isRatisStreamingEnabled = ozoneConfiguration.getBoolean( + OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED, + OzoneConfigKeys.OZONE_FS_DATASTREAM_ENABLED_DEFAULT); + streamingAutoThreshold = (int) ozoneConfiguration.getStorageSize( + OzoneConfigKeys.OZONE_FS_DATASTREAM_AUTO_THRESHOLD, + OzoneConfigKeys.OZONE_FS_DATASTREAM_AUTO_THRESHOLD_DEFAULT, + StorageUnit.BYTES); } protected OzoneClientAdapter createAdapter(ConfigurationSource conf, diff --git a/hadoop-ozone/ozonefs-hadoop2/pom.xml b/hadoop-ozone/ozonefs-hadoop2/pom.xml index 2e1ea5ae9f2f..d204061df350 100644 --- a/hadoop-ozone/ozonefs-hadoop2/pom.xml +++ b/hadoop-ozone/ozonefs-hadoop2/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-filesystem-hadoop2 - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone FS Hadoop 2.x compatibility diff --git a/hadoop-ozone/ozonefs-hadoop3-client/pom.xml 
b/hadoop-ozone/ozonefs-hadoop3-client/pom.xml index 2871f5122455..24086c3750cd 100644 --- a/hadoop-ozone/ozonefs-hadoop3-client/pom.xml +++ b/hadoop-ozone/ozonefs-hadoop3-client/pom.xml @@ -17,7 +17,7 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-filesystem-hadoop3-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone FS Hadoop shaded 3.x compatibility diff --git a/hadoop-ozone/ozonefs-hadoop3/pom.xml b/hadoop-ozone/ozonefs-hadoop3/pom.xml index 32a21fb4e165..6d236f54ff24 100644 --- a/hadoop-ozone/ozonefs-hadoop3/pom.xml +++ b/hadoop-ozone/ozonefs-hadoop3/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-filesystem-hadoop3 - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone FS Hadoop 3.x compatibility diff --git a/hadoop-ozone/ozonefs-shaded/pom.xml b/hadoop-ozone/ozonefs-shaded/pom.xml index 7101c56ef856..5e7ec27b8438 100644 --- a/hadoop-ozone/ozonefs-shaded/pom.xml +++ b/hadoop-ozone/ozonefs-shaded/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-filesystem-shaded - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone FileSystem Shaded diff --git a/hadoop-ozone/ozonefs/pom.xml b/hadoop-ozone/ozonefs/pom.xml index e260d4df7968..5b304cac3f33 100644 --- a/hadoop-ozone/ozonefs/pom.xml +++ b/hadoop-ozone/ozonefs/pom.xml @@ -17,11 +17,11 @@ org.apache.ozone hdds-hadoop-dependency-client - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../../hadoop-hdds/hadoop-dependency-client ozone-filesystem - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone FileSystem diff --git a/hadoop-ozone/pom.xml b/hadoop-ozone/pom.xml index b7e4e8428611..32041971cbeb 100644 --- a/hadoop-ozone/pom.xml +++ b/hadoop-ozone/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone-main - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT pom Apache Ozone Apache Ozone Project diff --git a/hadoop-ozone/recon-codegen/pom.xml b/hadoop-ozone/recon-codegen/pom.xml index 90041224b8a5..58871f098898 100644 
--- a/hadoop-ozone/recon-codegen/pom.xml +++ b/hadoop-ozone/recon-codegen/pom.xml @@ -17,7 +17,7 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-reconcodegen Apache Ozone Recon CodeGen diff --git a/hadoop-ozone/recon/pom.xml b/hadoop-ozone/recon/pom.xml index 2beaa42898d3..9a0936ebf194 100644 --- a/hadoop-ozone/recon/pom.xml +++ b/hadoop-ozone/recon/pom.xml @@ -17,7 +17,7 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-recon Apache Ozone Recon diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java index b6b7e3cf5b41..57067c421344 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java @@ -85,6 +85,7 @@ import org.apache.hadoop.hdds.scm.ha.SCMNodeDetails; import org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator; import org.apache.hadoop.hdds.scm.metadata.SCMDBTransactionBufferImpl; +import org.apache.hadoop.hdds.scm.metadata.SCMMetadataStore; import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl; import org.apache.hadoop.hdds.scm.node.DeadNodeHandler; @@ -693,6 +694,21 @@ public ReconfigurationHandler getReconfigurationHandler() { return null; } + @Override + public SCMMetadataStore getScmMetadataStore() { + return null; + } + + @Override + public SCMHAManager getScmHAManager() { + return null; + } + + @Override + public SequenceIdGenerator getSequenceIdGen() { + return null; + } + public DBStore getScmDBStore() { return dbStore; } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java 
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java index faf2008ec6b5..e83fc64ad4cc 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java @@ -127,7 +127,8 @@ protected void handlePutDirEvent(OmDirectoryInfo directoryInfo, long objectId = directoryInfo.getObjectID(); // write the dir name to the current directory String dirName = directoryInfo.getName(); - // Try to get the NSSummary from our local map that maps NSSummaries to IDs + + // Get or create the directory's NSSummary NSSummary curNSSummary = nsSummaryMap.get(objectId); if (curNSSummary == null) { // If we don't have it in this batch we try to get it from the DB @@ -140,34 +141,32 @@ protected void handlePutDirEvent(OmDirectoryInfo directoryInfo, int existingNumOfFiles = directoryAlreadyExists ? curNSSummary.getNumOfFiles() : 0; long existingReplicatedSizeOfFiles = directoryAlreadyExists ? 
curNSSummary.getReplicatedSizeOfFiles() : 0; - if (curNSSummary == null) { - // If we don't have it locally and in the DB we create a new instance - // as this is a new ID + if (!directoryAlreadyExists) { curNSSummary = new NSSummary(); } curNSSummary.setDirName(dirName); - // Set the parent directory ID curNSSummary.setParentId(parentObjectId); nsSummaryMap.put(objectId, curNSSummary); - // Write the child dir list to the parent directory - // Try to get the NSSummary from our local map that maps NSSummaries to IDs - NSSummary nsSummary = nsSummaryMap.get(parentObjectId); - if (nsSummary == null) { - // If we don't have it in this batch we try to get it from the DB - nsSummary = reconNamespaceSummaryManager.getNSSummary(parentObjectId); + // Get or create the parent's NSSummary + NSSummary parentNSSummary = nsSummaryMap.get(parentObjectId); + if (parentNSSummary == null) { + parentNSSummary = reconNamespaceSummaryManager.getNSSummary(parentObjectId); } - if (nsSummary == null) { + if (parentNSSummary == null) { // If we don't have it locally and in the DB we create a new instance // as this is a new ID - nsSummary = new NSSummary(); + parentNSSummary = new NSSummary(); } - nsSummary.addChildDir(objectId); - nsSummaryMap.put(parentObjectId, nsSummary); + + // Add child directory to parent + parentNSSummary.addChildDir(objectId); + nsSummaryMap.put(parentObjectId, parentNSSummary); // If the directory already existed with content, propagate its totals upward + // propagateSizeUpwards will update parent, grandparent, etc. 
if (directoryAlreadyExists && (existingSizeOfFiles > 0 || existingNumOfFiles > 0)) { - propagateSizeUpwards(parentObjectId, existingSizeOfFiles, + propagateSizeUpwards(objectId, existingSizeOfFiles, existingReplicatedSizeOfFiles, existingNumOfFiles, nsSummaryMap); } } @@ -233,32 +232,25 @@ protected void handleDeleteDirEvent(OmDirectoryInfo directoryInfo, return; } - // If deleted directory exists, decrement its totals from parent and propagate + // Remove the deleted directory ID from parent's childDir set + parentNsSummary.removeChildDir(deletedDirObjectId); + nsSummaryMap.put(parentObjectId, parentNsSummary); + + // If deleted directory exists, propagate its totals upward (as negative deltas) + // propagateSizeUpwards will update parent, grandparent, etc. if (deletedDirSummary != null) { - // Decrement parent's totals by the deleted directory's totals - parentNsSummary.setNumOfFiles(parentNsSummary.getNumOfFiles() - deletedDirSummary.getNumOfFiles()); - parentNsSummary.setSizeOfFiles(parentNsSummary.getSizeOfFiles() - deletedDirSummary.getSizeOfFiles()); - long parentReplSize = parentNsSummary.getReplicatedSizeOfFiles(); long deletedReplSize = deletedDirSummary.getReplicatedSizeOfFiles(); - if (parentReplSize >= 0 && deletedReplSize >= 0) { - parentNsSummary.setReplicatedSizeOfFiles(parentReplSize - deletedReplSize); - } - - // Propagate the decrements upwards to all ancestors if (deletedReplSize < 0) { deletedReplSize = 0; } - propagateSizeUpwards(parentObjectId, -deletedDirSummary.getSizeOfFiles(), + + propagateSizeUpwards(deletedDirObjectId, -deletedDirSummary.getSizeOfFiles(), -deletedReplSize, -deletedDirSummary.getNumOfFiles(), nsSummaryMap); // Set the deleted directory's parentId to 0 (unlink it) deletedDirSummary.setParentId(0); nsSummaryMap.put(deletedDirObjectId, deletedDirSummary); } - - // Remove the deleted directory ID from parent's childDir set - parentNsSummary.removeChildDir(deletedDirObjectId); - nsSummaryMap.put(parentObjectId, 
parentNsSummary); } protected boolean flushAndCommitNSToDB(Map nsSummaryMap) { diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/nuMetadata/nuMetadata.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/nuMetadata/nuMetadata.tsx index bcea9ab40cfb..875defd912ec 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/nuMetadata/nuMetadata.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/nuMetadata/nuMetadata.tsx @@ -125,6 +125,7 @@ const NUMetadata: React.FC = ({ }) => { const [state, setState] = useState([]); const [isProcessingData, setIsProcessingData] = useState(false); + const [pgNumber, setPgNumber] = useState(1); // Individual API calls that resolve together const summaryAPI = useApiData( `/api/v1/namespace/summary?path=${path}`, @@ -359,6 +360,11 @@ const NUMetadata: React.FC = ({ } }, [path, getObjectInfoMapping]); + // Reset pagination when path changes + useEffect(() => { + setPgNumber(1); + }, [path]); + // Coordinate API calls - process data when both calls complete useEffect(() => { if (!summaryAPI.loading && !quotaAPI.loading && @@ -369,12 +375,20 @@ const NUMetadata: React.FC = ({ }, [summaryAPI.loading, quotaAPI.loading, summaryAPI.data, quotaAPI.data, summaryAPI.lastUpdated, quotaAPI.lastUpdated, processMetadata]); + const handleTableChange = (newPagination: any) => { + setPgNumber(newPagination.current); + }; + return ( = () => { keys, pipelines, deletedContainers, + openContainers, omServiceId, scmServiceId } = clusterState.data; @@ -290,7 +291,7 @@ const Overview: React.FC<{}> = () => { lg: 16, xl: 16 }, 20]}> - + = () => { linkToUrl='/Volumes' error={clusterState.error} /> - + = () => { linkToUrl='/Buckets' error={clusterState.error} /> - + = () => { data={keys} error={clusterState.error} /> - + = () => { linkToUrl='/Pipelines' error={clusterState.error} /> - + = () => 
{ data={deletedContainers} error={clusterState.error} /> + + + childDirBucket1 = nsSummaryForBucket1.getChildDir(); // after put dir4, bucket1 now has two child dirs: dir1 and dir4 diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryTaskWithLegacy.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryTaskWithLegacy.java index fc2425cdc4d2..ecd886f276b6 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryTaskWithLegacy.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryTaskWithLegacy.java @@ -185,6 +185,7 @@ public class TestProcess { @BeforeEach public void setUp() throws IOException { + getReconNamespaceSummaryManager().clearNSSummaryTable(); nSSummaryTaskWithLegacy.reprocessWithLegacy(getReconOMMetadataManager()); nSSummaryTaskWithLegacy.processWithLegacy(processEventBatch(), 0); @@ -330,7 +331,7 @@ private OMUpdateEventBatch processEventBatch() throws IOException { public void testProcessUpdateFileSize() throws IOException { // file 1 is gone, so bucket 1 is empty now assertNotNull(nsSummaryForBucket1); - assertEquals(6, nsSummaryForBucket1.getNumOfFiles()); + assertEquals(2, nsSummaryForBucket1.getNumOfFiles()); Set childDirBucket1 = nsSummaryForBucket1.getChildDir(); // after put dir4, bucket1 now has two child dirs: dir1 and dir4 diff --git a/hadoop-ozone/s3-secret-store/pom.xml b/hadoop-ozone/s3-secret-store/pom.xml index d50fc92df44f..28379aeacc78 100644 --- a/hadoop-ozone/s3-secret-store/pom.xml +++ b/hadoop-ozone/s3-secret-store/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-s3-secret-store - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone S3 Secret Store diff --git a/hadoop-ozone/s3gateway/pom.xml b/hadoop-ozone/s3gateway/pom.xml index 9b94f4c42734..af45d3117983 100644 --- a/hadoop-ozone/s3gateway/pom.xml +++ b/hadoop-ozone/s3gateway/pom.xml @@ 
-17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-s3gateway - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone S3 Gateway diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/BucketEndpoint.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/BucketEndpoint.java index 066b31fb7d11..c808f0cce761 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/BucketEndpoint.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/BucketEndpoint.java @@ -28,6 +28,7 @@ import static org.apache.hadoop.ozone.s3.exception.S3ErrorTable.NOT_IMPLEMENTED; import static org.apache.hadoop.ozone.s3.exception.S3ErrorTable.newError; import static org.apache.hadoop.ozone.s3.util.S3Consts.ENCODING_TYPE; +import static org.apache.hadoop.ozone.s3.util.S3Utils.wrapInQuotes; import com.google.common.annotations.VisibleForTesting; import java.io.IOException; @@ -763,7 +764,7 @@ private void addKey(ListObjectResponse response, OzoneKey next) { keyMetadata.setSize(next.getDataSize()); String eTag = next.getMetadata().get(ETAG); if (eTag != null) { - keyMetadata.setETag(ObjectEndpoint.wrapInQuotes(eTag)); + keyMetadata.setETag(wrapInQuotes(eTag)); } keyMetadata.setStorageClass(S3StorageType.fromReplicationConfig( next.getReplicationConfig()).toString()); diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/ObjectEndpoint.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/ObjectEndpoint.java index 7b8f8e99b490..b495ea346dc1 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/ObjectEndpoint.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/ObjectEndpoint.java @@ -58,9 +58,11 @@ import static org.apache.hadoop.ozone.s3.util.S3Consts.TAG_DIRECTIVE_HEADER; import static org.apache.hadoop.ozone.s3.util.S3Utils.hasMultiChunksPayload; 
import static org.apache.hadoop.ozone.s3.util.S3Utils.hasUnsignedPayload; +import static org.apache.hadoop.ozone.s3.util.S3Utils.stripQuotes; import static org.apache.hadoop.ozone.s3.util.S3Utils.urlDecode; import static org.apache.hadoop.ozone.s3.util.S3Utils.validateMultiChunksUpload; import static org.apache.hadoop.ozone.s3.util.S3Utils.validateSignatureHeader; +import static org.apache.hadoop.ozone.s3.util.S3Utils.wrapInQuotes; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; @@ -905,7 +907,7 @@ public Response completeMultipartUpload(@PathParam("bucket") String bucket, S3Owner.verifyBucketOwnerCondition(headers, bucket, ozoneBucket.getOwner()); for (CompleteMultipartUploadRequest.Part part : partList) { - partsMap.put(part.getPartNumber(), part.getETag()); + partsMap.put(part.getPartNumber(), stripQuotes(part.getETag())); } if (LOG.isDebugEnabled()) { LOG.debug("Parts map {}", partsMap); @@ -1044,29 +1046,21 @@ private Response createMultipartKey(OzoneVolume volume, OzoneBucket ozoneBucket, "Bytes to skip: " + rangeHeader.getStartOffset() + " actual: " + skipped); } - try (OzoneOutputStream ozoneOutputStream = getClientProtocol() - .createMultipartKey(volume.getName(), bucketName, key, length, - partNumber, uploadID)) { - metadataLatencyNs = - getMetrics().updateCopyKeyMetadataStats(startNanos); - copyLength = IOUtils.copyLarge( - sourceObject, ozoneOutputStream, 0, length, new byte[getIOBufferSize(length)]); - ozoneOutputStream.getMetadata() - .putAll(sourceKeyDetails.getMetadata()); - outputStream = ozoneOutputStream; - } - } else { - try (OzoneOutputStream ozoneOutputStream = getClientProtocol() - .createMultipartKey(volume.getName(), bucketName, key, length, - partNumber, uploadID)) { - metadataLatencyNs = - getMetrics().updateCopyKeyMetadataStats(startNanos); - copyLength = IOUtils.copyLarge(sourceObject, ozoneOutputStream, 0, length, - new byte[getIOBufferSize(length)]); - 
ozoneOutputStream.getMetadata() - .putAll(sourceKeyDetails.getMetadata()); - outputStream = ozoneOutputStream; + } + try (OzoneOutputStream ozoneOutputStream = getClientProtocol() + .createMultipartKey(volume.getName(), bucketName, key, length, + partNumber, uploadID)) { + metadataLatencyNs = + getMetrics().updateCopyKeyMetadataStats(startNanos); + copyLength = IOUtils.copyLarge(sourceObject, ozoneOutputStream, 0, length, + new byte[getIOBufferSize(length)]); + ozoneOutputStream.getMetadata() + .putAll(sourceKeyDetails.getMetadata()); + String raw = ozoneOutputStream.getMetadata().get(ETAG); + if (raw != null) { + ozoneOutputStream.getMetadata().put(ETAG, stripQuotes(raw)); } + outputStream = ozoneOutputStream; } getMetrics().incCopyObjectSuccessLength(copyLength); perf.appendSizeBytes(copyLength); @@ -1099,6 +1093,7 @@ private Response createMultipartKey(OzoneVolume volume, OzoneBucket ozoneBucket, if (StringUtils.isEmpty(eTag)) { eTag = omMultipartCommitUploadPartInfo.getPartName(); } + eTag = wrapInQuotes(eTag); if (copyHeader != null) { getMetrics().updateCopyObjectSuccessStats(startNanos); @@ -1518,10 +1513,6 @@ public boolean isDatastreamEnabled() { return datastreamEnabled; } - static String wrapInQuotes(String value) { - return "\"" + value + "\""; - } - @VisibleForTesting public MessageDigest getMessageDigestInstance() { return E_TAG_PROVIDER.get(); diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/ObjectEndpointStreaming.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/ObjectEndpointStreaming.java index e9db0882acb2..647aafe839cb 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/ObjectEndpointStreaming.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/ObjectEndpointStreaming.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.ozone.audit.AuditLogger.PerformanceStringBuilder; import static 
org.apache.hadoop.ozone.s3.exception.S3ErrorTable.INVALID_REQUEST; import static org.apache.hadoop.ozone.s3.exception.S3ErrorTable.NO_SUCH_UPLOAD; +import static org.apache.hadoop.ozone.s3.util.S3Utils.wrapInQuotes; import java.io.IOException; import java.io.InputStream; @@ -189,6 +190,8 @@ public static Response createMultipartKey(OzoneBucket ozoneBucket, String key, } throw ex; } - return Response.ok().header(OzoneConsts.ETAG, eTag).build(); + return Response.ok() + .header(OzoneConsts.ETAG, wrapInQuotes(eTag)) + .build(); } } diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/util/S3Utils.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/util/S3Utils.java index 2b698c502721..36c4445470d1 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/util/S3Utils.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/util/S3Utils.java @@ -202,4 +202,24 @@ public static String generateCanonicalUserId(String input) { return DigestUtils.sha256Hex(input); } + /** + * Strips leading and trailing double quotes from the given string. + * + * @param value the input string + * @return the string without leading and trailing double quotes + */ + public static String stripQuotes(String value) { + return StringUtils.strip(value, "\""); + } + + /** + * Wraps the given string in double quotes. 
+ * + * @param value the input string + * @return the string wrapped in double quotes + */ + public static String wrapInQuotes(String value) { + return StringUtils.wrap(value, '\"'); + } + } diff --git a/hadoop-ozone/tools/pom.xml b/hadoop-ozone/tools/pom.xml index 0d0262fd2430..1eb9c0605e60 100644 --- a/hadoop-ozone/tools/pom.xml +++ b/hadoop-ozone/tools/pom.xml @@ -17,10 +17,10 @@ org.apache.ozone ozone - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ozone-tools - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT jar Apache Ozone Tools Apache Ozone Tools diff --git a/pom.xml b/pom.xml index 5d86e4596ef7..4b85695a3d10 100644 --- a/pom.xml +++ b/pom.xml @@ -16,7 +16,7 @@ 4.0.0 org.apache.ozone ozone-main - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT pom Apache Ozone Main Apache Ozone Main @@ -39,9 +39,9 @@ 1.9.7 3.27.6 1.12.788 - 2.35.10 + 2.37.3 0.8.0.RELEASE - 1.81 + 1.82 3.6.1 2.0 9.3 @@ -89,7 +89,7 @@ 2.29.2 unhealthy - 3.6.1 + 3.6.2 false 1.15.4 2.13.1 @@ -130,14 +130,14 @@ 3.9.12 3.30.6 0.10.4 - 3.1.20 + 3.1.21 2.12.7 3.11.10 0.1.55 1.1.1 2.1 1.1.1 - 5.14.0 + 5.14.1 1.0.1 1.9.25 2.7.0 @@ -181,8 +181,8 @@ 4.12.0 1.55.0 1.7.1 - Joshua Tree - 2.1.0-SNAPSHOT + Katmai + 2.2.0-SNAPSHOT 4.7.7 4.2.2 3.26.0 @@ -236,7 +236,7 @@ 3.1.9.Final 5.4.0 3.8.4 - 1.5.7-5 + 1.5.7-6