From ce1734c4491abf0c31392412a357be8e1c5f1953 Mon Sep 17 00:00:00 2001 From: hemantk-12 Date: Fri, 28 Feb 2025 13:57:04 -0800 Subject: [PATCH 1/3] HDDS-12210. Implemented BootstrapStateHandler for SnapshotDeletingService --- .../ozone/om/OMDBCheckpointServlet.java | 5 +++-- .../om/service/SnapshotDeletingService.java | 19 +++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index 1bd739c730dd..7c02df4bebff 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -649,8 +649,9 @@ static class Lock extends BootstrapStateHandler.Lock { locks = Stream.of( om.getKeyManager().getDeletingService(), om.getKeyManager().getSnapshotSstFilteringService(), - om.getMetadataManager().getStore().getRocksDBCheckpointDiffer(), - om.getKeyManager().getSnapshotDeletingService() + om.getKeyManager().getSnapshotDeletingService(), + om.getKeyManager().getSnapshotDirectoryService(), + om.getMetadataManager().getStore().getRocksDBCheckpointDiffer() ) .filter(Objects::nonNull) .map(BootstrapStateHandler::getBootstrapStateLock) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java index a567e08d696e..7fabe79a2af2 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java @@ -29,6 +29,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Objects; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -61,6 +62,7 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveKeyInfos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveTableKeysRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotPurgeRequest; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Status; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; import org.apache.ratis.protocol.ClientId; import org.slf4j.Logger; @@ -238,7 +240,7 @@ public BackgroundTaskResult call() throws InterruptedException { return BackgroundTaskResult.EmptyTaskResult.newResult(); } - private void submitSnapshotPurgeRequest(List purgeSnapshotKeys) { + private void submitSnapshotPurgeRequest(List purgeSnapshotKeys) throws InterruptedException { if (!purgeSnapshotKeys.isEmpty()) { SnapshotPurgeRequest snapshotPurgeRequest = SnapshotPurgeRequest .newBuilder() @@ -251,14 +253,16 @@ private void submitSnapshotPurgeRequest(List purgeSnapshotKeys) { .setClientId(clientId.toString()) .build(); - submitRequest(omRequest); + try (BootstrapStateHandler.Lock lock = getBootstrapStateLock().lock()) { + submitRequest(omRequest); + } } } private void submitSnapshotMoveDeletedKeys(SnapshotInfo snapInfo, List deletedKeys, List renamedList, - List dirsToMove) { + List dirsToMove) throws InterruptedException { SnapshotMoveTableKeysRequest.Builder moveDeletedKeysBuilder = SnapshotMoveTableKeysRequest.newBuilder() .setFromSnapshotID(toProtobuf(snapInfo.getSnapshotId())); @@ -287,15 +291,18 @@ private void submitSnapshotMoveDeletedKeys(SnapshotInfo snapInfo, .setSnapshotMoveTableKeysRequest(moveDeletedKeys) .setClientId(clientId.toString()) .build(); - - try (BootstrapStateHandler.Lock lock = new BootstrapStateHandler.Lock()) { + try (BootstrapStateHandler.Lock lock = getBootstrapStateLock().lock()) { submitRequest(omRequest); } } private void submitRequest(OMRequest omRequest) { try { - OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, getRunCount().get()); + Status status = + OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, getRunCount().get()).getStatus(); + if (!Objects.equals(status, Status.OK)) { + LOG.error("Request: {} failed with an status: {}. Will retry in the next run.", omRequest, status); + } } catch (ServiceException e) { LOG.error("Request: {} fired by SnapshotDeletingService failed. Will retry in the next run", omRequest, e); } From 9063a5d9d6f54248f1ed36f367c053df8c8de673 Mon Sep 17 00:00:00 2001 From: hemantk-12 Date: Tue, 4 Mar 2025 12:30:14 -0800 Subject: [PATCH 2/3] Added a flush for snapshot's RocksDBs --- .../hadoop/ozone/om/OMDBCheckpointServlet.java | 3 +++ .../apache/hadoop/ozone/om/OmSnapshotManager.java | 9 +++++++++ .../hadoop/ozone/om/snapshot/SnapshotCache.java | 14 ++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index 7c02df4bebff..eab703c4d272 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -668,6 +668,9 @@ public BootstrapStateHandler.Lock lock() // Then wait for the double buffer to be flushed. om.awaitDoubleBufferFlush(); + + // Flush all the pending transactions to get DB to a consistent state. + om.getOmSnapshotManager().flushLog(); return this; } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index 21f8b9fa28ff..baeb1efb5db3 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -422,6 +422,15 @@ public int getSnapshotCacheSize() { return snapshotCache == null ? 0 : snapshotCache.size(); } + /** + * Immediately flush the outstanding I/O operations of the DB. + */ + public void flushLog() { + if (snapshotCache != null) { + snapshotCache.flushLog(); + } + } + /** * Immediately invalidate all entries and close their DB instances in cache. */ diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index d2b64296dc60..e1a2d90cd118 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -116,6 +116,20 @@ public void invalidateAll() { } } + /** + * Immediately flush the outstanding I/O operations of the DB. + */ + public void flushLog() { + for (ReferenceCounted snapshot : dbMap.values()) { + try { + snapshot.get().getMetadataManager().getStore().flushLog(true); + } catch (IOException e) { + LOG.error("Failed to flush log for snapshot cache", e); + throw new IllegalStateException(e); + } + } + } + @Override public void close() { invalidateAll(); From 2695421bafd67263bb6b9e4e1135843a0d417a8f Mon Sep 17 00:00:00 2001 From: hemantk-12 Date: Wed, 5 Mar 2025 08:52:43 -0800 Subject: [PATCH 3/3] Revert "Added a flush for snapshot's RocksDBs" This reverts commit 9063a5d9d6f54248f1ed36f367c053df8c8de673. --- .../hadoop/ozone/om/OMDBCheckpointServlet.java | 3 --- .../apache/hadoop/ozone/om/OmSnapshotManager.java | 9 --------- .../hadoop/ozone/om/snapshot/SnapshotCache.java | 14 -------------- 3 files changed, 26 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index eab703c4d272..7c02df4bebff 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -668,9 +668,6 @@ public BootstrapStateHandler.Lock lock() // Then wait for the double buffer to be flushed. om.awaitDoubleBufferFlush(); - - // Flush all the pending transactions to get DB to a consistent state. - om.getOmSnapshotManager().flushLog(); return this; } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index baeb1efb5db3..21f8b9fa28ff 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -422,15 +422,6 @@ public int getSnapshotCacheSize() { return snapshotCache == null ? 0 : snapshotCache.size(); } - /** - * Immediately flush the outstanding I/O operations of the DB. - */ - public void flushLog() { - if (snapshotCache != null) { - snapshotCache.flushLog(); - } - } - /** * Immediately invalidate all entries and close their DB instances in cache. */ diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index e1a2d90cd118..d2b64296dc60 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -116,20 +116,6 @@ public void invalidateAll() { } } - /** - * Immediately flush the outstanding I/O operations of the DB. - */ - public void flushLog() { - for (ReferenceCounted snapshot : dbMap.values()) { - try { - snapshot.get().getMetadataManager().getStore().flushLog(true); - } catch (IOException e) { - LOG.error("Failed to flush log for snapshot cache", e); - throw new IllegalStateException(e); - } - } - } - @Override public void close() { invalidateAll();