From 039e9bf837963115d11c7107040e7e3c542b24b6 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 21 Feb 2024 14:10:43 -0800 Subject: [PATCH] HDDS-10408. NPE causes OM crash in Snapshot Purge request --- .../om/request/snapshot/OMSnapshotPurgeRequest.java | 11 +++++++++++ .../hadoop/ozone/om/snapshot/SnapshotUtils.java | 5 +++++ 2 files changed, 16 insertions(+) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java index 0fa9087e25e7..3f4d746adb54 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java @@ -35,6 +35,8 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotPurgeRequest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.HashMap; @@ -49,6 +51,8 @@ */ public class OMSnapshotPurgeRequest extends OMClientRequest { + private static final Logger LOG = LoggerFactory.getLogger(OMSnapshotPurgeRequest.class); + public OMSnapshotPurgeRequest(OMRequest omRequest) { super(omRequest); } @@ -83,6 +87,13 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn SnapshotInfo fromSnapshot = omMetadataManager.getSnapshotInfoTable() .get(snapTableKey); + if (fromSnapshot == null) { + // Snapshot may have been purged in the previous iteration of SnapshotDeletingService. + LOG.warn("The snapshot {} is not longer in snapshot table, It maybe removed in the previous " + + "Snapshot purge request.", snapTableKey); + continue; + } + SnapshotInfo nextSnapshot = SnapshotUtils .getNextActiveSnapshot(fromSnapshot, snapshotChainManager, omSnapshotManager); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java index 89823995d0cd..2041fa791a76 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java @@ -40,6 +40,7 @@ import java.util.UUID; import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; +import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.INVALID_SNAPSHOT_ERROR; import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.DIRECTORY_TABLE; import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.FILE_TABLE; import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.KEY_TABLE; @@ -148,6 +149,10 @@ public static SnapshotInfo getNextActiveSnapshot(SnapshotInfo snapInfo, // If the snapshot is deleted in the previous run, then the in-memory // SnapshotChainManager might throw NoSuchElementException as the snapshot // is removed in-memory but OMDoubleBuffer has not flushed yet. + if (snapInfo == null) { + throw new OMException("Snapshot Info is null. Cannot get the next snapshot", INVALID_SNAPSHOT_ERROR); + } + try { while (chainManager.hasNextPathSnapshot(snapInfo.getSnapshotPath(), snapInfo.getSnapshotId())) {