Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -561,26 +561,38 @@ public void testDirectoryOps(BucketLayout bucketLayout) throws Exception {
assertEquals(initialNumKeyRenames + expectedRenames, getLongCounter("NumKeyRenames", omMetrics));
}

@SuppressWarnings("checkstyle:methodlength")
@Test
public void testSnapshotOps() throws Exception {
// This tests needs enough dataNodes to allocate the blocks for the keys.
// get initial values for metrics
MetricsRecordBuilder omMetrics = getMetrics("OMMetrics");
long initialNumSnapshotCreateFails = getLongCounter("NumSnapshotCreateFails", omMetrics);
long initialNumSnapshotCreates = getLongCounter("NumSnapshotCreates", omMetrics);
long initialNumSnapshotInfoFails = getLongCounter("NumSnapshotInfoFails", omMetrics);
long initialNumSnapshotInfos = getLongCounter("NumSnapshotInfos", omMetrics);
long initialNumSnapshotListFails = getLongCounter("NumSnapshotListFails", omMetrics);
long initialNumSnapshotLists = getLongCounter("NumSnapshotLists", omMetrics);
long initialNumSnapshotActive = getLongCounter("NumSnapshotActive", omMetrics);
long initialNumSnapshotDeleted = getLongCounter("NumSnapshotDeleted", omMetrics);
long initialNumSnapshotDeletes = getLongCounter("NumSnapshotDeletes", omMetrics);
long initialNumSnapshotDeleteFails = getLongCounter("NumSnapshotDeleteFails", omMetrics);
long initialNumSnapshotDiffJobs = getLongCounter("NumSnapshotDiffJobs", omMetrics);
long initialNumSnapshotDiffJobFails = getLongCounter("NumSnapshotDiffJobFails", omMetrics);
long initialNumListSnapshotDiffJobs = getLongCounter("NumListSnapshotDiffJobs", omMetrics);
long initialNumListSnapshotDiffJobFails = getLongCounter("NumListSnapshotDiffJobFails", omMetrics);
long initialNumCancelSnapshotDiffs = getLongCounter("NumCancelSnapshotDiffs", omMetrics);
long initialNumCancelSnapshotDiffFails = getLongCounter("NumCancelSnapshotDiffFails", omMetrics);
long initialNumSnapshotRenames = getLongCounter("NumSnapshotRenames", omMetrics);
long initialNumSnapshotRenameFails = getLongCounter("NumSnapshotRenameFails", omMetrics);

OmBucketInfo omBucketInfo = createBucketInfo(false);

String volumeName = omBucketInfo.getVolumeName();
String bucketName = omBucketInfo.getBucketName();
String snapshot1 = "snap1";
String snapshot2 = "snap2";
String snapshot3 = "snap3";

writeClient.createBucket(omBucketInfo);

Expand Down Expand Up @@ -624,9 +636,52 @@ public void testSnapshotOps() throws Exception {
}
}
omMetrics = getMetrics("OMMetrics");

assertEquals(initialNumSnapshotDiffJobs + 1, getLongCounter("NumSnapshotDiffJobs", omMetrics));
assertEquals(initialNumSnapshotDiffJobFails, getLongCounter("NumSnapshotDiffJobFails", omMetrics));

// List snapshot diff jobs
writeClient.listSnapshotDiffJobs(volumeName, bucketName, "", true, null, 1000);

omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumListSnapshotDiffJobs + 1, getLongCounter("NumListSnapshotDiffJobs", omMetrics));
assertEquals(initialNumListSnapshotDiffJobFails, getLongCounter("NumListSnapshotDiffJobFails", omMetrics));

// List snapshot diff jobs: invalid bucket case.
assertThrows(OMException.class, () ->
writeClient.listSnapshotDiffJobs(volumeName, "invalidBucket", "", true, null, 1000));
omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumListSnapshotDiffJobs + 2, getLongCounter("NumListSnapshotDiffJobs", omMetrics));
assertEquals(initialNumListSnapshotDiffJobFails + 1, getLongCounter("NumListSnapshotDiffJobFails", omMetrics));

// Cancel snapshot diff
writeClient.cancelSnapshotDiff(volumeName, bucketName, snapshot1, snapshot2);

omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumCancelSnapshotDiffs + 1, getLongCounter("NumCancelSnapshotDiffs", omMetrics));
assertEquals(initialNumCancelSnapshotDiffFails, getLongCounter("NumCancelSnapshotDiffFails", omMetrics));

// Cancel snapshot diff job: invalid bucket case.
assertThrows(OMException.class, () ->
writeClient.cancelSnapshotDiff(volumeName, "invalidBucket", snapshot1, snapshot2));
omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumCancelSnapshotDiffs + 2, getLongCounter("NumCancelSnapshotDiffs", omMetrics));
assertEquals(initialNumCancelSnapshotDiffFails + 1, getLongCounter("NumCancelSnapshotDiffFails", omMetrics));

// Get snapshot info
writeClient.getSnapshotInfo(volumeName, bucketName, snapshot1);

omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumSnapshotInfos + 1, getLongCounter("NumSnapshotInfos", omMetrics));
assertEquals(initialNumSnapshotInfoFails, getLongCounter("NumSnapshotInfoFails", omMetrics));

// Get snapshot info: invalid snapshot case.
assertThrows(OMException.class, () ->
writeClient.getSnapshotInfo(volumeName, bucketName, "invalidSnapshot"));
omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumSnapshotInfos + 2, getLongCounter("NumSnapshotInfos", omMetrics));
assertEquals(initialNumSnapshotInfoFails + 1, getLongCounter("NumSnapshotInfoFails", omMetrics));

// List snapshots
writeClient.listSnapshot(
volumeName, bucketName, null, null, Integer.MAX_VALUE);
Expand All @@ -643,6 +698,40 @@ public void testSnapshotOps() throws Exception {
omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumSnapshotLists + 2, getLongCounter("NumSnapshotLists", omMetrics));
assertEquals(initialNumSnapshotListFails + 1, getLongCounter("NumSnapshotListFails", omMetrics));

// Rename snapshot
writeClient.renameSnapshot(volumeName, bucketName, snapshot2, snapshot3);

omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumSnapshotActive + 2, getLongCounter("NumSnapshotActive", omMetrics));
assertEquals(initialNumSnapshotRenames + 1, getLongCounter("NumSnapshotRenames", omMetrics));
assertEquals(initialNumSnapshotRenameFails, getLongCounter("NumSnapshotRenameFails", omMetrics));

// Rename snapshot: invalid snapshot case.
assertThrows(OMException.class, () -> writeClient.renameSnapshot(volumeName,
bucketName, snapshot2, snapshot3));
omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumSnapshotActive + 2, getLongCounter("NumSnapshotActive", omMetrics));
assertEquals(initialNumSnapshotRenames + 2, getLongCounter("NumSnapshotRenames", omMetrics));
assertEquals(initialNumSnapshotRenameFails + 1, getLongCounter("NumSnapshotRenameFails", omMetrics));

// Delete snapshot
writeClient.deleteSnapshot(volumeName, bucketName, snapshot3);

omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumSnapshotActive + 1, getLongCounter("NumSnapshotActive", omMetrics));
assertEquals(initialNumSnapshotDeletes + 1, getLongCounter("NumSnapshotDeletes", omMetrics));
assertEquals(initialNumSnapshotDeleted + 1, getLongCounter("NumSnapshotDeleted", omMetrics));
assertEquals(initialNumSnapshotDeleteFails, getLongCounter("NumSnapshotDeleteFails", omMetrics));

// Delete snapshot: invalid snapshot case.
assertThrows(OMException.class, () -> writeClient.deleteSnapshot(volumeName,
bucketName, snapshot3));
omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumSnapshotActive + 1, getLongCounter("NumSnapshotActive", omMetrics));
assertEquals(initialNumSnapshotDeletes + 2, getLongCounter("NumSnapshotDeletes", omMetrics));
assertEquals(initialNumSnapshotDeleted + 1, getLongCounter("NumSnapshotDeleted", omMetrics));
assertEquals(initialNumSnapshotDeleteFails + 1, getLongCounter("NumSnapshotDeleteFails", omMetrics));
}

private OMMetadataManager mockWritePathExceptions(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,13 @@ public class OMMetrics implements OmMetadataReaderMetrics {
private @Metric MutableCounterLong numSnapshotCreates;
private @Metric MutableCounterLong numSnapshotDeletes;
private @Metric MutableCounterLong numSnapshotLists;
private @Metric MutableCounterLong numSnapshotRenames;
private @Metric MutableCounterLong numSnapshotDiffJobs;
private @Metric MutableCounterLong numSnapshotInfos;
private @Metric MutableCounterLong numSnapshotPurges;
private @Metric MutableCounterLong numSnapshotSetProperties;
private @Metric MutableCounterLong numCancelSnapshotDiffs;
private @Metric MutableCounterLong numListSnapshotDiffJobs;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the patch @jojochuang. Can you also add metrics to track OMSnapshotMoveTableKeysRequest.java and OMSnapshotMoveDeletedKeysRequest.java?

Copy link
Contributor Author

@jojochuang jojochuang Jun 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@SaketaChalamchala good idea. This PR implements metrics/tests for client operations. We should add the same for internal operations like you mentioned.

This PR is already quite lengthy. Let me open another jira to track the internal snapshot operations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

@SaketaChalamchala SaketaChalamchala Jun 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case let's also move SnapshotPurge metrics to the internal metrics as well. This patch LGTM otherwise.


private @Metric MutableGaugeInt numSnapshotCacheSize;
private @Metric MutableCounterLong numGetFileStatus;
Expand Down Expand Up @@ -138,12 +141,15 @@ public class OMMetrics implements OmMetadataReaderMetrics {
private @Metric MutableCounterLong numOpenKeyDeleteRequestFails;
private @Metric MutableCounterLong numExpiredMPUAbortRequestFails;
private @Metric MutableCounterLong numSnapshotCreateFails;
private @Metric MutableCounterLong numSnapshotRenameFails;
private @Metric MutableCounterLong numSnapshotDeleteFails;
private @Metric MutableCounterLong numSnapshotListFails;
private @Metric MutableCounterLong numSnapshotDiffJobFails;
private @Metric MutableCounterLong numSnapshotInfoFails;
private @Metric MutableCounterLong numSnapshotPurgeFails;
private @Metric MutableCounterLong numSnapshotSetPropertyFails;
private @Metric MutableCounterLong numCancelSnapshotDiffFails;
private @Metric MutableCounterLong numListSnapshotDiffJobFails;

private @Metric MutableCounterLong numSnapshotActive;
private @Metric MutableCounterLong numSnapshotDeleted;
Expand Down Expand Up @@ -477,6 +483,14 @@ public void incNumSnapshotCreateFails() {
numSnapshotCreateFails.incr();
}

public void incNumSnapshotRenames() {
numSnapshotRenames.incr();
}

public void incNumSnapshotRenameFails() {
numSnapshotRenameFails.incr();
}

public void incNumSnapshotDeletes() {
numSnapshotDeletes.incr();
}
Expand Down Expand Up @@ -505,6 +519,22 @@ public void incNumSnapshotDiffJobs() {
numSnapshotDiffJobs.incr();
}

public void incNumCancelSnapshotDiffs() {
numCancelSnapshotDiffs.incr();
}

public void incNumCancelSnapshotDiffJobFails() {
numCancelSnapshotDiffFails.incr();
}

public void incNumListSnapshotDiffJobs() {
numListSnapshotDiffJobs.incr();
}

public void incNumListSnapshotDiffJobFails() {
numListSnapshotDiffJobFails.incr();
}

public void incNumSnapshotListFails() {
numSnapshotListFails.incr();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5044,6 +5044,7 @@ public CancelSnapshotDiffResponse cancelSnapshotDiff(String volume,
auditMap.put(OzoneConsts.BUCKET, bucket);
auditMap.put(OzoneConsts.FROM_SNAPSHOT, fromSnapshot);
auditMap.put(OzoneConsts.TO_SNAPSHOT, toSnapshot);
metrics.incNumCancelSnapshotDiffs();

try {
ResolvedBucket resolvedBucket = this.resolveBucketLink(Pair.of(volume, bucket), false);
Expand All @@ -5054,6 +5055,7 @@ public CancelSnapshotDiffResponse cancelSnapshotDiff(String volume,
return omSnapshotManager.cancelSnapshotDiff(resolvedBucket.realVolume(), resolvedBucket.realBucket(),
fromSnapshot, toSnapshot);
} catch (Exception ex) {
metrics.incNumCancelSnapshotDiffJobFails();
auditSuccess = false;
AUDIT.logReadFailure(buildAuditMessageForFailure(OMAction.CANCEL_SNAPSHOT_DIFF_JOBS,
auditMap, ex));
Expand All @@ -5079,6 +5081,7 @@ public ListSnapshotDiffJobResponse listSnapshotDiffJobs(
auditMap.put(OzoneConsts.VOLUME, volume);
auditMap.put(OzoneConsts.BUCKET, bucket);
auditMap.put(OzoneConsts.JOB_STATUS, jobStatus);
metrics.incNumListSnapshotDiffJobs();

try {
ResolvedBucket resolvedBucket = this.resolveBucketLink(Pair.of(volume, bucket), false);
Expand All @@ -5089,6 +5092,7 @@ public ListSnapshotDiffJobResponse listSnapshotDiffJobs(
return omSnapshotManager.getSnapshotDiffList(resolvedBucket.realVolume(), resolvedBucket.realBucket(),
jobStatus, listAllStatus, prevSnapshotDiffJob, maxListResult);
} catch (Exception ex) {
metrics.incNumListSnapshotDiffJobFails();
auditSuccess = false;
AUDIT.logReadFailure(buildAuditMessageForFailure(OMAction.LIST_SNAPSHOT_DIFF_JOBS,
auditMap, ex));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.hadoop.ozone.OmUtils;
import org.apache.hadoop.ozone.audit.AuditLogger;
import org.apache.hadoop.ozone.audit.OMAction;
import org.apache.hadoop.ozone.om.OMMetrics;
import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
import org.apache.hadoop.ozone.om.OzoneManager;
import org.apache.hadoop.ozone.om.ResolvedBucket;
Expand Down Expand Up @@ -110,6 +111,9 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException {

@Override
public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, ExecutionContext context) {
OMMetrics omMetrics = ozoneManager.getMetrics();
omMetrics.incNumSnapshotRenames();

boolean acquiredBucketLock = false;
boolean acquiredSnapshotOldLock = false;
boolean acquiredSnapshotNewLock = false;
Expand Down Expand Up @@ -153,7 +157,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut
String snapshotNewTableKey = SnapshotInfo.getTableKey(volumeName, bucketName, snapshotNewName);

if (omMetadataManager.getSnapshotInfoTable().isExist(snapshotNewTableKey)) {
throw new OMException("Snapshot with name " + snapshotNewName + "already exist",
throw new OMException("Snapshot with name " + snapshotNewName + " already exist",
FILE_ALREADY_EXISTS);
}

Expand Down Expand Up @@ -200,6 +204,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut
omResponse.build(), snapshotOldTableKey, snapshotNewTableKey, snapshotOldInfo);

} catch (IOException | InvalidPathException ex) {
omMetrics.incNumSnapshotRenameFails();
exception = ex;
omClientResponse = new OMSnapshotRenameResponse(
createErrorOMResponse(omResponse, exception));
Expand Down