diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java index b8e6feaee3a6..a3f56a95b335 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java @@ -44,6 +44,7 @@ import org.apache.hadoop.ozone.client.VolumeArgs; import org.apache.hadoop.ozone.client.protocol.ClientProtocol; import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OMMetrics; import org.apache.hadoop.ozone.om.TrashPolicyOzone; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.security.acl.IAccessAuthorizer.ACLIdentityType; @@ -174,6 +175,10 @@ public static void teardown() { IOUtils.closeQuietly(fs); } + private OMMetrics getOMMetrics() { + return cluster.getOzoneManager().getMetrics(); + } + @Test public void testOzoneFsServiceLoader() throws IOException { OzoneConfiguration confTestLoader = new OzoneConfiguration(); @@ -1203,6 +1208,12 @@ public void testTrash() throws Exception { "fs.trash.classname", TrashPolicy.class). isAssignableFrom(TrashPolicyOzone.class)); + long prevNumTrashDeletes = getOMMetrics().getNumTrashDeletes(); + long prevNumTrashFileDeletes = getOMMetrics().getNumTrashFilesDeletes(); + + long prevNumTrashRenames = getOMMetrics().getNumTrashRenames(); + long prevNumTrashFileRenames = getOMMetrics().getNumTrashFilesRenames(); + // Call moveToTrash. We can't call protected fs.rename() directly trash.moveToTrash(path); @@ -1225,6 +1236,17 @@ public void testTrash() throws Exception { } }, 1000, 180000); + // This condition should pass after the checkpoint + Assert.assertTrue(getOMMetrics() + .getNumTrashRenames() > prevNumTrashRenames); + Assert.assertTrue(getOMMetrics() + .getNumTrashFilesRenames() > prevNumTrashFileRenames); + + // This condition should succeed once the checkpoint directory is deleted + GenericTestUtils.waitFor( + () -> getOMMetrics().getNumTrashDeletes() > prevNumTrashDeletes + && getOMMetrics().getNumTrashFilesDeletes() + > prevNumTrashFileDeletes, 100, 180000); // Cleanup ofs.delete(trashRoot, true); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java index 310a8a628fc9..bc483434ccd2 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java @@ -147,6 +147,22 @@ public class OMMetrics { private @Metric MutableCounterLong numListMultipartUploadFails; private @Metric MutableCounterLong numListMultipartUploads; + // Metrics related to OM Trash. + private @Metric MutableCounterLong numTrashRenames; + private @Metric MutableCounterLong numTrashDeletes; + private @Metric MutableCounterLong numTrashListStatus; + private @Metric MutableCounterLong numTrashGetFileStatus; + private @Metric MutableCounterLong numTrashGetTrashRoots; + private @Metric MutableCounterLong numTrashExists; + private @Metric MutableCounterLong numTrashWriteRequests; + private @Metric MutableCounterLong numTrashFilesRenames; + private @Metric MutableCounterLong numTrashFilesDeletes; + private @Metric MutableCounterLong numTrashActiveCycles; + private @Metric MutableCounterLong numTrashCheckpointsProcessed; + private @Metric MutableCounterLong numTrashFails; + private @Metric MutableCounterLong numTrashRootsEnqueued; + private @Metric MutableCounterLong numTrashRootsProcessed; + public OMMetrics() { } @@ -873,6 +889,75 @@ public long getNumRemoveAcl() { return numRemoveAcl.value(); } + public void incNumTrashRenames() { + numTrashRenames.incr(); + } + + public long getNumTrashRenames() { + return numTrashRenames.value(); + } + + public void incNumTrashDeletes() { + numTrashDeletes.incr(); + } + + public long getNumTrashDeletes() { + return numTrashDeletes.value(); + } + + public void incNumTrashListStatus() { + numTrashListStatus.incr(); + } + + public void incNumTrashGetFileStatus() { + numTrashGetFileStatus.incr(); + } + + public void incNumTrashGetTrashRoots() { + numTrashGetTrashRoots.incr(); + } + + public void incNumTrashExists() { + numTrashExists.incr(); + } + + public void incNumTrashWriteRequests() { + numTrashWriteRequests.incr(); + } + + public void incNumTrashFilesRenames() { + numTrashFilesRenames.incr(); + } + + public long getNumTrashFilesRenames() { + return numTrashFilesRenames.value(); + } + + public void incNumTrashFilesDeletes() { + numTrashFilesDeletes.incr(); + } + + public long getNumTrashFilesDeletes() { + return numTrashFilesDeletes.value(); + } + + + public void incNumTrashActiveCycles() { + numTrashActiveCycles.incr(); + } + + public void incNumTrashRootsEnqueued() { + numTrashRootsEnqueued.incr(); + } + + public void incNumTrashRootsProcessed() { + numTrashRootsProcessed.incr(); + } + + public void incNumTrashFails() { + numTrashFails.incr(); + } + public void unRegister() { MetricsSystem ms = DefaultMetricsSystem.instance(); ms.unregisterSource(SOURCE_NAME); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java index 436b10dae810..ba439608be08 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java @@ -68,15 +68,15 @@ public class TrashOzoneFileSystem extends FileSystem { private static final int OZONE_FS_ITERATE_BATCH_SIZE = 100; - private OzoneManager ozoneManager; + private final OzoneManager ozoneManager; - private String userName; + private final String userName; private String ofsPathPrefix; private final AtomicLong runCount; - private static ClientId clientId = ClientId.randomId(); + private final static ClientId CLIENT_ID = ClientId.randomId(); private static final Logger LOG = LoggerFactory.getLogger(TrashOzoneFileSystem.class); @@ -91,7 +91,7 @@ public TrashOzoneFileSystem(OzoneManager ozoneManager) throws IOException { private RaftClientRequest getRatisRequest( OzoneManagerProtocolProtos.OMRequest omRequest) { return RaftClientRequest.newBuilder() - .setClientId(clientId) + .setClientId(CLIENT_ID) .setServerId(ozoneManager.getOmRatisServer().getRaftPeerId()) .setGroupId(ozoneManager.getOmRatisServer().getRaftGroupId()) .setCallId(runCount.getAndIncrement()) @@ -105,6 +105,7 @@ private RaftClientRequest getRatisRequest( private void submitRequest(OzoneManagerProtocolProtos.OMRequest omRequest) throws Exception { + ozoneManager.getMetrics().incNumTrashWriteRequests(); if (ozoneManager.isRatisEnabled()) { RaftClientRequest req = getRatisRequest(omRequest); ozoneManager.getOmRatisServer().submitRequest(omRequest, req); @@ -144,6 +145,7 @@ public FSDataOutputStream append(Path path, int i, @Override public boolean rename(Path src, Path dst) throws IOException { + ozoneManager.getMetrics().incNumTrashRenames(); LOG.trace("Src:" + src + "Dst:" + dst); // check whether the src and dst belong to the same bucket & trashroot. OFSPath srcPath = new OFSPath(src); @@ -159,6 +161,7 @@ public boolean rename(Path src, Path dst) throws IOException { @Override public boolean delete(Path path, boolean b) throws IOException { + ozoneManager.getMetrics().incNumTrashDeletes(); DeleteIterator iterator = new DeleteIterator(path, true); iterator.iterate(); return true; @@ -166,6 +169,7 @@ public boolean delete(Path path, boolean b) throws IOException { @Override public FileStatus[] listStatus(Path path) throws IOException { + ozoneManager.getMetrics().incNumTrashListStatus(); List fileStatuses = new ArrayList<>(); OmKeyArgs keyArgs = constructOmKeyArgs(path); List list = ozoneManager. @@ -216,6 +220,7 @@ public boolean mkdirs(Path path, @Override public FileStatus getFileStatus(Path path) throws IOException { + ozoneManager.getMetrics().incNumGetFileStatus(); OmKeyArgs keyArgs = constructOmKeyArgs(path); OzoneFileStatus ofs = ozoneManager.getKeyManager().getFileStatus(keyArgs); FileStatus fileStatus = convertToFileStatus(ofs); @@ -240,6 +245,7 @@ private OmKeyArgs constructOmKeyArgs(Path path) { @Override public Collection getTrashRoots(boolean allUsers) { Preconditions.checkArgument(allUsers); + ozoneManager.getMetrics().incNumTrashGetTrashRoots(); Iterator, CacheValue>> bucketIterator = ozoneManager.getMetadataManager().getBucketIterator(); @@ -271,6 +277,7 @@ public Collection getTrashRoots(boolean allUsers) { @Override public boolean exists(Path f) throws IOException { + ozoneManager.getMetrics().incNumTrashExists(); try { this.getFileStatus(f); return true; @@ -386,6 +393,7 @@ boolean processKeyPath(List keyPathList) { OzoneManagerProtocolProtos.OMRequest omRequest = getRenameKeyRequest(src, dst); try { + ozoneManager.getMetrics().incNumTrashFilesRenames(); submitRequest(omRequest); } catch (Throwable e) { LOG.error("Couldn't send rename request.", e); @@ -416,7 +424,7 @@ boolean processKeyPath(List keyPathList) { .build(); OzoneManagerProtocolProtos.OMRequest omRequest = OzoneManagerProtocolProtos.OMRequest.newBuilder() - .setClientId(clientId.toString()) + .setClientId(CLIENT_ID.toString()) .setRenameKeyRequest(renameKeyRequest) .setCmdType(OzoneManagerProtocolProtos.Type.RenameKey) .build(); @@ -447,6 +455,7 @@ boolean processKeyPath(List keyPathList) { OzoneManagerProtocolProtos.OMRequest omRequest = getDeleteKeyRequest(path); try { + ozoneManager.getMetrics().incNumTrashFilesDeletes(); submitRequest(omRequest); } catch (Throwable e) { LOG.error("Couldn't send rename request.", e); @@ -474,7 +483,7 @@ boolean processKeyPath(List keyPathList) { .build(); OzoneManagerProtocolProtos.OMRequest omRequest = OzoneManagerProtocolProtos.OMRequest.newBuilder() - .setClientId(clientId.toString()) + .setClientId(CLIENT_ID.toString()) .setDeleteKeyRequest(deleteKeyRequest) .setCmdType(OzoneManagerProtocolProtos.Type.DeleteKey) .build(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashPolicyOzone.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashPolicyOzone.java index 2e17b4f0657b..edb4a4081431 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashPolicyOzone.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashPolicyOzone.java @@ -161,30 +161,35 @@ public void run() { } try { + om.getMetrics().incNumTrashActiveCycles(); now = Time.now(); if (now >= end) { Collection trashRoots; trashRoots = fs.getTrashRoots(true); // list all trash dirs LOG.debug("Trash root Size: " + trashRoots.size()); for (FileStatus trashRoot : trashRoots) { // dump each trash - LOG.info("Trashroot:" + trashRoot.getPath().toString()); + LOG.debug("Trashroot:" + trashRoot.getPath().toString()); if (!trashRoot.isDirectory()) { continue; } TrashPolicyOzone trash = new TrashPolicyOzone(fs, conf, om); Runnable task = ()->{ try { + om.getMetrics().incNumTrashRootsProcessed(); trash.deleteCheckpoint(trashRoot.getPath(), false); trash.createCheckpoint(trashRoot.getPath(), new Date(Time.now())); } catch (Exception e) { + om.getMetrics().incNumTrashFails(); LOG.error("Unable to checkpoint:" + trashRoot.getPath(), e); } }; + om.getMetrics().incNumTrashRootsEnqueued(); executor.submit(task); } } } catch (Exception e) { + om.getMetrics().incNumTrashFails(); LOG.warn("RuntimeException during Trash.Emptier.run(): ", e); } } @@ -197,7 +202,7 @@ public void run() { try { executor.awaitTermination(60, TimeUnit.SECONDS); } catch (InterruptedException e) { - LOG.error("Error attempting to shutdown"); + LOG.error("Error attempting to shutdown", e); } } } @@ -226,10 +231,11 @@ private void createCheckpoint(Path trashRoot, Date date) throws IOException { while (true) { try { fs.rename(current, checkpoint); - LOG.info("Created trash checkpoint: " + checkpoint.toUri().getPath()); + LOG.debug("Created trash checkpoint: " + checkpoint.toUri().getPath()); break; } catch (FileAlreadyExistsException e) { if (++attempt > 1000) { + om.getMetrics().incNumTrashFails(); throw new IOException("Failed to checkpoint trash: " + checkpoint); } checkpoint = checkpointBase.suffix("-" + attempt); @@ -239,7 +245,7 @@ private void createCheckpoint(Path trashRoot, Date date) throws IOException { private void deleteCheckpoint(Path trashRoot, boolean deleteImmediately) throws IOException { - LOG.info("TrashPolicyOzone#deleteCheckpoint for trashRoot: " + trashRoot); + LOG.debug("TrashPolicyOzone#deleteCheckpoint for trashRoot: " + trashRoot); FileStatus[] dirs = null; try { @@ -261,14 +267,16 @@ private void deleteCheckpoint(Path trashRoot, boolean deleteImmediately) try { time = getTimeFromCheckpoint(name); } catch (ParseException e) { + om.getMetrics().incNumTrashFails(); LOG.warn("Unexpected item in trash: "+dir+". Ignoring."); continue; } if (((now - deletionInterval) > time) || deleteImmediately) { if (fs.delete(path, true)) { - LOG.info("Deleted trash checkpoint: "+dir); + LOG.debug("Deleted trash checkpoint:{} ", dir); } else { + om.getMetrics().incNumTrashFails(); LOG.warn("Couldn't delete checkpoint: " + dir + " Ignoring."); } }