From 0a4194d05d2afe915c3ca8425d9b593997f4dae8 Mon Sep 17 00:00:00 2001 From: ashishk Date: Fri, 6 Dec 2024 12:53:55 +0530 Subject: [PATCH 1/4] HDDS-11807. Make callId different for each request through openKeyCleanupService. --- .../org/apache/hadoop/fs/ozone/TestHSync.java | 63 +++++++++++++++++++ .../om/service/OpenKeyCleanupService.java | 6 +- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java index cd6002ceee25..19f7013f84c5 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java @@ -496,6 +496,54 @@ public void testOfsHSync(boolean incrementalChunkList) throws Exception { } } + @Test + public void testHSyncOpenKeyCommitAfterExpiry() throws Exception { + // Set the fs.defaultFS + final String rootPath = String.format("%s://%s/", + OZONE_OFS_URI_SCHEME, CONF.get(OZONE_OM_ADDRESS_KEY)); + CONF.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); + + final Path key1 = new Path("hsync-key"); + final Path key2 = new Path("hsync-key1"); + + try (FileSystem fs = FileSystem.get(CONF)) { + // Create key1 + try (FSDataOutputStream os = fs.create(key1, true)) { + os.write(1); + os.hsync(); + // Create key2 + try (FSDataOutputStream os1 = fs.create(key2, true)) { + os1.write(1); + os1.hsync(); + // There should be 2 key in openFileTable + assertThat(2 == getOpenKeyInfo(BUCKET_LAYOUT).size()); + assertThat(2 == getKeyInfo(BUCKET_LAYOUT).size()); + + // Resume openKeyCleanupService + openKeyCleanupService.resume(); + GenericTestUtils.waitFor(() -> 0 == getOpenKeyInfo(BUCKET_LAYOUT).size(), 1000, 12000); + + // Verify entry from openKey gets committed eventually + GenericTestUtils.waitFor(() -> + 0 == getOpenKeyInfo(BUCKET_LAYOUT).size(), 1000, 12000); + // Verify key is still present + assertThat(2 == getKeyInfo(BUCKET_LAYOUT).size()); + + // Clean up + assertTrue(fs.delete(key1, false)); + assertTrue(fs.delete(key2, false)); + waitForEmptyDeletedTable(); + } catch (OMException ex) { + assertEquals(OMException.ResultCodes.KEY_NOT_FOUND, ex.getResult()); + } + } catch (OMException ex) { + assertEquals(OMException.ResultCodes.KEY_NOT_FOUND, ex.getResult()); + } finally { + openKeyCleanupService.suspend(); + } + } + } + @Test public void testHSyncDeletedKey() throws Exception { // Verify that a key can't be successfully hsync'ed again after it's deleted, @@ -595,6 +643,21 @@ private List getOpenKeyInfo(BucketLayout bucketLayout) { return omKeyInfo; } + private List getKeyInfo(BucketLayout bucketLayout) { + List omKeyInfo = new ArrayList<>(); + + Table openFileTable = + cluster.getOzoneManager().getMetadataManager().getKeyTable(bucketLayout); + try (TableIterator> + iterator = openFileTable.iterator()) { + while (iterator.hasNext()) { + omKeyInfo.add(iterator.next().getValue()); + } + } catch (Exception e) { + } + return omKeyInfo; + } + @Test public void testUncommittedBlocks() throws Exception { waitForEmptyDeletedTable(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java index c0d958f61213..06faddbf8d08 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hdds.utils.BackgroundTask; import org.apache.hadoop.hdds.utils.BackgroundTaskQueue; import org.apache.hadoop.hdds.utils.BackgroundTaskResult; +import org.apache.hadoop.ozone.ClientVersion; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.om.ExpiredOpenKeys; import org.apache.hadoop.ozone.om.KeyManager; @@ -78,6 +79,7 @@ public class OpenKeyCleanupService extends BackgroundService { private final int cleanupLimitPerTask; private final AtomicLong submittedOpenKeyCount; private final AtomicLong runCount; + private final AtomicLong callIdCount; private final AtomicBoolean suspended; public OpenKeyCleanupService(long interval, TimeUnit unit, long timeout, @@ -113,6 +115,7 @@ public OpenKeyCleanupService(long interval, TimeUnit unit, long timeout, this.submittedOpenKeyCount = new AtomicLong(0); this.runCount = new AtomicLong(0); + this.callIdCount = new AtomicLong(0); this.suspended = new AtomicBoolean(false); } @@ -244,6 +247,7 @@ private OMRequest createCommitKeyRequest( .setCmdType(Type.CommitKey) .setCommitKeyRequest(request) .setClientId(clientId.toString()) + .setVersion(ClientVersion.CURRENT_VERSION) .build(); } @@ -265,7 +269,7 @@ private OMRequest createDeleteOpenKeysRequest( private OMResponse submitRequest(OMRequest omRequest) { try { - return OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, runCount.get()); + return OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, callIdCount.incrementAndGet()); } catch (ServiceException e) { LOG.error("Open key " + omRequest.getCmdType() + " request failed. Will retry at next run.", e); From 0422c0687977680e75939a2ac3e18c9962552336 Mon Sep 17 00:00:00 2001 From: ashishk Date: Tue, 10 Dec 2024 15:17:50 +0530 Subject: [PATCH 2/4] Fix review comments --- .../org/apache/hadoop/fs/ozone/TestHSync.java | 20 +++++++++---------- .../om/service/OpenKeyCleanupService.java | 6 +++--- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java index 19f7013f84c5..f185addf6b83 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestHSync.java @@ -504,34 +504,32 @@ public void testHSyncOpenKeyCommitAfterExpiry() throws Exception { CONF.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); final Path key1 = new Path("hsync-key"); - final Path key2 = new Path("hsync-key1"); + final Path key2 = new Path("key2"); try (FileSystem fs = FileSystem.get(CONF)) { - // Create key1 + // Create key1 with hsync try (FSDataOutputStream os = fs.create(key1, true)) { os.write(1); os.hsync(); - // Create key2 + // Create key2 without hsync try (FSDataOutputStream os1 = fs.create(key2, true)) { os1.write(1); - os1.hsync(); // There should be 2 key in openFileTable assertThat(2 == getOpenKeyInfo(BUCKET_LAYOUT).size()); - assertThat(2 == getKeyInfo(BUCKET_LAYOUT).size()); + // One key will be in fileTable as hsynced + assertThat(1 == getKeyInfo(BUCKET_LAYOUT).size()); // Resume openKeyCleanupService openKeyCleanupService.resume(); - GenericTestUtils.waitFor(() -> 0 == getOpenKeyInfo(BUCKET_LAYOUT).size(), 1000, 12000); - - // Verify entry from openKey gets committed eventually + // Verify hsync openKey gets committed eventually + // Key without hsync is deleted GenericTestUtils.waitFor(() -> 0 == getOpenKeyInfo(BUCKET_LAYOUT).size(), 1000, 12000); - // Verify key is still present - assertThat(2 == getKeyInfo(BUCKET_LAYOUT).size()); + // Verify only one key is still present in fileTable + assertThat(1 == getKeyInfo(BUCKET_LAYOUT).size()); // Clean up assertTrue(fs.delete(key1, false)); - assertTrue(fs.delete(key2, false)); waitForEmptyDeletedTable(); } catch (OMException ex) { assertEquals(OMException.ResultCodes.KEY_NOT_FOUND, ex.getResult()); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java index 06faddbf8d08..1289d3e6a45b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java @@ -79,7 +79,7 @@ public class OpenKeyCleanupService extends BackgroundService { private final int cleanupLimitPerTask; private final AtomicLong submittedOpenKeyCount; private final AtomicLong runCount; - private final AtomicLong callIdCount; + private final AtomicLong callId; private final AtomicBoolean suspended; public OpenKeyCleanupService(long interval, TimeUnit unit, long timeout, @@ -115,7 +115,7 @@ public OpenKeyCleanupService(long interval, TimeUnit unit, long timeout, this.submittedOpenKeyCount = new AtomicLong(0); this.runCount = new AtomicLong(0); - this.callIdCount = new AtomicLong(0); + this.callId = new AtomicLong(0); this.suspended = new AtomicBoolean(false); } @@ -269,7 +269,7 @@ private OMRequest createDeleteOpenKeysRequest( private OMResponse submitRequest(OMRequest omRequest) { try { - return OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, callIdCount.incrementAndGet()); + return OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, callId.incrementAndGet()); } catch (ServiceException e) { LOG.error("Open key " + omRequest.getCmdType() + " request failed. Will retry at next run.", e); From dbcc81636f2e41806733249f9e38166af88bfa68 Mon Sep 17 00:00:00 2001 From: ashishk Date: Thu, 12 Dec 2024 09:18:03 +0530 Subject: [PATCH 3/4] Remove runCount and use CallId --- .../ozone/om/service/OpenKeyCleanupService.java | 7 ++----- .../om/service/TestOpenKeyCleanupService.java | 15 ++------------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java index 1289d3e6a45b..2d8277f6b6bf 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java @@ -78,7 +78,6 @@ public class OpenKeyCleanupService extends BackgroundService { private final Duration leaseThreshold; private final int cleanupLimitPerTask; private final AtomicLong submittedOpenKeyCount; - private final AtomicLong runCount; private final AtomicLong callId; private final AtomicBoolean suspended; @@ -114,19 +113,18 @@ public OpenKeyCleanupService(long interval, TimeUnit unit, long timeout, OMConfigKeys.OZONE_OM_OPEN_KEY_CLEANUP_LIMIT_PER_TASK_DEFAULT); this.submittedOpenKeyCount = new AtomicLong(0); - this.runCount = new AtomicLong(0); this.callId = new AtomicLong(0); this.suspended = new AtomicBoolean(false); } /** - * Returns the number of times this Background service has run. + * Returns the number of times this Background service has called OM for delete/commit keys. * * @return Long, run count. */ @VisibleForTesting public long getRunCount() { - return runCount.get(); + return callId.get(); } /** @@ -192,7 +190,6 @@ public BackgroundTaskResult call() throws Exception { if (!shouldRun()) { return BackgroundTaskResult.EmptyTaskResult.newResult(); } - runCount.incrementAndGet(); long startTime = Time.monotonicNow(); final ExpiredOpenKeys expiredOpenKeys; try { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java index 014865f919fe..54ef6967a7a6 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java @@ -189,9 +189,6 @@ public void testCleanupExpiredOpenKeys( GenericTestUtils.waitFor( () -> openKeyCleanupService.getSubmittedOpenKeyCount() >= oldkeyCount + keyCount, SERVICE_INTERVAL, WAIT_TIME); - GenericTestUtils.waitFor( - () -> openKeyCleanupService.getRunCount() >= oldrunCount + 2, - SERVICE_INTERVAL, WAIT_TIME); waitForOpenKeyCleanup(false, BucketLayout.DEFAULT); waitForOpenKeyCleanup(hsync, BucketLayout.FILE_SYSTEM_OPTIMIZED); @@ -353,13 +350,8 @@ public void testExcludeMPUOpenKeys( BucketLayout.FILE_SYSTEM_OPTIMIZED); openKeyCleanupService.resume(); - - GenericTestUtils.waitFor( - () -> openKeyCleanupService.getRunCount() >= oldrunCount + 2, - SERVICE_INTERVAL, WAIT_TIME); - - // wait for requests to complete - Thread.sleep(SERVICE_INTERVAL); + // wait for openKeyCleanupService to complete at least once + Thread.sleep(SERVICE_INTERVAL * 2); // No expired open keys fetched assertEquals(openKeyCleanupService.getSubmittedOpenKeyCount(), oldkeyCount); @@ -423,9 +415,6 @@ public void testCleanupExpiredOpenMPUPartKeys( GenericTestUtils.waitFor( () -> openKeyCleanupService.getSubmittedOpenKeyCount() >= oldkeyCount + partCount, SERVICE_INTERVAL, WAIT_TIME); - GenericTestUtils.waitFor( - () -> openKeyCleanupService.getRunCount() >= oldrunCount + 2, - SERVICE_INTERVAL, WAIT_TIME); // No expired MPU parts fetched waitForOpenKeyCleanup(false, BucketLayout.DEFAULT); From e34569abd4eb163b9d480beecfbd3bc4fb841a4f Mon Sep 17 00:00:00 2001 From: ashishk Date: Fri, 13 Dec 2024 15:36:29 +0530 Subject: [PATCH 4/4] Remove getRunCount --- .../hadoop/ozone/om/service/OpenKeyCleanupService.java | 10 ---------- .../ozone/om/service/TestOpenKeyCleanupService.java | 9 +++------ 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java index 2d8277f6b6bf..6d53e48a0fdb 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java @@ -117,16 +117,6 @@ public OpenKeyCleanupService(long interval, TimeUnit unit, long timeout, this.suspended = new AtomicBoolean(false); } - /** - * Returns the number of times this Background service has called OM for delete/commit keys. - * - * @return Long, run count. - */ - @VisibleForTesting - public long getRunCount() { - return callId.get(); - } - /** * Suspend the service (for testing). */ diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java index 54ef6967a7a6..ab22b353bd7f 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java @@ -166,8 +166,7 @@ public void testCleanupExpiredOpenKeys( // wait for submitted tasks to complete Thread.sleep(SERVICE_INTERVAL); final long oldkeyCount = openKeyCleanupService.getSubmittedOpenKeyCount(); - final long oldrunCount = openKeyCleanupService.getRunCount(); - LOG.info("oldkeyCount={}, oldrunCount={}", oldkeyCount, oldrunCount); + LOG.info("oldkeyCount={}", oldkeyCount); final OMMetrics metrics = om.getMetrics(); long numKeyHSyncs = metrics.getNumKeyHSyncs(); @@ -329,8 +328,7 @@ public void testExcludeMPUOpenKeys( // wait for submitted tasks to complete Thread.sleep(SERVICE_INTERVAL); final long oldkeyCount = openKeyCleanupService.getSubmittedOpenKeyCount(); - final long oldrunCount = openKeyCleanupService.getRunCount(); - LOG.info("oldMpuKeyCount={}, oldMpuRunCount={}", oldkeyCount, oldrunCount); + LOG.info("oldMpuKeyCount={}", oldkeyCount); final OMMetrics metrics = om.getMetrics(); long numKeyHSyncs = metrics.getNumKeyHSyncs(); @@ -389,8 +387,7 @@ public void testCleanupExpiredOpenMPUPartKeys( // wait for submitted tasks to complete Thread.sleep(SERVICE_INTERVAL); final long oldkeyCount = openKeyCleanupService.getSubmittedOpenKeyCount(); - final long oldrunCount = openKeyCleanupService.getRunCount(); - LOG.info("oldMpuKeyCount={}, oldMpuRunCount={}", oldkeyCount, oldrunCount); + LOG.info("oldMpuKeyCount={},", oldkeyCount); final OMMetrics metrics = om.getMetrics(); long numOpenKeysCleaned = metrics.getNumOpenKeysCleaned();