diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java
index 36c1a5370935..7b40f265dc92 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java
@@ -51,6 +51,12 @@ public abstract class BackgroundService {
private final TimeUnit unit;
private final PeriodicalTask service;
+ public BackgroundService(String serviceName, long interval,
+ TimeUnit unit, int threadPoolSize) {
+ // Delegate with serviceTimeout = 0, which disables the timeout.
+ this(serviceName, interval, unit, threadPoolSize, 0);
+ }
+
public BackgroundService(String serviceName, long interval,
TimeUnit unit, int threadPoolSize, long serviceTimeout) {
this.interval = interval;
@@ -117,7 +123,8 @@ public synchronized void run() {
LOG.warn("Background task execution failed", e);
} finally {
long endTime = System.nanoTime();
- if (endTime - startTime > serviceTimeoutInNanos) {
+ if (serviceTimeoutInNanos > 0 &&
+ endTime - startTime > serviceTimeoutInNanos) {
LOG.warn("{} Background task execution took {}ns > {}ns(timeout)",
serviceName, endTime - startTime, serviceTimeoutInNanos);
}
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
index c5c442627218..083750920d7b 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
@@ -158,23 +158,6 @@ public final class OzoneConfigKeys {
public static final String OZONE_BLOCK_DELETING_SERVICE_INTERVAL_DEFAULT
= "60s";
- /**
- * The interval of open key clean service.
- */
- public static final String OZONE_OPEN_KEY_CLEANUP_SERVICE_INTERVAL_SECONDS =
- "ozone.open.key.cleanup.service.interval.seconds";
- public static final int
- OZONE_OPEN_KEY_CLEANUP_SERVICE_INTERVAL_SECONDS_DEFAULT
- = 24 * 3600; // a total of 24 hour
-
- /**
- * An open key gets cleaned up when it is being in open state for too long.
- */
- public static final String OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS =
- "ozone.open.key.expire.threshold";
- public static final int OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS_DEFAULT =
- 24 * 3600;
-
public static final String OZONE_BLOCK_DELETING_SERVICE_TIMEOUT =
"ozone.block.deleting.service.timeout";
public static final String OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 47dbf39794fc..7af1bdc10726 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -1133,23 +1133,36 @@
- ozone.open.key.cleanup.service.interval.seconds
- 86400
+ ozone.om.open.key.cleanup.service.interval
+ 24h
OZONE, OM, PERFORMANCE
- A background job periodically checks open key entries and delete the expired ones. This entry controls the
- interval of this cleanup check.
+ A background job that periodically checks open key entries and marks
+ expired open keys for deletion. This entry controls the interval of this
+ cleanup check.
- ozone.open.key.expire.threshold
- 86400
+ ozone.om.open.key.expire.threshold
+ 7d
OZONE, OM, PERFORMANCE
Controls how long an open key operation is considered active. Specifically, if a key
has been open longer than the value of this config entry, that open key is considered as
- expired (e.g. due to client crash). Default to 24 hours.
+ expired (e.g. due to client crash).
+
+
+
+
+ ozone.om.open.key.cleanup.limit.per.task
+ 1000
+ OZONE, OM, PERFORMANCE
+
+ The maximum number of open keys to be identified as expired and marked
+ for deletion by one run of the open key cleanup service on the OM.
+ This property is used to throttle the actual number of open key deletions
+ on the OM.
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
index 3ad4ab9e0918..85c9bcb6e7b4 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
@@ -87,6 +87,21 @@ private OMConfigKeys() {
"ozone.key.deleting.limit.per.task";
public static final int OZONE_KEY_DELETING_LIMIT_PER_TASK_DEFAULT = 20000;
+ public static final String OZONE_OPEN_KEY_CLEANUP_SERVICE_INTERVAL =
+ "ozone.om.open.key.cleanup.service.interval";
+ public static final TimeDuration
+ OZONE_OPEN_KEY_CLEANUP_SERVICE_INTERVAL_DEFAULT =
+ TimeDuration.valueOf(24, TimeUnit.HOURS);
+
+ public static final String OZONE_OPEN_KEY_EXPIRE_THRESHOLD =
+ "ozone.om.open.key.expire.threshold";
+ public static final TimeDuration OZONE_OPEN_KEY_EXPIRE_THRESHOLD_DEFAULT =
+ TimeDuration.valueOf(7, TimeUnit.DAYS);
+
+ public static final String OZONE_OPEN_KEY_CLEANUP_LIMIT_PER_TASK =
+ "ozone.om.open.key.cleanup.limit.per.task";
+ public static final int OZONE_OPEN_KEY_CLEANUP_LIMIT_PER_TASK_DEFAULT = 1000;
+
public static final String OZONE_OM_METRICS_SAVE_INTERVAL =
"ozone.om.save.metrics.interval";
public static final String OZONE_OM_METRICS_SAVE_INTERVAL_DEFAULT = "5m";
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneHACluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneHACluster.java
index 051eb94d582e..3837b8023b6d 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneHACluster.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneHACluster.java
@@ -33,12 +33,13 @@
import java.io.IOException;
import java.util.UUID;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD;
-import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS;
+import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD;
/**
* This class tests MiniOzoneHAClusterImpl.
@@ -73,7 +74,7 @@ public void init() throws Exception {
conf.setBoolean(OZONE_ACL_ENABLED, true);
conf.set(OzoneConfigKeys.OZONE_ADMINISTRATORS,
OZONE_ADMINISTRATORS_WILDCARD);
- conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
+ conf.setTimeDuration(OZONE_OPEN_KEY_EXPIRE_THRESHOLD, 2, TimeUnit.SECONDS);
cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf)
.setClusterId(clusterId)
.setScmId(scmId)
@@ -81,6 +82,7 @@ public void init() throws Exception {
.setNumOfOzoneManagers(numOfOMs)
.build();
cluster.waitForClusterToBeReady();
+ cluster.restartOzoneManager();
objectStore = OzoneClientFactory.getRpcClient(omServiceId, conf)
.getObjectStore();
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java
index 2c66885e3534..c61daafcc3a4 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java
@@ -43,7 +43,6 @@
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD;
-import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
import static org.apache.hadoop.ozone.om.OMDBCheckpointServlet.writeOmDBCheckpointToStream;
@@ -93,7 +92,6 @@ public void init() throws Exception {
omId = UUID.randomUUID().toString();
conf.setBoolean(OZONE_ACL_ENABLED, false);
conf.set(OZONE_ADMINISTRATORS, OZONE_ADMINISTRATORS_WILDCARD);
- conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
cluster = MiniOzoneCluster.newBuilder(conf)
.setClusterId(clusterId)
.setScmId(scmId)
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java
index 271109f0b3c7..08100cfa0c25 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmAcls.java
@@ -36,8 +36,8 @@
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD;
-import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS;
import org.junit.AfterClass;
+
import static org.junit.Assert.assertTrue;
import org.junit.BeforeClass;
import org.junit.Rule;
@@ -80,7 +80,6 @@ public static void init() throws Exception {
scmId = UUID.randomUUID().toString();
omId = UUID.randomUUID().toString();
conf.setBoolean(OZONE_ACL_ENABLED, true);
- conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
conf.setClass(OZONE_ACL_AUTHORIZER_CLASS, OzoneAccessAuthorizerTest.class,
IAccessAuthorizer.class);
conf.setStrings(OZONE_ADMINISTRATORS, OZONE_ADMINISTRATORS_WILDCARD);
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmInit.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmInit.java
index 7eb548d7a098..9745e2da006c 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmInit.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmInit.java
@@ -18,12 +18,14 @@
import java.io.IOException;
import java.util.UUID;
+import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
-import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS;
+import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD;
+
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
@@ -65,7 +67,7 @@ public static void init() throws Exception {
clusterId = UUID.randomUUID().toString();
scmId = UUID.randomUUID().toString();
omId = UUID.randomUUID().toString();
- conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
+ conf.setTimeDuration(OZONE_OPEN_KEY_EXPIRE_THRESHOLD, 2, TimeUnit.SECONDS);
cluster = MiniOzoneCluster.newBuilder(conf)
.setClusterId(clusterId)
.setScmId(scmId)
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOpenKeyCleanupService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOpenKeyCleanupService.java
new file mode 100644
index 000000000000..09e00b163396
--- /dev/null
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOpenKeyCleanupService.java
@@ -0,0 +1,588 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.ozone.om;
+
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Random;
+
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
+import org.apache.hadoop.ozone.OzoneConfigKeys;
+import org.apache.hadoop.ozone.client.ObjectStore;
+import org.apache.hadoop.ozone.client.OzoneClientFactory;
+import org.apache.hadoop.ozone.common.BlockGroup;
+import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
+import org.apache.hadoop.ozone.om.request.TestOMRequestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
+import org.apache.ratis.util.TimeDuration;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
+
+/**
+ * Integration tests for the open key cleanup service on OM.
+ */
+@RunWith(Parameterized.class)
+public class TestOpenKeyCleanupService {
+ // Increase service interval of open key cleanup so we can trigger the
+ // service manually between setting up the DB and checking the results.
+ // Increase service interval of key deleting service to ensure it does not
+ // run during the tests, interfering with results.
+ private static final TimeDuration TESTING_SERVICE_INTERVAL =
+ TimeDuration.valueOf(24, TimeUnit.HOURS);
+ // High expiration time used so keys without modified creation time will not
+ // expire during the test.
+ private static final TimeDuration TESTING_EXPIRE_THRESHOLD =
+ TimeDuration.valueOf(24, TimeUnit.HOURS);
+ // Maximum number of keys to be cleaned up per run of the service.
+ private static final int TESTING_TASK_LIMIT = 10;
+ // Volume and bucket created and added to the DB that will hold open keys
+ // created by this test unless tests specify otherwise.
+ private static final String DEFAULT_VOLUME = "volume";
+ private static final String DEFAULT_BUCKET = "bucket";
+ // Time in milliseconds to wait for followers in the cluster to apply
+ // transactions.
+ private static final int FOLLOWER_WAIT_TIMEOUT = 10000;
+ // Time in milliseconds between checks that followers have applied
+ // transactions.
+ private static final int FOLLOWER_CHECK_INTERVAL = 1000;
+
+ private MiniOzoneCluster cluster;
+ private boolean isOMHA;
+ private List ozoneManagers;
+
+ // Parameterized to test open key cleanup in both OM HA and non-HA.
+ @Parameterized.Parameters
+ public static Iterable