Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
fe14a85
Remove extra serialization from getBlockID
errose28 Oct 2, 2020
3823d25
Initial setup for open key cleanup service implementation
errose28 Oct 7, 2020
eddf5ea
Update docs and formatting for open key cleanup service
errose28 Oct 8, 2020
0c0b8ef
Use TimeDuration to specify open key expiration time
errose28 Oct 8, 2020
4cfc293
Add open key cleanup service start and stop to key manager
errose28 Oct 8, 2020
f66008a
Disable service timeout for open key cleanup
errose28 Oct 8, 2020
8ec0a42
Make unit test use TimeDuration instead of long
errose28 Oct 9, 2020
1604d8b
Add initial implementation of open key cleanup integration tests
errose28 Oct 9, 2020
ee18fc7
Fix incorrect unit being used to read in time duration configurations
errose28 Oct 12, 2020
4f118f3
Implement open key creation method and remove unused methods
errose28 Oct 12, 2020
b1816d4
Add more robust volume and bucket testing
errose28 Oct 14, 2020
7100ff7
Merge branch 'master' into HDDS-4123
errose28 Oct 14, 2020
97cff3f
Fix misnamed configuration
errose28 Oct 14, 2020
10bff2e
Add extra tests to make sure open keys are added in setup
errose28 Oct 14, 2020
efadaf0
Add first draft setup using mini ozone cluster
errose28 Oct 19, 2020
6e5a5cc
Merge branch 'master' into HDDS-4123
errose28 Oct 19, 2020
e3caed3
Add checks based on key names, not just key counts
errose28 Oct 19, 2020
ce65c91
Move integration test for open key cleanup to the correct directory
errose28 Oct 20, 2020
66cf297
Rename all instances of old key expire threshold configuration to the…
errose28 Oct 20, 2020
a49978f
Add delete open key request type to ratis utils switch statement
errose28 Oct 20, 2020
5280d69
All integration tests with mini ozone cluster pass
errose28 Oct 20, 2020
db65a06
Add test without artificial key expiration of service firing
errose28 Oct 20, 2020
73a2419
Add documentation and minor code reformatting
errose28 Oct 21, 2020
0df85dc
Add new config properties to xml config
errose28 Oct 21, 2020
1021fd3
Correct hour unit in ozone-defaults.xml
errose28 Oct 21, 2020
9fd547e
Fix checkstyle violations
errose28 Oct 21, 2020
845ddaf
Fix indentation issues
errose28 Oct 21, 2020
6597e50
Update default config settings
errose28 Oct 28, 2020
f1d980a
Fix time unit in ozone-defaults.xml
errose28 Oct 28, 2020
895219c
Merge branch 'master' into HDDS-4123
errose28 Nov 18, 2020
04f763f
Retrigger CI
errose28 Nov 19, 2020
893ea5e
Add om prefix to new config names
errose28 Dec 1, 2020
4d89c18
Remove unnecessary open key expiration configs from tests
errose28 Dec 1, 2020
d539d25
Fix copy paste errors from KeyDeletingService
errose28 Dec 1, 2020
3dc26ad
Merge branch 'master' into HDDS-4123
errose28 Dec 1, 2020
5ccb8b6
Use new isLeaderReady() call
errose28 Dec 2, 2020
c8de2b9
Add testing and handling for keys with slashes in them
errose28 Dec 2, 2020
4860806
Add separate code path for HA and non-HA in cleanup service
errose28 Dec 4, 2020
5a0e172
First draft of parameterized HA/non-HA test
errose28 Dec 4, 2020
231922c
HA tests pass
errose28 Jan 4, 2021
656da18
Fix cluster configuration for non-HA, and add more comments
errose28 Jan 5, 2021
c74b01e
Fix checkstyle violations
errose28 Jan 5, 2021
38913c9
Merge branch 'master' into HDDS-4123
errose28 Feb 1, 2021
5a69252
Fix checkstyle and incorporate Ratis API changes
errose28 Feb 1, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ public abstract class BackgroundService {
private final TimeUnit unit;
private final PeriodicalTask service;

/**
 * Creates a background service with the service timeout disabled.
 *
 * <p>Delegates to the five-argument constructor, passing {@code 0} as the
 * service timeout.
 *
 * @param serviceName name of the service, forwarded unchanged
 * @param interval interval value, forwarded unchanged
 * @param unit time unit, forwarded unchanged (presumably the unit of
 *     {@code interval}; confirm against the five-argument constructor)
 * @param threadPoolSize thread pool size, forwarded unchanged
 */
public BackgroundService(String serviceName, long interval,
TimeUnit unit, int threadPoolSize) {
// Set service timeout to 0 to disable.
this(serviceName, interval, unit, threadPoolSize, 0);
}

public BackgroundService(String serviceName, long interval,
TimeUnit unit, int threadPoolSize, long serviceTimeout) {
this.interval = interval;
Expand Down Expand Up @@ -117,7 +123,8 @@ public synchronized void run() {
LOG.warn("Background task execution failed", e);
} finally {
long endTime = System.nanoTime();
if (endTime - startTime > serviceTimeoutInNanos) {
if (serviceTimeoutInNanos > 0 &&
endTime - startTime > serviceTimeoutInNanos) {
LOG.warn("{} Background task execution took {}ns > {}ns(timeout)",
serviceName, endTime - startTime, serviceTimeoutInNanos);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,23 +158,6 @@ public final class OzoneConfigKeys {
public static final String OZONE_BLOCK_DELETING_SERVICE_INTERVAL_DEFAULT
= "60s";

/**
* The interval of open key clean service.
*/
public static final String OZONE_OPEN_KEY_CLEANUP_SERVICE_INTERVAL_SECONDS =
"ozone.open.key.cleanup.service.interval.seconds";
public static final int
OZONE_OPEN_KEY_CLEANUP_SERVICE_INTERVAL_SECONDS_DEFAULT
= 24 * 3600; // a total of 24 hour

/**
* An open key gets cleaned up when it is being in open state for too long.
*/
public static final String OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS =
"ozone.open.key.expire.threshold";
public static final int OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS_DEFAULT =
24 * 3600;

public static final String OZONE_BLOCK_DELETING_SERVICE_TIMEOUT =
"ozone.block.deleting.service.timeout";
public static final String OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT
Expand Down
27 changes: 20 additions & 7 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1133,23 +1133,36 @@
</property>

<property>
<name>ozone.open.key.cleanup.service.interval.seconds</name>
<value>86400</value>
<name>ozone.om.open.key.cleanup.service.interval</name>
<value>24h</value>
<tag>OZONE, OM, PERFORMANCE</tag>
<description>
A background job periodically checks open key entries and delete the expired ones. This entry controls the
interval of this cleanup check.
A background job that periodically checks open key entries and marks
expired open keys for deletion. This entry controls the interval of this
cleanup check.
</description>
</property>

<property>
<name>ozone.open.key.expire.threshold</name>
<value>86400</value>
<name>ozone.om.open.key.expire.threshold</name>
<value>7d</value>
<tag>OZONE, OM, PERFORMANCE</tag>
<description>
Controls how long an open key operation is considered active. Specifically, if a key
has been open longer than the value of this config entry, that open key is considered as
expired (e.g. due to client crash). Default to 24 hours.
expired (e.g. due to client crash).
</description>
</property>

<property>
<name>ozone.om.open.key.cleanup.limit.per.task</name>
<value>1000</value>
<tag>OZONE, OM, PERFORMANCE</tag>
<description>
The maximum number of open keys to be identified as expired and marked
for deletion by one run of the open key cleanup service on the OM.
This property is used to throttle the actual number of open key deletions
on the OM.
</description>
</property>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,21 @@ private OMConfigKeys() {
"ozone.key.deleting.limit.per.task";
public static final int OZONE_KEY_DELETING_LIMIT_PER_TASK_DEFAULT = 20000;

public static final String OZONE_OPEN_KEY_CLEANUP_SERVICE_INTERVAL =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: Do we want to add the "om" prefix to these configs?
Generally, most OM config keys start with "ozone.om".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I added the ozone.om prefix.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also update the config variable name OZONE_OPEN_KEY_CLEANUP_SERVICE_INTERVAL to include "OM"?
The convention is for variable names to use the exact words that appear in the config key.

"ozone.om.open.key.cleanup.service.interval";
public static final TimeDuration
OZONE_OPEN_KEY_CLEANUP_SERVICE_INTERVAL_DEFAULT =
TimeDuration.valueOf(24, TimeUnit.HOURS);

public static final String OZONE_OPEN_KEY_EXPIRE_THRESHOLD =
"ozone.om.open.key.expire.threshold";
public static final TimeDuration OZONE_OPEN_KEY_EXPIRE_THRESHOLD_DEFAULT =
TimeDuration.valueOf(7, TimeUnit.DAYS);

public static final String OZONE_OPEN_KEY_CLEANUP_LIMIT_PER_TASK =
"ozone.om.open.key.cleanup.limit.per.task";
public static final int OZONE_OPEN_KEY_CLEANUP_LIMIT_PER_TASK_DEFAULT = 1000;

public static final String OZONE_OM_METRICS_SAVE_INTERVAL =
"ozone.om.save.metrics.interval";
public static final String OZONE_OM_METRICS_SAVE_INTERVAL_DEFAULT = "5m";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@

import java.io.IOException;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;

import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD;

/**
* This class tests MiniOzoneHAClusterImpl.
Expand Down Expand Up @@ -73,14 +74,15 @@ public void init() throws Exception {
conf.setBoolean(OZONE_ACL_ENABLED, true);
conf.set(OzoneConfigKeys.OZONE_ADMINISTRATORS,
OZONE_ADMINISTRATORS_WILDCARD);
conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
conf.setTimeDuration(OZONE_OPEN_KEY_EXPIRE_THRESHOLD, 2, TimeUnit.SECONDS);
cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf)
.setClusterId(clusterId)
.setScmId(scmId)
.setOMServiceId(omServiceId)
.setNumOfOzoneManagers(numOfOMs)
.build();
cluster.waitForClusterToBeReady();
cluster.restartOzoneManager();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need an OM restart in init?

objectStore = OzoneClientFactory.getRpcClient(omServiceId, conf)
.getObjectStore();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
import static org.apache.hadoop.ozone.om.OMDBCheckpointServlet.writeOmDBCheckpointToStream;

Expand Down Expand Up @@ -93,7 +92,6 @@ public void init() throws Exception {
omId = UUID.randomUUID().toString();
conf.setBoolean(OZONE_ACL_ENABLED, false);
conf.set(OZONE_ADMINISTRATORS, OZONE_ADMINISTRATORS_WILDCARD);
conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
cluster = MiniOzoneCluster.newBuilder(conf)
.setClusterId(clusterId)
.setScmId(scmId)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS;
import org.junit.AfterClass;

import static org.junit.Assert.assertTrue;
import org.junit.BeforeClass;
import org.junit.Rule;
Expand Down Expand Up @@ -80,7 +80,6 @@ public static void init() throws Exception {
scmId = UUID.randomUUID().toString();
omId = UUID.randomUUID().toString();
conf.setBoolean(OZONE_ACL_ENABLED, true);
conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
conf.setClass(OZONE_ACL_AUTHORIZER_CLASS, OzoneAccessAuthorizerTest.class,
IAccessAuthorizer.class);
conf.setStrings(OZONE_ADMINISTRATORS, OZONE_ADMINISTRATORS_WILDCARD);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@

import java.io.IOException;
import java.util.UUID;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.security.authentication.client.AuthenticationException;

import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OPEN_KEY_EXPIRE_THRESHOLD;

import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
Expand Down Expand Up @@ -65,7 +67,7 @@ public static void init() throws Exception {
clusterId = UUID.randomUUID().toString();
scmId = UUID.randomUUID().toString();
omId = UUID.randomUUID().toString();
conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
conf.setTimeDuration(OZONE_OPEN_KEY_EXPIRE_THRESHOLD, 2, TimeUnit.SECONDS);
cluster = MiniOzoneCluster.newBuilder(conf)
.setClusterId(clusterId)
.setScmId(scmId)
Expand Down
Loading