Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,7 @@ private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, bool
// Query the meta region
long pauseBase = connectionConfig.getPauseMillis();
takeUserRegionLock();
final long lockStartTime = EnvironmentEdgeManager.currentTime();
try {
// We don't need to check if useCache is enabled or not. Even if useCache is false
// we already cleared the cache for this row before acquiring userRegion lock so if this
Expand Down Expand Up @@ -1113,6 +1114,10 @@ rpcControllerFactory, getMetaLookupPool(), connectionConfig.getMetaReadRpcTimeou
}
} finally {
userRegionLock.unlock();
// update duration of the lock being held
if (metrics != null) {
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
}
}
try {
Thread.sleep(ConnectionUtils.getPauseTime(pauseBase, tries));
Expand All @@ -1126,9 +1131,19 @@ rpcControllerFactory, getMetaLookupPool(), connectionConfig.getMetaReadRpcTimeou
void takeUserRegionLock() throws IOException {
try {
long waitTime = connectionConfig.getMetaOperationTimeout();
if (metrics != null) {
metrics.updateUserRegionLockQueue(userRegionLock.getQueueLength());
}
final long waitStartTime = EnvironmentEdgeManager.currentTime();
if (!userRegionLock.tryLock(waitTime, TimeUnit.MILLISECONDS)) {
if (metrics != null) {
metrics.incrUserRegionLockTimeout();
}
throw new LockTimeoutException("Failed to get user region lock in" + waitTime + " ms. "
+ " for accessing meta region server.");
} else if (metrics != null) {
// successfully grabbed the lock, start timer of holding the lock
metrics.updateUserRegionLockWaiting(EnvironmentEdgeManager.currentTime() - waitStartTime);
}
} catch (InterruptedException ie) {
LOG.error("Interrupted while waiting for a lock", ie);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,10 @@ public Counter newMetric(Class<?> clazz, String name, String scope) {
private final Counter nsLookups;
private final Counter nsLookupsFailed;
private final Timer overloadedBackoffTimer;
private final Counter userRegionLockTimeoutCount;
private final Timer userRegionLockWaitingTimer;
private final Timer userRegionLockHeldTimer;
private final Histogram userRegionLockQueueHist;

// dynamic metrics

Expand Down Expand Up @@ -443,6 +447,15 @@ protected Ratio getRatio() {
this.nsLookups = registry.counter(name(this.getClass(), NS_LOOKUPS, scope));
this.nsLookupsFailed = registry.counter(name(this.getClass(), NS_LOOKUPS_FAILED, scope));

this.userRegionLockTimeoutCount =
registry.counter(name(this.getClass(), "userRegionLockTimeoutCount", scope));
this.userRegionLockWaitingTimer =
registry.timer(name(this.getClass(), "userRegionLockWaitingDuration", scope));
this.userRegionLockHeldTimer =
registry.timer(name(this.getClass(), "userRegionLockHeldDuration", scope));
this.userRegionLockQueueHist =
registry.histogram(name(MetricsConnection.class, "userRegionLockQueueLength", scope));

this.overloadedBackoffTimer =
registry.timer(name(this.getClass(), "overloadedBackoffDurationMs", scope));

Expand Down Expand Up @@ -598,6 +611,41 @@ public void incrementServerOverloadedBackoffTime(long time, TimeUnit timeUnit) {
overloadedBackoffTimer.update(time, timeUnit);
}

/** incr */
public void incrUserRegionLockTimeout() {
userRegionLockTimeoutCount.inc();
}

/** get */
public Counter getUserRegionLockTimeout() {
return userRegionLockTimeoutCount;
}

public Timer getUserRegionLockWaitingTimer() {
return userRegionLockWaitingTimer;
}

public Timer getUserRegionLockHeldTimer() {
return userRegionLockHeldTimer;
}

public Histogram getUserRegionLockQueue() {
return userRegionLockQueueHist;
}

/** update */
public void updateUserRegionLockWaiting(long duration) {
userRegionLockWaitingTimer.update(duration, TimeUnit.MILLISECONDS);
}

public void updateUserRegionLockHeld(long duration) {
userRegionLockHeldTimer.update(duration, TimeUnit.MILLISECONDS);
}

public void updateUserRegionLockQueue(int count) {
userRegionLockQueueHist.update(count);
}

/** Return the connection count of the metrics within a scope */
public long getConnectionCount() {
return connectionCount.getCount();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE
conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 0);
conf.setLong(HConstants.HBASE_CLIENT_META_OPERATION_TIMEOUT, 2000);
conf.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 2000);
conf.setBoolean(MetricsConnection.CLIENT_SIDE_METRICS_ENABLED_KEY, true);

try (ConnectionImplementation conn =
(ConnectionImplementation) ConnectionFactory.createConnection(conf)) {
Expand All @@ -459,6 +460,28 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE

assertTrue(client1.getException() instanceof LockTimeoutException
^ client2.getException() instanceof LockTimeoutException);

// obtain the client metrics
MetricsConnection metrics = conn.getConnectionMetrics();
long queueCount = metrics.getUserRegionLockQueue().getCount();
assertEquals("Queue of userRegionLock should be updated twice. queueCount: " + queueCount,
queueCount, 2);

long timeoutCount = metrics.getUserRegionLockTimeout().getCount();
assertEquals("Timeout of userRegionLock should happen once. timeoutCount: " + timeoutCount,
timeoutCount, 1);

long waitingTimerCount = metrics.getUserRegionLockWaitingTimer().getCount();
assertEquals("userRegionLock should be grabbed successfully once. waitingTimerCount: "
+ waitingTimerCount, waitingTimerCount, 1);

long heldTimerCount = metrics.getUserRegionLockHeldTimer().getCount();
assertEquals(
"userRegionLock should be held successfully once. heldTimerCount: " + heldTimerCount,
heldTimerCount, 1);
double heldTime = metrics.getUserRegionLockHeldTimer().getSnapshot().getMax();
assertTrue("Max held time should be greater than 2 seconds. heldTime: " + heldTime,
heldTime >= 2E9);
}
}

Expand Down