From 775efa23c1866aec3b77d704dc4ac4c2fa156536 Mon Sep 17 00:00:00 2001 From: Briana Augenreich Date: Wed, 24 Aug 2022 14:35:14 -0400 Subject: [PATCH 1/3] Report client backoff time for server overloaded --- .../hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java | 4 ++++ .../hadoop/hbase/client/AsyncRpcRetryingCaller.java | 4 ++++ .../client/AsyncScanSingleRegionRpcRetryingCaller.java | 7 +++++++ .../org/apache/hadoop/hbase/client/MetricsConnection.java | 8 ++++++++ 4 files changed, 23 insertions(+) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java index 0798915c08de..c6ea73a69a14 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java @@ -487,6 +487,10 @@ private void tryResubmit(Stream actions, int tries, boolean immediately, } else { delayNs = getPauseTime(pauseNsToUse, tries - 1); } + Optional metrics = conn.getConnectionMetrics(); + if(isServerOverloaded){ + metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); + } retryTimer.newTimeout(t -> groupAndSend(actions, tries + 1), delayNs, TimeUnit.NANOSECONDS); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java index a19d3b039f18..3fa5d681cf72 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java @@ -139,6 +139,10 @@ private void tryScheduleRetry(Throwable error) { delayNs = getPauseTime(pauseNsToUse, tries - 1); } tries++; + Optional metrics = conn.getConnectionMetrics(); + if(HBaseServerException.isServerOverloaded(error)){ + metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); + } retryTimer.newTimeout(t -> doCall(), delayNs, TimeUnit.NANOSECONDS); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java index dbaae5c26e2e..7934e9fcd7ca 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java @@ -113,6 +113,8 @@ class AsyncScanSingleRegionRpcRetryingCaller { private final Runnable completeWhenNoMoreResultsInRegion; + private final AsyncConnectionImpl conn; + private final CompletableFuture future; private final HBaseRpcController controller; @@ -318,6 +320,7 @@ public AsyncScanSingleRegionRpcRetryingCaller(Timer retryTimer, AsyncConnectionI long pauseNsForServerOverloaded, int maxAttempts, long scanTimeoutNs, long rpcTimeoutNs, int startLogErrorsCnt) { this.retryTimer = retryTimer; + this.conn = conn; this.scan = scan; this.scanMetrics = scanMetrics; this.scannerId = scannerId; @@ -441,6 +444,10 @@ private void onError(Throwable error) { return; } tries++; + Optional metrics = conn.getConnectionMetrics(); + if(HBaseServerException.isServerOverloaded(error)){ + metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); + } retryTimer.newTimeout(t -> call(), delayNs, TimeUnit.NANOSECONDS); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java index dc452bcd9d9a..52f888ae8d4d 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java @@ -315,6 +315,7 @@ public Counter newMetric(Class clazz, String name, String scope) { protected final Histogram numActionsPerServerHist; protected final Counter nsLookups; protected final Counter nsLookupsFailed; + protected final Timer overloadedBackoffTimer; // dynamic metrics @@ -376,6 +377,8 @@ protected Ratio getRatio() { registry.histogram(name(MetricsConnection.class, "numActionsPerServer", scope)); this.nsLookups = registry.counter(name(this.getClass(), NS_LOOKUPS, scope)); this.nsLookupsFailed = registry.counter(name(this.getClass(), NS_LOOKUPS_FAILED, scope)); + this.overloadedBackoffTimer = registry.timer(name(this.getClass(), + "overloadedBackoffDurationMs", scope)); this.reporter = JmxReporter.forRegistry(this.registry).build(); this.reporter.start(); @@ -449,6 +452,11 @@ public void incrDelayRunnersAndUpdateDelayInterval(long interval) { this.runnerStats.updateDelayInterval(interval); } + /** Update the overloaded backoff time **/ + public void incrementServerOverloadedBackoffTime(long time, TimeUnit timeUnit){ + overloadedBackoffTimer.update(time, timeUnit); + } + /** * Get a metric for {@code key} from {@code map}, or create it with {@code factory}. */ From 9301ca94d6bea17326f146411093962da898a506 Mon Sep 17 00:00:00 2001 From: Briana Augenreich Date: Wed, 24 Aug 2022 14:49:57 -0400 Subject: [PATCH 2/3] formatting --- .../hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java | 2 +- .../apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java | 2 +- .../client/AsyncScanSingleRegionRpcRetryingCaller.java | 2 +- .../org/apache/hadoop/hbase/client/MetricsConnection.java | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java index c6ea73a69a14..c4b2a7c46faa 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java @@ -488,7 +488,7 @@ private void tryResubmit(Stream actions, int tries, boolean immediately, delayNs = getPauseTime(pauseNsToUse, tries - 1); } Optional metrics = conn.getConnectionMetrics(); - if(isServerOverloaded){ + if (isServerOverloaded) { metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); } retryTimer.newTimeout(t -> groupAndSend(actions, tries + 1), delayNs, TimeUnit.NANOSECONDS); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java index 3fa5d681cf72..0d81c7b4af43 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java @@ -140,7 +140,7 @@ private void tryScheduleRetry(Throwable error) { } tries++; Optional metrics = conn.getConnectionMetrics(); - if(HBaseServerException.isServerOverloaded(error)){ + if (HBaseServerException.isServerOverloaded(error)) { metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); } retryTimer.newTimeout(t -> doCall(), delayNs, TimeUnit.NANOSECONDS); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java index 7934e9fcd7ca..0a5c54108cb8 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java @@ -445,7 +445,7 @@ private void onError(Throwable error) { } tries++; Optional metrics = conn.getConnectionMetrics(); - if(HBaseServerException.isServerOverloaded(error)){ + if (HBaseServerException.isServerOverloaded(error)) { metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); } retryTimer.newTimeout(t -> call(), delayNs, TimeUnit.NANOSECONDS); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java index 52f888ae8d4d..f844c47e4065 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java @@ -377,8 +377,8 @@ protected Ratio getRatio() { registry.histogram(name(MetricsConnection.class, "numActionsPerServer", scope)); this.nsLookups = registry.counter(name(this.getClass(), NS_LOOKUPS, scope)); this.nsLookupsFailed = registry.counter(name(this.getClass(), NS_LOOKUPS_FAILED, scope)); - this.overloadedBackoffTimer = registry.timer(name(this.getClass(), - "overloadedBackoffDurationMs", scope)); + this.overloadedBackoffTimer = + registry.timer(name(this.getClass(), "overloadedBackoffDurationMs", scope)); this.reporter = JmxReporter.forRegistry(this.registry).build(); this.reporter.start(); @@ -453,7 +453,7 @@ public void incrDelayRunnersAndUpdateDelayInterval(long interval) { } /** Update the overloaded backoff time **/ - public void incrementServerOverloadedBackoffTime(long time, TimeUnit timeUnit){ + public void incrementServerOverloadedBackoffTime(long time, TimeUnit timeUnit) { overloadedBackoffTimer.update(time, timeUnit); } From 6ed54ed2e3d690440d2ad16dacfbdb202ce0973d Mon Sep 17 00:00:00 2001 From: Briana Augenreich Date: Wed, 31 Aug 2022 09:53:30 -0400 Subject: [PATCH 3/3] PR feedback: move metric inside if block --- .../hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java | 3 ++- .../org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java | 2 +- .../hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java index c4b2a7c46faa..49cf75892072 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java @@ -487,8 +487,9 @@ private void tryResubmit(Stream actions, int tries, boolean immediately, } else { delayNs = getPauseTime(pauseNsToUse, tries - 1); } - Optional metrics = conn.getConnectionMetrics(); + if (isServerOverloaded) { + Optional metrics = conn.getConnectionMetrics(); metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); } retryTimer.newTimeout(t -> groupAndSend(actions, tries + 1), delayNs, TimeUnit.NANOSECONDS); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java index 0d81c7b4af43..04e227108388 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java @@ -139,8 +139,8 @@ private void tryScheduleRetry(Throwable error) { delayNs = getPauseTime(pauseNsToUse, tries - 1); } tries++; - Optional metrics = conn.getConnectionMetrics(); if (HBaseServerException.isServerOverloaded(error)) { + Optional metrics = conn.getConnectionMetrics(); metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); } retryTimer.newTimeout(t -> doCall(), delayNs, TimeUnit.NANOSECONDS); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java index 0a5c54108cb8..3ef7b9b6cccc 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java @@ -444,8 +444,9 @@ private void onError(Throwable error) { return; } tries++; - Optional metrics = conn.getConnectionMetrics(); + if (HBaseServerException.isServerOverloaded(error)) { + Optional metrics = conn.getConnectionMetrics(); metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); } retryTimer.newTimeout(t -> call(), delayNs, TimeUnit.NANOSECONDS);