-
Notifications
You must be signed in to change notification settings - Fork 9.2k
HADOOP-18288. Total requests and total requests per sec served by RPC servers #4431
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
725b57d
89959a3
23967fc
2d7e78f
943dbec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,9 +65,12 @@ | |
| import java.util.concurrent.BlockingQueue; | ||
| import java.util.concurrent.ConcurrentHashMap; | ||
| import java.util.concurrent.LinkedBlockingQueue; | ||
| import java.util.concurrent.ScheduledExecutorService; | ||
| import java.util.concurrent.ScheduledThreadPoolExecutor; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.concurrent.atomic.AtomicInteger; | ||
| import java.util.concurrent.atomic.AtomicLong; | ||
| import java.util.concurrent.atomic.LongAdder; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| import javax.security.sasl.Sasl; | ||
|
|
@@ -127,6 +130,8 @@ | |
| import org.apache.hadoop.tracing.TraceUtils; | ||
| import com.fasterxml.jackson.databind.ObjectMapper; | ||
| import org.apache.hadoop.classification.VisibleForTesting; | ||
|
|
||
| import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; | ||
| import org.apache.hadoop.thirdparty.protobuf.ByteString; | ||
| import org.apache.hadoop.thirdparty.protobuf.CodedOutputStream; | ||
| import org.apache.hadoop.thirdparty.protobuf.Message; | ||
|
|
@@ -500,6 +505,11 @@ protected ResponseBuffer initialValue() { | |
| private Responder responder = null; | ||
| private Handler[] handlers = null; | ||
| private final AtomicInteger numInProcessHandler = new AtomicInteger(); | ||
| // Count of all RPC requests processed; bumped once per call in updateMetrics(). | ||
| private final LongAdder totalRequests = new LongAdder(); | ||
| // Value of totalRequests observed by the previous MetricsUpdateRunner run. | ||
| private long lastSeenTotalRequests = 0; | ||
| // Written by the metrics updater thread and read through | ||
| // getTotalRequestsPerSecond() on other threads, so it is volatile to | ||
| // guarantee visibility and atomic 64-bit access. | ||
| private volatile long totalRequestsPerSecond = 0; | ||
| // Period, in milliseconds, between MetricsUpdateRunner executions. | ||
| private final long metricsUpdaterInterval; | ||
| // Single-threaded scheduler that runs MetricsUpdateRunner. | ||
| private final ScheduledExecutorService scheduledExecutorService; | ||
|
|
||
| private boolean logSlowRPC = false; | ||
|
|
||
|
|
@@ -515,6 +525,14 @@ public int getNumInProcessHandler() { | |
| return numInProcessHandler.get(); | ||
| } | ||
|
|
||
| /** | ||
| * Returns the number of requests counted so far. | ||
| * | ||
| * @return current sum of the totalRequests counter. | ||
| */ | ||
| public long getTotalRequests() { | ||
| final long requestCount = totalRequests.sum(); | ||
| return requestCount; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the most recently computed request rate. | ||
| * | ||
| * @return value last stored in totalRequestsPerSecond by MetricsUpdateRunner. | ||
| */ | ||
| public long getTotalRequestsPerSecond() { | ||
| final long requestRate = totalRequestsPerSecond; | ||
| return requestRate; | ||
| } | ||
|
|
||
| /** | ||
| * Sets slow RPC flag. | ||
| * @param logSlowRPCFlag input logSlowRPCFlag. | ||
|
|
@@ -578,6 +596,7 @@ void logSlowRpcCalls(String methodName, Call call, | |
| } | ||
|
|
||
| void updateMetrics(Call call, long startTime, boolean connDropped) { | ||
| totalRequests.increment(); | ||
| // delta = handler + processing + response | ||
| long deltaNanos = Time.monotonicNowNanos() - startTime; | ||
| long timestampNanos = call.timestampNanos; | ||
|
|
@@ -3304,6 +3323,14 @@ protected Server(String bindAddress, int port, | |
| this.exceptionsHandler.addTerseLoggingExceptions(StandbyException.class); | ||
| this.exceptionsHandler.addTerseLoggingExceptions( | ||
| HealthCheckFailedException.class); | ||
| this.metricsUpdaterInterval = | ||
| conf.getLong(CommonConfigurationKeysPublic.IPC_SERVER_METRICS_UPDATE_RUNNER_INTERVAL, | ||
| CommonConfigurationKeysPublic.IPC_SERVER_METRICS_UPDATE_RUNNER_INTERVAL_DEFAULT); | ||
| this.scheduledExecutorService = new ScheduledThreadPoolExecutor(1, | ||
| new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Hadoop-Metrics-Updater-%d") | ||
| .build()); | ||
| this.scheduledExecutorService.scheduleWithFixedDelay(new MetricsUpdateRunner(), | ||
| metricsUpdaterInterval, metricsUpdaterInterval, TimeUnit.MILLISECONDS); | ||
| } | ||
|
|
||
| public synchronized void addAuxiliaryListener(int auxiliaryPort) | ||
|
|
@@ -3598,10 +3625,25 @@ public synchronized void stop() { | |
| } | ||
| responder.interrupt(); | ||
| notifyAll(); | ||
| shutdownMetricsUpdaterExecutor(); | ||
| this.rpcMetrics.shutdown(); | ||
| this.rpcDetailedMetrics.shutdown(); | ||
| } | ||
|
|
||
| /** | ||
| * Stops the metrics updater executor. | ||
| * Waits up to 3 seconds for an orderly shutdown; if the executor has not | ||
| * terminated by then, or the wait is interrupted, remaining tasks are | ||
| * cancelled via shutdownNow() so the updater thread does not linger. | ||
| */ | ||
| private void shutdownMetricsUpdaterExecutor() { | ||
| this.scheduledExecutorService.shutdown(); | ||
| try { | ||
| boolean isExecutorShutdown = | ||
| this.scheduledExecutorService.awaitTermination(3, TimeUnit.SECONDS); | ||
| if (!isExecutorShutdown) { | ||
| LOG.info("Hadoop Metrics Updater executor could not be shutdown."); | ||
| // Orderly shutdown timed out: cancel whatever is still queued or running. | ||
| this.scheduledExecutorService.shutdownNow(); | ||
| } | ||
| } catch (InterruptedException e) { | ||
| // Do not leave the executor running just because the wait was cut short. | ||
| this.scheduledExecutorService.shutdownNow(); | ||
| // Restore the interrupt status for callers further up the stack. | ||
| Thread.currentThread().interrupt(); | ||
| LOG.info("Hadoop Metrics Updater executor shutdown interrupted.", e); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Wait for the server to be stopped. | ||
| * Does not wait for all subthreads to finish. | ||
|
|
@@ -4061,4 +4103,32 @@ protected int getMaxIdleTime() { | |
| public String getServerName() { | ||
| return serverName; | ||
| } | ||
|
|
||
| /** | ||
| * Server metrics updater task, used to update some metrics on a regular basis. | ||
| * For instance, requests per second. | ||
| * Each run compares the total request count with the count seen on the | ||
| * previous run and divides the difference by the elapsed time. | ||
| */ | ||
| private class MetricsUpdateRunner implements Runnable { | ||
|
|
||
| // Monotonic timestamp, in milliseconds, of the previous run; 0 means the | ||
| // task has not run yet. | ||
| private long lastExecuted = 0; | ||
|
|
||
| @Override | ||
| public synchronized void run() { | ||
| long currentTime = Time.monotonicNow(); | ||
| if (lastExecuted == 0) { | ||
| // First run: treat one full update interval as having elapsed. | ||
| lastExecuted = currentTime - metricsUpdaterInterval; | ||
| } | ||
| long currentTotalRequests = totalRequests.sum(); | ||
| long totalRequestsDiff = currentTotalRequests - lastSeenTotalRequests; | ||
| lastSeenTotalRequests = currentTotalRequests; | ||
| long elapsedMillis = currentTime - lastExecuted; | ||
| if (elapsedMillis > 0) { | ||
| // Scale by the elapsed milliseconds rather than truncating them to whole | ||
| // seconds: truncation divides by zero for intervals below one second and | ||
| // overstates the rate whenever the interval is not a whole second. | ||
| double requestsPerSecond = | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The unit here is |
||
| (double) totalRequestsDiff * TimeUnit.SECONDS.toMillis(1) / elapsedMillis; | ||
| totalRequestsPerSecond = (long) requestsPerSecond; | ||
| } | ||
| lastExecuted = currentTime; | ||
| } | ||
| } | ||
|
|
||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -19,6 +19,8 @@ | |||||
| package org.apache.hadoop.ipc; | ||||||
|
|
||||||
| import org.apache.hadoop.ipc.metrics.RpcMetrics; | ||||||
|
|
||||||
| import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; | ||||||
| import org.apache.hadoop.thirdparty.protobuf.ServiceException; | ||||||
| import org.apache.hadoop.HadoopIllegalArgumentException; | ||||||
| import org.apache.hadoop.conf.Configuration; | ||||||
|
|
@@ -84,6 +86,7 @@ | |||||
| import java.util.concurrent.ThreadLocalRandom; | ||||||
| import java.util.concurrent.TimeUnit; | ||||||
| import java.util.concurrent.atomic.AtomicBoolean; | ||||||
| import java.util.concurrent.atomic.AtomicInteger; | ||||||
| import java.util.concurrent.atomic.AtomicReference; | ||||||
|
|
||||||
| import static org.assertj.core.api.Assertions.assertThat; | ||||||
|
|
@@ -1697,6 +1700,61 @@ public void testRpcMetricsInNanos() throws Exception { | |||||
| } | ||||||
| } | ||||||
|
|
||||||
| @Test | ||||||
| public void testNumTotalRequestsMetrics() throws Exception { | ||||||
| UserGroupInformation ugi = UserGroupInformation. | ||||||
| createUserForTesting("userXyz", new String[0]); | ||||||
|
|
||||||
| final Server server = setupTestServer(conf, 1); | ||||||
|
|
||||||
| ExecutorService executorService = null; | ||||||
| try { | ||||||
| RpcMetrics rpcMetrics = server.getRpcMetrics(); | ||||||
| assertEquals(0, rpcMetrics.getTotalRequests()); | ||||||
| assertEquals(0.0, rpcMetrics.getTotalRequestsPerSecond(), 0.0); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. RpcMetrics#getTotalRequestsPerSecond return
Suggested change
|
||||||
|
|
||||||
| List<ExternalCall<Void>> externalCallList = new ArrayList<>(); | ||||||
|
|
||||||
| executorService = Executors.newSingleThreadExecutor( | ||||||
| new ThreadFactoryBuilder().setDaemon(true).setNameFormat("testNumTotalRequestsMetrics") | ||||||
| .build()); | ||||||
| AtomicInteger rps = new AtomicInteger(0); | ||||||
| CountDownLatch countDownLatch = new CountDownLatch(1); | ||||||
| executorService.submit(() -> { | ||||||
| while (true) { | ||||||
| int numRps = (int) rpcMetrics.getTotalRequestsPerSecond(); | ||||||
| rps.getAndSet(numRps); | ||||||
| if (rps.get() > 0) { | ||||||
| countDownLatch.countDown(); | ||||||
| break; | ||||||
| } | ||||||
| } | ||||||
| }); | ||||||
|
|
||||||
| for (int i = 0; i < 100000; i++) { | ||||||
| externalCallList.add(newExtCall(ugi, () -> null)); | ||||||
| } | ||||||
| for (ExternalCall<Void> externalCall : externalCallList) { | ||||||
| server.queueCall(externalCall); | ||||||
| } | ||||||
| for (ExternalCall<Void> externalCall : externalCallList) { | ||||||
| externalCall.get(); | ||||||
| } | ||||||
|
|
||||||
| assertEquals(100000, rpcMetrics.getTotalRequests()); | ||||||
| if (countDownLatch.await(10, TimeUnit.SECONDS)) { | ||||||
| assertTrue(rps.get() > 10); | ||||||
| } else { | ||||||
| throw new AssertionError("total requests per seconds are still 0"); | ||||||
| } | ||||||
| } finally { | ||||||
| if (executorService != null) { | ||||||
| executorService.shutdown(); | ||||||
| } | ||||||
| server.stop(); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
| public static void main(String[] args) throws Exception { | ||||||
| new TestRPC().testCallsInternal(conf); | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -123,6 +123,7 @@ public void testRequeueCall() throws Exception { | |
| + CommonConfigurationKeys.IPC_BACKOFF_ENABLE, true); | ||
|
|
||
| NameNodeAdapter.getRpcServer(nn).refreshCallQueue(configuration); | ||
| assertTrue(NameNodeAdapter.getRpcServer(nn).getTotalRequests() > 0); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you have an assertion where a useful message isn't autogenerated the way assertEquals does, I'm afraid you will need to add a message or use AssertJ to describe the assert. Sorry.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree, this is a bit painful for debugging purposes. Updated with AssertJ as it has better APIs to deal with such conditions (
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks! It is pretty verbose for the little things, but I find AssertJ lovely for complex things, especially collections |
||
|
|
||
| dfs.create(testPath, (short)1).close(); | ||
| assertSentTo(0); | ||
|
|
@@ -132,6 +133,7 @@ public void testRequeueCall() throws Exception { | |
| // be triggered and client should retry active NN. | ||
| dfs.getFileStatus(testPath); | ||
| assertSentTo(0); | ||
| assertTrue(NameNodeAdapter.getRpcServer(nn).getTotalRequests() > 1); | ||
| // reset the original call queue | ||
| NameNodeAdapter.getRpcServer(nn).refreshCallQueue(originalConf); | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.