Skip to content

Commit 7e82a4f

Browse files
committed
Do not log no-op reconnections at DEBUG (#53469)
Today the NodeConnectionsService emits a DEBUG-level log message each time it calls TransportService#connectToNode, which happens for every node in the cluster every ten seconds, and also at every cluster state update. That's a lot of log messages. Most of these calls are no-ops and can be ignored, but if the call was not a no-op then it may be worth investigating further. Since the logs do not distinguish the interesting and uninteresting cases, they are not useful. This commit distinguishes the two cases and pushes the noisy logging for the common no-op case down to TRACE level, leaving only useful and actionable information in the DEBUG-level logs.
1 parent 2f0aca9 commit 7e82a4f

File tree

2 files changed

+141
-16
lines changed

2 files changed

+141
-16
lines changed

server/src/main/java/org/elasticsearch/cluster/NodeConnectionsService.java

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -304,20 +304,32 @@ private class ConnectionTarget {
304304
@Override
305305
protected void doRun() {
306306
assert Thread.holdsLock(mutex) == false : "mutex unexpectedly held";
307-
transportService.connectToNode(discoveryNode, new ActionListener<Void>() {
308-
@Override
309-
public void onResponse(Void aVoid) {
310-
assert Thread.holdsLock(mutex) == false : "mutex unexpectedly held";
311-
consecutiveFailureCount.set(0);
312-
logger.debug("connected to {}", discoveryNode);
313-
onCompletion(ActivityType.CONNECTING, null, disconnectActivity);
314-
}
307+
if (transportService.nodeConnected(discoveryNode)) {
308+
// transportService.connectToNode is a no-op if already connected, but we don't want any DEBUG logging in this case
309+
// since we run this for every node on every cluster state update.
310+
logger.trace("still connected to {}", discoveryNode);
311+
onConnected();
312+
} else {
313+
logger.debug("connecting to {}", discoveryNode);
314+
transportService.connectToNode(discoveryNode, new ActionListener<Void>() {
315+
@Override
316+
public void onResponse(Void aVoid) {
317+
assert Thread.holdsLock(mutex) == false : "mutex unexpectedly held";
318+
logger.debug("connected to {}", discoveryNode);
319+
onConnected();
320+
}
321+
322+
@Override
323+
public void onFailure(Exception e) {
324+
abstractRunnable.onFailure(e);
325+
}
326+
});
327+
}
328+
}
315329

316-
@Override
317-
public void onFailure(Exception e) {
318-
abstractRunnable.onFailure(e);
319-
}
320-
});
330+
private void onConnected() {
331+
consecutiveFailureCount.set(0);
332+
onCompletion(ActivityType.CONNECTING, null, disconnectActivity);
321333
}
322334

323335
@Override

server/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java

Lines changed: 116 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
package org.elasticsearch.cluster;
2121

22+
import org.apache.logging.log4j.Level;
23+
import org.apache.logging.log4j.LogManager;
2224
import org.elasticsearch.ElasticsearchTimeoutException;
2325
import org.elasticsearch.Version;
2426
import org.elasticsearch.action.ActionListener;
@@ -31,10 +33,13 @@
3133
import org.elasticsearch.common.UUIDs;
3234
import org.elasticsearch.common.component.Lifecycle;
3335
import org.elasticsearch.common.component.LifecycleListener;
36+
import org.elasticsearch.common.logging.Loggers;
3437
import org.elasticsearch.common.settings.Settings;
3538
import org.elasticsearch.common.transport.BoundTransportAddress;
3639
import org.elasticsearch.common.transport.TransportAddress;
3740
import org.elasticsearch.test.ESTestCase;
41+
import org.elasticsearch.test.MockLogAppender;
42+
import org.elasticsearch.test.junit.annotations.TestLogging;
3843
import org.elasticsearch.threadpool.TestThreadPool;
3944
import org.elasticsearch.threadpool.ThreadPool;
4045
import org.elasticsearch.transport.ConnectTransportException;
@@ -72,7 +77,6 @@
7277
public class NodeConnectionsServiceTests extends ESTestCase {
7378

7479
private ThreadPool threadPool;
75-
private MockTransport transport;
7680
private TransportService transportService;
7781
private Map<DiscoveryNode, CheckedRunnable<Exception>> nodeConnectionBlocks;
7882

@@ -301,6 +305,116 @@ public void testOnlyBlocksOnConnectionsToNewNodes() throws Exception {
301305
}
302306
}
303307

308+
@TestLogging(reason="testing that DEBUG-level logging is reasonable", value="org.elasticsearch.cluster.NodeConnectionsService:DEBUG")
309+
public void testDebugLogging() throws IllegalAccessException {
310+
final DeterministicTaskQueue deterministicTaskQueue
311+
= new DeterministicTaskQueue(builder().put(NODE_NAME_SETTING.getKey(), "node").build(), random());
312+
313+
MockTransport transport = new MockTransport(deterministicTaskQueue.getThreadPool());
314+
TestTransportService transportService = new TestTransportService(transport, deterministicTaskQueue.getThreadPool());
315+
transportService.start();
316+
transportService.acceptIncomingRequests();
317+
318+
final NodeConnectionsService service
319+
= new NodeConnectionsService(Settings.EMPTY, deterministicTaskQueue.getThreadPool(), transportService);
320+
service.start();
321+
322+
final List<DiscoveryNode> allNodes = generateNodes();
323+
final DiscoveryNodes targetNodes = discoveryNodesFromList(randomSubsetOf(allNodes));
324+
service.connectToNodes(targetNodes, () -> {});
325+
deterministicTaskQueue.runAllRunnableTasks();
326+
327+
// periodic reconnections to unexpectedly-disconnected nodes are logged
328+
final Set<DiscoveryNode> disconnectedNodes = new HashSet<>(randomSubsetOf(allNodes));
329+
for (DiscoveryNode disconnectedNode : disconnectedNodes) {
330+
transportService.disconnectFromNode(disconnectedNode);
331+
}
332+
MockLogAppender appender = new MockLogAppender();
333+
try {
334+
appender.start();
335+
Loggers.addAppender(LogManager.getLogger("org.elasticsearch.cluster.NodeConnectionsService"), appender);
336+
for (DiscoveryNode targetNode : targetNodes) {
337+
if (disconnectedNodes.contains(targetNode)) {
338+
appender.addExpectation(new MockLogAppender.SeenEventExpectation("connecting to " + targetNode,
339+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
340+
"connecting to " + targetNode));
341+
appender.addExpectation(new MockLogAppender.SeenEventExpectation("connected to " + targetNode,
342+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
343+
"connected to " + targetNode));
344+
} else {
345+
appender.addExpectation(new MockLogAppender.UnseenEventExpectation("connecting to " + targetNode,
346+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
347+
"connecting to " + targetNode));
348+
appender.addExpectation(new MockLogAppender.UnseenEventExpectation("connected to " + targetNode,
349+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
350+
"connected to " + targetNode));
351+
}
352+
}
353+
354+
runTasksUntil(deterministicTaskQueue, CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.get(Settings.EMPTY).millis());
355+
appender.assertAllExpectationsMatched();
356+
} finally {
357+
Loggers.removeAppender(LogManager.getLogger("org.elasticsearch.cluster.NodeConnectionsService"), appender);
358+
appender.stop();
359+
} for (DiscoveryNode disconnectedNode : disconnectedNodes) {
360+
transportService.disconnectFromNode(disconnectedNode);
361+
}
362+
363+
// changes to the expected set of nodes are logged, including reconnections to any unexpectedly-disconnected nodes
364+
final DiscoveryNodes newTargetNodes = discoveryNodesFromList(randomSubsetOf(allNodes));
365+
for (DiscoveryNode disconnectedNode : disconnectedNodes) {
366+
transportService.disconnectFromNode(disconnectedNode);
367+
}
368+
appender = new MockLogAppender();
369+
try {
370+
appender.start();
371+
Loggers.addAppender(LogManager.getLogger("org.elasticsearch.cluster.NodeConnectionsService"), appender);
372+
for (DiscoveryNode targetNode : targetNodes) {
373+
if (disconnectedNodes.contains(targetNode) && newTargetNodes.get(targetNode.getId()) != null) {
374+
appender.addExpectation(new MockLogAppender.SeenEventExpectation("connecting to " + targetNode,
375+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
376+
"connecting to " + targetNode));
377+
appender.addExpectation(new MockLogAppender.SeenEventExpectation("connected to " + targetNode,
378+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
379+
"connected to " + targetNode));
380+
} else {
381+
appender.addExpectation(new MockLogAppender.UnseenEventExpectation("connecting to " + targetNode,
382+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
383+
"connecting to " + targetNode));
384+
appender.addExpectation(new MockLogAppender.UnseenEventExpectation("connected to " + targetNode,
385+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
386+
"connected to " + targetNode));
387+
}
388+
if (newTargetNodes.get(targetNode.getId()) == null) {
389+
appender.addExpectation(new MockLogAppender.SeenEventExpectation("disconnected from " + targetNode,
390+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
391+
"disconnected from " + targetNode));
392+
}
393+
}
394+
for (DiscoveryNode targetNode : newTargetNodes) {
395+
appender.addExpectation(new MockLogAppender.UnseenEventExpectation("disconnected from " + targetNode,
396+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
397+
"disconnected from " + targetNode));
398+
if (targetNodes.get(targetNode.getId()) == null) {
399+
appender.addExpectation(new MockLogAppender.SeenEventExpectation("connecting to " + targetNode,
400+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
401+
"connecting to " + targetNode));
402+
appender.addExpectation(new MockLogAppender.SeenEventExpectation("connected to " + targetNode,
403+
"org.elasticsearch.cluster.NodeConnectionsService", Level.DEBUG,
404+
"connected to " + targetNode));
405+
}
406+
}
407+
408+
service.disconnectFromNodesExcept(newTargetNodes);
409+
service.connectToNodes(newTargetNodes, () -> {});
410+
deterministicTaskQueue.runAllRunnableTasks();
411+
appender.assertAllExpectationsMatched();
412+
} finally {
413+
Loggers.removeAppender(LogManager.getLogger("org.elasticsearch.cluster.NodeConnectionsService"), appender);
414+
appender.stop();
415+
}
416+
}
417+
304418
private void runTasksUntil(DeterministicTaskQueue deterministicTaskQueue, long endTimeMillis) {
305419
while (deterministicTaskQueue.getCurrentTimeMillis() < endTimeMillis) {
306420
if (deterministicTaskQueue.hasRunnableTasks() && randomBoolean()) {
@@ -339,9 +453,8 @@ public void setUp() throws Exception {
339453
super.setUp();
340454
ThreadPool threadPool = new TestThreadPool(getClass().getName());
341455
this.threadPool = threadPool;
342-
this.transport = new MockTransport(threadPool);
343456
nodeConnectionBlocks = newConcurrentMap();
344-
transportService = new TestTransportService(transport, threadPool);
457+
transportService = new TestTransportService(new MockTransport(threadPool), threadPool);
345458
transportService.start();
346459
transportService.acceptIncomingRequests();
347460
}

0 commit comments

Comments
 (0)