From 3ee768923d3567c6ef854ebcf3560ef3ecccfac5 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Feb 2023 12:45:59 +0000 Subject: [PATCH 1/3] Capture thread dump on ShardLockObtainFailedException We sometimes see a `ShardLockObtainFailedException` when a shard failed to shut down as fast as we expected, often because a node left and rejoined the cluster. Sometimes this is because it was held open by ongoing scrolls or PITs, but other times it may be because the shutdown process itself is too slow. With this commit we add the ability to capture and log a thread dump at the time of the failure to give us more information about where the shutdown process might be running slowly. Relates #93226 --- .../discovery/fault-detection.asciidoc | 36 ++++++++ .../elasticsearch/common/ReferenceDocs.java | 1 + .../elasticsearch/env/NodeEnvironment.java | 62 ++++++++++--- .../common/reference-docs-links.json | 3 +- .../env/NodeEnvironmentTests.java | 86 ++++++++++++------- 5 files changed, 144 insertions(+), 44 deletions(-) diff --git a/docs/reference/modules/discovery/fault-detection.asciidoc b/docs/reference/modules/discovery/fault-detection.asciidoc index 7b368065afe11..66889e71d0ab4 100644 --- a/docs/reference/modules/discovery/fault-detection.asciidoc +++ b/docs/reference/modules/discovery/fault-detection.asciidoc @@ -373,3 +373,39 @@ checks are `transport_worker` and `cluster_coordination` threads, for which there should never be a long wait. There may also be evidence of long waits for threads in the {es} logs. Refer to <> for more information. + +===== Diagnosing `ShardLockObtainFailedException` failures + +If a node leaves and rejoins the cluster then {es} will usually shut down and +re-initialize its shards. If the shards do not shut down quickly enough then +{es} may fail to re-initialize them due to a `ShardLockObtainFailedException`. + +To gather more information about the reason for shards shutting down slowly, +configure the following logger: + +[source,yaml] +---- +logger.org.elasticsearch.env.NodeEnvironment: DEBUG +---- + +When this logger is enabled, {es} will attempt to run the +<> API whenever it encounters a +`ShardLockObtainFailedException`. The results are compressed, encoded, and +split into chunks to avoid truncation: + +[source,text] +---- +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 1]: H4sIAAAAAAAA/x... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 2]: p7x3w1hmOQVtuV... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 3]: v7uTboMGDbyOy+... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 4]: 4tse0RnPnLeDNN... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) +---- + +To reconstruct the output, base64-decode the data and decompress it using +`gzip`. For instance, on Unix-like systems: + +[source,sh] +---- +cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress +---- diff --git a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java index 0fc043210bbda..1f71a290719bd 100644 --- a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java +++ b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java @@ -35,6 +35,7 @@ public enum ReferenceDocs { LAGGING_NODE_TROUBLESHOOTING, CONCURRENT_REPOSITORY_WRITERS, ARCHIVE_INDICES, + SHARD_LOCK_TROUBLESHOOTING, // this comment keeps the ';' on the next line so every entry above has a trailing ',' which makes the diff for adding new links cleaner ; diff --git a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java index e4c42af85af47..f790424d1d8e0 100644 --- a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java +++ b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java @@ -8,6 +8,7 @@ package org.elasticsearch.env; +import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.util.Strings; @@ -25,8 +26,10 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.common.Randomness; +import org.elasticsearch.common.ReferenceDocs; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.io.FileSystemUtils; +import org.elasticsearch.common.logging.ChunkedLoggingStream; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; @@ -49,12 +52,15 @@ import org.elasticsearch.index.store.FsDirectoryFactory; import org.elasticsearch.monitor.fs.FsInfo; import org.elasticsearch.monitor.fs.FsProbe; +import org.elasticsearch.monitor.jvm.HotThreads; import org.elasticsearch.monitor.jvm.JvmInfo; import org.elasticsearch.xcontent.NamedXContentRegistry; import java.io.Closeable; import java.io.IOException; +import java.io.OutputStreamWriter; import java.io.UncheckedIOException; +import java.nio.charset.StandardCharsets; import java.nio.file.AtomicMoveNotSupportedException; import java.nio.file.DirectoryStream; import java.nio.file.FileStore; @@ -926,6 +932,43 @@ public Set lockedShards() { } } + // throttle the hot-threads calls: no more than one per minute + private final Semaphore shardLockHotThreadsPermit = new Semaphore(1); + private long nextShardLockHotThreadsNanos = Long.MIN_VALUE; + + private void maybeLogThreadDump(ShardId shardId, String message) { + if (logger.isDebugEnabled() == false) { + return; + } + + final var prefix = format("hot threads while failing to obtain shard lock for %s: %s", shardId, message); + if (shardLockHotThreadsPermit.tryAcquire()) { + try { + final var now = System.nanoTime(); + if (now <= nextShardLockHotThreadsNanos) { + return; + } + nextShardLockHotThreadsNanos = now + TimeUnit.SECONDS.toNanos(60); + final var hotThreads = new HotThreads().busiestThreads(500).ignoreIdleThreads(false).detect(); + try ( + var stream = ChunkedLoggingStream.create( + logger, + Level.DEBUG, + prefix, + ReferenceDocs.UNSTABLE_CLUSTER_TROUBLESHOOTING + ); + var writer = new OutputStreamWriter(stream, StandardCharsets.UTF_8) + ) { + writer.write(hotThreads); + } + } catch (Exception e) { + logger.error(format("could not obtain %s", prefix), e); + } finally { + shardLockHotThreadsPermit.release(); + } + } + } + private final class InternalShardLock { /* * This class holds a mutex for exclusive access and timeout / wait semantics @@ -975,18 +1018,15 @@ void acquire(long timeoutInMillis, final String details) throws ShardLockObtainF setDetails(details); } else { final Tuple lockDetails = this.lockDetails; // single volatile read - throw new ShardLockObtainFailedException( - shardId, - "obtaining shard lock for [" - + details - + "] timed out after [" - + timeoutInMillis - + "ms], lock already held for [" - + lockDetails.v2() - + "] with age [" - + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lockDetails.v1()) - + "ms]" + final var message = format( + "obtaining shard lock for [%s] timed out after [%dms], lock already held for [%s] with age [%dms]", + details, + timeoutInMillis, + lockDetails.v2(), + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lockDetails.v1()) ); + maybeLogThreadDump(shardId, message); + throw new ShardLockObtainFailedException(shardId, message); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json index a5a62f3da8b7b..901d7481f3462 100644 --- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json +++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json @@ -4,5 +4,6 @@ "UNSTABLE_CLUSTER_TROUBLESHOOTING": "cluster-fault-detection.html#cluster-fault-detection-troubleshooting", "LAGGING_NODE_TROUBLESHOOTING": "cluster-fault-detection.html#_diagnosing_lagging_nodes", "CONCURRENT_REPOSITORY_WRITERS": "add-repository.html", - "ARCHIVE_INDICES": "archive-indices.html" + "ARCHIVE_INDICES": "archive-indices.html", + "SHARD_LOCK_TROUBLESHOOTING": "index.html" } diff --git a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java index 33f7f8a3d24d0..6451c76e99e9e 100644 --- a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java +++ b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java @@ -7,6 +7,7 @@ */ package org.elasticsearch.env; +import org.apache.logging.log4j.Level; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; @@ -34,7 +35,9 @@ import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.MockLogAppender; import org.elasticsearch.test.NodeRoles; +import org.elasticsearch.test.junit.annotations.TestLogging; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -114,44 +117,63 @@ public void testSegmentInfosTracing() { } } + // using a literal string here because the logger is mentioned in the docs, and therefore must only be changed with care + private static final String NODE_ENVIRONMENT_LOGGER_NAME = "org.elasticsearch.env.NodeEnvironment"; + + @TestLogging(reason = "test includes assertions about DEBUG logging", value = NODE_ENVIRONMENT_LOGGER_NAME + ":DEBUG") public void testShardLock() throws Exception { - final NodeEnvironment env = newNodeEnvironment(); + try (var env = newNodeEnvironment()) { - Index index = new Index("foo", "fooUUID"); - ShardLock fooLock = env.shardLock(new ShardId(index, 0), "1"); - assertEquals(new ShardId(index, 0), fooLock.getShardId()); + Index index = new Index("foo", "fooUUID"); - try { - env.shardLock(new ShardId(index, 0), "2"); - fail("shard is locked"); - } catch (ShardLockObtainFailedException ex) { - // expected - } - for (Path path : env.indexPaths(index)) { - Files.createDirectories(path.resolve("0")); - Files.createDirectories(path.resolve("1")); - } - try { - env.lockAllForIndex(index, idxSettings, "3", randomIntBetween(0, 10)); - fail("shard 0 is locked"); - } catch (ShardLockObtainFailedException ex) { - // expected - } + var appender = new MockLogAppender(); + appender.addExpectation( + new MockLogAppender.SeenEventExpectation( + "hot threads logging", + NODE_ENVIRONMENT_LOGGER_NAME, + Level.DEBUG, + "hot threads while failing to obtain shard lock for [foo][0]: obtaining shard lock for [2] timed out after *" + ) + ); + appender.addExpectation( + new MockLogAppender.UnseenEventExpectation( + "second attempt should be suppressed due to throttling", + NODE_ENVIRONMENT_LOGGER_NAME, + Level.DEBUG, + "hot threads while failing to obtain shard lock for [foo][0]: obtaining shard lock for [3] timed out after *" + ) + ); - fooLock.close(); - // can lock again? - env.shardLock(new ShardId(index, 0), "4").close(); + try (var ignored = appender.capturing(NodeEnvironment.class); var lock = env.shardLock(new ShardId(index, 0), "1")) { + assertEquals(new ShardId(index, 0), lock.getShardId()); - List locks = env.lockAllForIndex(index, idxSettings, "5", randomIntBetween(0, 10)); - try { - env.shardLock(new ShardId(index, 0), "6"); - fail("shard is locked"); - } catch (ShardLockObtainFailedException ex) { - // expected + expectThrows(ShardLockObtainFailedException.class, () -> env.shardLock(new ShardId(index, 0), "2")); + + for (Path path : env.indexPaths(index)) { + Files.createDirectories(path.resolve("0")); + Files.createDirectories(path.resolve("1")); + } + expectThrows( + ShardLockObtainFailedException.class, + () -> env.lockAllForIndex(index, idxSettings, "3", randomIntBetween(0, 10)) + ); + + appender.assertAllExpectationsMatched(); + } + + // can lock again? + env.shardLock(new ShardId(index, 0), "4").close(); + + List locks = new ArrayList<>(); + try { + locks.addAll(env.lockAllForIndex(index, idxSettings, "5", randomIntBetween(0, 10))); + expectThrows(ShardLockObtainFailedException.class, () -> env.shardLock(new ShardId(index, 0), "6")); + } finally { + IOUtils.close(locks); + } + + assertTrue("LockedShards: " + env.lockedShards(), env.lockedShards().isEmpty()); } - IOUtils.close(locks); - assertTrue("LockedShards: " + env.lockedShards(), env.lockedShards().isEmpty()); - env.close(); } public void testAvailableIndexFolders() throws Exception { From a261806346e6a9d182e4df4527193b54c3b6f2da Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Feb 2023 13:28:43 +0000 Subject: [PATCH 2/3] Correct ReferenceDocs link --- .../main/java/org/elasticsearch/env/NodeEnvironment.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java index f790424d1d8e0..c21f2c172795b 100644 --- a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java +++ b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java @@ -951,12 +951,7 @@ private void maybeLogThreadDump(ShardId shardId, String message) { nextShardLockHotThreadsNanos = now + TimeUnit.SECONDS.toNanos(60); final var hotThreads = new HotThreads().busiestThreads(500).ignoreIdleThreads(false).detect(); try ( - var stream = ChunkedLoggingStream.create( - logger, - Level.DEBUG, - prefix, - ReferenceDocs.UNSTABLE_CLUSTER_TROUBLESHOOTING - ); + var stream = ChunkedLoggingStream.create(logger, Level.DEBUG, prefix, ReferenceDocs.SHARD_LOCK_TROUBLESHOOTING); var writer = new OutputStreamWriter(stream, StandardCharsets.UTF_8) ) { writer.write(hotThreads); From d5c324bce453a59468e3434eaf10097d0f556104 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Feb 2023 13:54:27 +0000 Subject: [PATCH 3/3] Fix link to docs --- .../src/main/java/org/elasticsearch/common/ReferenceDocs.java | 2 +- .../org/elasticsearch/common/reference-docs-links.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java index 1f71a290719bd..856ba30d6c4a5 100644 --- a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java +++ b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java @@ -33,9 +33,9 @@ public enum ReferenceDocs { DISCOVERY_TROUBLESHOOTING, UNSTABLE_CLUSTER_TROUBLESHOOTING, LAGGING_NODE_TROUBLESHOOTING, + SHARD_LOCK_TROUBLESHOOTING, CONCURRENT_REPOSITORY_WRITERS, ARCHIVE_INDICES, - SHARD_LOCK_TROUBLESHOOTING, // this comment keeps the ';' on the next line so every entry above has a trailing ',' which makes the diff for adding new links cleaner ; diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json index 901d7481f3462..cac82799c50a9 100644 --- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json +++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json @@ -3,7 +3,7 @@ "DISCOVERY_TROUBLESHOOTING": "discovery-troubleshooting.html", "UNSTABLE_CLUSTER_TROUBLESHOOTING": "cluster-fault-detection.html#cluster-fault-detection-troubleshooting", "LAGGING_NODE_TROUBLESHOOTING": "cluster-fault-detection.html#_diagnosing_lagging_nodes", + "SHARD_LOCK_TROUBLESHOOTING": "cluster-fault-detection.html#_diagnosing_shardlockobtainfailedexception_failures", "CONCURRENT_REPOSITORY_WRITERS": "add-repository.html", - "ARCHIVE_INDICES": "archive-indices.html", - "SHARD_LOCK_TROUBLESHOOTING": "index.html" + "ARCHIVE_INDICES": "archive-indices.html" }