diff --git a/docs/reference/modules/discovery/fault-detection.asciidoc b/docs/reference/modules/discovery/fault-detection.asciidoc index 7b368065afe11..66889e71d0ab4 100644 --- a/docs/reference/modules/discovery/fault-detection.asciidoc +++ b/docs/reference/modules/discovery/fault-detection.asciidoc @@ -373,3 +373,39 @@ checks are `transport_worker` and `cluster_coordination` threads, for which there should never be a long wait. There may also be evidence of long waits for threads in the {es} logs. Refer to <> for more information. + +===== Diagnosing `ShardLockObtainFailedException` failures + +If a node leaves and rejoins the cluster then {es} will usually shut down and +re-initialize its shards. If the shards do not shut down quickly enough then +{es} may fail to re-initialize them due to a `ShardLockObtainFailedException`. + +To gather more information about the reason for shards shutting down slowly, +configure the following logger: + +[source,yaml] +---- +logger.org.elasticsearch.env.NodeEnvironment: DEBUG +---- + +When this logger is enabled, {es} will attempt to run the +<> API whenever it encounters a +`ShardLockObtainFailedException`. The results are compressed, encoded, and +split into chunks to avoid truncation: + +[source,text] +---- +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 1]: H4sIAAAAAAAA/x... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 2]: p7x3w1hmOQVtuV... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 3]: v7uTboMGDbyOy+... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 4]: 4tse0RnPnLeDNN... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) +---- + +To reconstruct the output, base64-decode the data and decompress it using +`gzip`. For instance, on Unix-like systems: + +[source,sh] +---- +cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress +---- diff --git a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java index 0fc043210bbda..856ba30d6c4a5 100644 --- a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java +++ b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java @@ -33,6 +33,7 @@ public enum ReferenceDocs { DISCOVERY_TROUBLESHOOTING, UNSTABLE_CLUSTER_TROUBLESHOOTING, LAGGING_NODE_TROUBLESHOOTING, + SHARD_LOCK_TROUBLESHOOTING, CONCURRENT_REPOSITORY_WRITERS, ARCHIVE_INDICES, // this comment keeps the ';' on the next line so every entry above has a trailing ',' which makes the diff for adding new links cleaner diff --git a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java index e4c42af85af47..c21f2c172795b 100644 --- a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java +++ b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java @@ -8,6 +8,7 @@ package org.elasticsearch.env; +import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.util.Strings; @@ -25,8 +26,10 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.common.Randomness; +import org.elasticsearch.common.ReferenceDocs; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.io.FileSystemUtils; +import org.elasticsearch.common.logging.ChunkedLoggingStream; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; @@ -49,12 +52,15 @@ import org.elasticsearch.index.store.FsDirectoryFactory; import org.elasticsearch.monitor.fs.FsInfo; import org.elasticsearch.monitor.fs.FsProbe; +import org.elasticsearch.monitor.jvm.HotThreads; import org.elasticsearch.monitor.jvm.JvmInfo; import org.elasticsearch.xcontent.NamedXContentRegistry; import java.io.Closeable; import java.io.IOException; +import java.io.OutputStreamWriter; import java.io.UncheckedIOException; +import java.nio.charset.StandardCharsets; import java.nio.file.AtomicMoveNotSupportedException; import java.nio.file.DirectoryStream; import java.nio.file.FileStore; @@ -926,6 +932,38 @@ public Set lockedShards() { } } + // throttle the hot-threads calls: no more than one per minute + private final Semaphore shardLockHotThreadsPermit = new Semaphore(1); + private long nextShardLockHotThreadsNanos = Long.MIN_VALUE; + + private void maybeLogThreadDump(ShardId shardId, String message) { + if (logger.isDebugEnabled() == false) { + return; + } + + final var prefix = format("hot threads while failing to obtain shard lock for %s: %s", shardId, message); + if (shardLockHotThreadsPermit.tryAcquire()) { + try { + final var now = System.nanoTime(); + if (now <= nextShardLockHotThreadsNanos) { + return; + } + nextShardLockHotThreadsNanos = now + TimeUnit.SECONDS.toNanos(60); + final var hotThreads = new HotThreads().busiestThreads(500).ignoreIdleThreads(false).detect(); + try ( + var stream = ChunkedLoggingStream.create(logger, Level.DEBUG, prefix, ReferenceDocs.SHARD_LOCK_TROUBLESHOOTING); + var writer = new OutputStreamWriter(stream, StandardCharsets.UTF_8) + ) { + writer.write(hotThreads); + } + } catch (Exception e) { + logger.error(format("could not obtain %s", prefix), e); + } finally { + shardLockHotThreadsPermit.release(); + } + } + } + private final class InternalShardLock { /* * This class holds a mutex for exclusive access and timeout / wait semantics @@ -975,18 +1013,15 @@ void acquire(long timeoutInMillis, final String details) throws ShardLockObtainF setDetails(details); } else { final Tuple lockDetails = this.lockDetails; // single volatile read - throw new ShardLockObtainFailedException( - shardId, - "obtaining shard lock for [" - + details - + "] timed out after [" - + timeoutInMillis - + "ms], lock already held for [" - + lockDetails.v2() - + "] with age [" - + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lockDetails.v1()) - + "ms]" + final var message = format( + "obtaining shard lock for [%s] timed out after [%dms], lock already held for [%s] with age [%dms]", + details, + timeoutInMillis, + lockDetails.v2(), + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lockDetails.v1()) ); + maybeLogThreadDump(shardId, message); + throw new ShardLockObtainFailedException(shardId, message); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json index a5a62f3da8b7b..cac82799c50a9 100644 --- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json +++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json @@ -3,6 +3,7 @@ "DISCOVERY_TROUBLESHOOTING": "discovery-troubleshooting.html", "UNSTABLE_CLUSTER_TROUBLESHOOTING": "cluster-fault-detection.html#cluster-fault-detection-troubleshooting", "LAGGING_NODE_TROUBLESHOOTING": "cluster-fault-detection.html#_diagnosing_lagging_nodes", + "SHARD_LOCK_TROUBLESHOOTING": "cluster-fault-detection.html#_diagnosing_shardlockobtainfailedexception_failures", "CONCURRENT_REPOSITORY_WRITERS": "add-repository.html", "ARCHIVE_INDICES": "archive-indices.html" } diff --git a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java index 33f7f8a3d24d0..6451c76e99e9e 100644 --- a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java +++ b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java @@ -7,6 +7,7 @@ */ package org.elasticsearch.env; +import org.apache.logging.log4j.Level; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; @@ -34,7 +35,9 @@ import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.MockLogAppender; import org.elasticsearch.test.NodeRoles; +import org.elasticsearch.test.junit.annotations.TestLogging; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -114,44 +117,63 @@ public void testSegmentInfosTracing() { } } + // using a literal string here because the logger is mentioned in the docs, and therefore must only be changed with care + private static final String NODE_ENVIRONMENT_LOGGER_NAME = "org.elasticsearch.env.NodeEnvironment"; + + @TestLogging(reason = "test includes assertions about DEBUG logging", value = NODE_ENVIRONMENT_LOGGER_NAME + ":DEBUG") public void testShardLock() throws Exception { - final NodeEnvironment env = newNodeEnvironment(); + try (var env = newNodeEnvironment()) { - Index index = new Index("foo", "fooUUID"); - ShardLock fooLock = env.shardLock(new ShardId(index, 0), "1"); - assertEquals(new ShardId(index, 0), fooLock.getShardId()); + Index index = new Index("foo", "fooUUID"); - try { - env.shardLock(new ShardId(index, 0), "2"); - fail("shard is locked"); - } catch (ShardLockObtainFailedException ex) { - // expected - } - for (Path path : env.indexPaths(index)) { - Files.createDirectories(path.resolve("0")); - Files.createDirectories(path.resolve("1")); - } - try { - env.lockAllForIndex(index, idxSettings, "3", randomIntBetween(0, 10)); - fail("shard 0 is locked"); - } catch (ShardLockObtainFailedException ex) { - // expected - } + var appender = new MockLogAppender(); + appender.addExpectation( + new MockLogAppender.SeenEventExpectation( + "hot threads logging", + NODE_ENVIRONMENT_LOGGER_NAME, + Level.DEBUG, + "hot threads while failing to obtain shard lock for [foo][0]: obtaining shard lock for [2] timed out after *" + ) + ); + appender.addExpectation( + new MockLogAppender.UnseenEventExpectation( + "second attempt should be suppressed due to throttling", + NODE_ENVIRONMENT_LOGGER_NAME, + Level.DEBUG, + "hot threads while failing to obtain shard lock for [foo][0]: obtaining shard lock for [3] timed out after *" + ) + ); - fooLock.close(); - // can lock again? - env.shardLock(new ShardId(index, 0), "4").close(); + try (var ignored = appender.capturing(NodeEnvironment.class); var lock = env.shardLock(new ShardId(index, 0), "1")) { + assertEquals(new ShardId(index, 0), lock.getShardId()); - List locks = env.lockAllForIndex(index, idxSettings, "5", randomIntBetween(0, 10)); - try { - env.shardLock(new ShardId(index, 0), "6"); - fail("shard is locked"); - } catch (ShardLockObtainFailedException ex) { - // expected + expectThrows(ShardLockObtainFailedException.class, () -> env.shardLock(new ShardId(index, 0), "2")); + + for (Path path : env.indexPaths(index)) { + Files.createDirectories(path.resolve("0")); + Files.createDirectories(path.resolve("1")); + } + expectThrows( + ShardLockObtainFailedException.class, + () -> env.lockAllForIndex(index, idxSettings, "3", randomIntBetween(0, 10)) + ); + + appender.assertAllExpectationsMatched(); + } + + // can lock again? + env.shardLock(new ShardId(index, 0), "4").close(); + + List locks = new ArrayList<>(); + try { + locks.addAll(env.lockAllForIndex(index, idxSettings, "5", randomIntBetween(0, 10))); + expectThrows(ShardLockObtainFailedException.class, () -> env.shardLock(new ShardId(index, 0), "6")); + } finally { + IOUtils.close(locks); + } + + assertTrue("LockedShards: " + env.lockedShards(), env.lockedShards().isEmpty()); } - IOUtils.close(locks); - assertTrue("LockedShards: " + env.lockedShards(), env.lockedShards().isEmpty()); - env.close(); } public void testAvailableIndexFolders() throws Exception {