diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/HotThreadsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/HotThreadsIT.java index 05e3b81c3683f..45906abd29ff8 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/HotThreadsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/HotThreadsIT.java @@ -7,14 +7,18 @@ */ package org.elasticsearch.action.admin; +import org.apache.logging.log4j.Level; import org.apache.lucene.util.Constants; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.cluster.node.hotthreads.NodeHotThreads; import org.elasticsearch.action.admin.cluster.node.hotthreads.NodesHotThreadsRequestBuilder; import org.elasticsearch.action.admin.cluster.node.hotthreads.NodesHotThreadsResponse; +import org.elasticsearch.common.ReferenceDocs; +import org.elasticsearch.common.logging.ChunkedLoggingStreamTests; import org.elasticsearch.core.TimeValue; import org.elasticsearch.monitor.jvm.HotThreads; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.junit.annotations.TestLogging; import org.hamcrest.Matcher; import java.util.Map; @@ -29,6 +33,7 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.CoreMatchers.notNullValue; +import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.not; @@ -176,4 +181,25 @@ public void testTimestampAndParams() throws ExecutionException, InterruptedExcep } } } + + @TestLogging(reason = "testing logging at various levels", value = "org.elasticsearch.action.admin.HotThreadsIT:TRACE") + public void testLogLocalHotThreads() { + final var level = randomFrom(Level.TRACE, Level.DEBUG, Level.INFO, Level.WARN, Level.ERROR); + assertThat( + ChunkedLoggingStreamTests.getDecodedLoggedBody( + logger, + level, + getTestName(), + ReferenceDocs.LOGGING, + () -> HotThreads.logLocalHotThreads(logger, level, getTestName(), ReferenceDocs.LOGGING) + ).utf8ToString(), + allOf( + containsString("Hot threads at"), + containsString("interval=500ms"), + containsString("busiestThreads=500"), + containsString("ignoreIdleThreads=false"), + containsString("cpu usage by thread") + ) + ); + } } diff --git a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java index 0380bb80e0013..1d8a9ef1ce1c4 100644 --- a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java +++ b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java @@ -30,7 +30,6 @@ import org.elasticsearch.common.ReferenceDocs; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.io.FileSystemUtils; -import org.elasticsearch.common.logging.ChunkedLoggingStream; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; @@ -61,9 +60,7 @@ import java.io.Closeable; import java.io.IOException; -import java.io.OutputStreamWriter; import java.io.UncheckedIOException; -import java.nio.charset.StandardCharsets; import java.nio.file.AtomicMoveNotSupportedException; import java.nio.file.DirectoryStream; import java.nio.file.FileStore; @@ -956,13 +953,7 @@ private void maybeLogThreadDump(ShardId shardId, String message) { return; } nextShardLockHotThreadsNanos = now + TimeUnit.SECONDS.toNanos(60); - final var hotThreads = new HotThreads().busiestThreads(500).ignoreIdleThreads(false).detect(); - try ( - var stream = ChunkedLoggingStream.create(logger, Level.DEBUG, prefix, ReferenceDocs.SHARD_LOCK_TROUBLESHOOTING); - var writer = new OutputStreamWriter(stream, StandardCharsets.UTF_8) - ) { - writer.write(hotThreads); - } + HotThreads.logLocalHotThreads(logger, Level.DEBUG, prefix, ReferenceDocs.SHARD_LOCK_TROUBLESHOOTING); } catch (Exception e) { logger.error(format("could not obtain %s", prefix), e); } finally { diff --git a/server/src/main/java/org/elasticsearch/monitor/jvm/HotThreads.java b/server/src/main/java/org/elasticsearch/monitor/jvm/HotThreads.java index e02a3a40b77ef..3966c058aac88 100644 --- a/server/src/main/java/org/elasticsearch/monitor/jvm/HotThreads.java +++ b/server/src/main/java/org/elasticsearch/monitor/jvm/HotThreads.java @@ -8,18 +8,25 @@ package org.elasticsearch.monitor.jvm; +import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.util.CollectionUtil; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.ReferenceDocs; +import org.elasticsearch.common.logging.ChunkedLoggingStream; import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.Maps; import org.elasticsearch.core.TimeValue; import org.elasticsearch.transport.Transports; +import java.io.IOException; +import java.io.OutputStreamWriter; import java.lang.management.ManagementFactory; import java.lang.management.ThreadInfo; import java.lang.management.ThreadMXBean; +import java.nio.charset.StandardCharsets; import java.time.Clock; import java.time.LocalDateTime; import java.util.ArrayList; @@ -68,6 +75,39 @@ public class HotThreads { "DestroyJavaVM" ); + /** + * Capture and log the hot threads on the local node. Useful for capturing stack traces for unexpectedly-slow operations in production. + * The resulting log message may be large, and contains significant whitespace, so it is compressed and base64-encoded using {@link + * ChunkedLoggingStream}. + * + * @param logger The logger to use for the logging + * @param level The log level to use for the logging. + * @param prefix The prefix to emit on each chunk of the logging. + * @param referenceDocs A link to the docs describing how to decode the logging. + */ + public static void logLocalHotThreads(Logger logger, Level level, String prefix, ReferenceDocs referenceDocs) { + if (logger.isEnabled(level) == false) { + return; + } + + final String hotThreads; + try { + hotThreads = new HotThreads().busiestThreads(500).ignoreIdleThreads(false).detect(); + } catch (Exception e) { + logger.error(() -> org.elasticsearch.common.Strings.format("failed to detect local hot threads with prefix [%s]", prefix), e); + return; + } + + try ( + var stream = ChunkedLoggingStream.create(logger, level, prefix, referenceDocs); + var writer = new OutputStreamWriter(stream, StandardCharsets.UTF_8) + ) { + writer.write(hotThreads); + } catch (IOException e) { + logger.error(() -> org.elasticsearch.common.Strings.format("failed to write local hot threads with prefix [%s]", prefix), e); + } + } + public enum ReportType { CPU("cpu"),