Skip to content

Commit 2f579cf

Browse files
Matt authored and jbachorik committed
Automatically register crashtracking via native extensions (#8851)
Co-authored-by: Jaroslav Bachorik <[email protected]>
1 parent 89bba0d commit 2f579cf

File tree

50 files changed

+1062
-438
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1062
-438
lines changed

dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java

Lines changed: 123 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import static datadog.environment.JavaVirtualMachine.isJavaVersionAtLeast;
44
import static datadog.environment.JavaVirtualMachine.isOracleJDK8;
55
import static datadog.trace.api.ConfigDefaults.DEFAULT_STARTUP_LOGS_ENABLED;
6+
import static datadog.trace.api.telemetry.LogCollector.SEND_TELEMETRY;
67
import static datadog.trace.bootstrap.Library.WILDFLY;
78
import static datadog.trace.bootstrap.Library.detectLibraries;
89
import static datadog.trace.util.AgentThreadFactory.AgentThread.JMX_STARTUP;
@@ -22,6 +23,7 @@
2223
import datadog.trace.api.appsec.AppSecEventTracker;
2324
import datadog.trace.api.config.AppSecConfig;
2425
import datadog.trace.api.config.CiVisibilityConfig;
26+
import datadog.trace.api.config.CrashTrackingConfig;
2527
import datadog.trace.api.config.CwsConfig;
2628
import datadog.trace.api.config.DebuggerConfig;
2729
import datadog.trace.api.config.GeneralConfig;
@@ -57,6 +59,7 @@
5759
import java.net.URISyntaxException;
5860
import java.net.URL;
5961
import java.security.CodeSource;
62+
import java.util.Arrays;
6063
import java.util.EnumSet;
6164
import java.util.concurrent.TimeUnit;
6265
import java.util.concurrent.atomic.AtomicBoolean;
@@ -98,6 +101,9 @@ private enum AgentFeature {
98101
TRACING(TraceInstrumentationConfig.TRACE_ENABLED, true),
99102
JMXFETCH(JmxFetchConfig.JMX_FETCH_ENABLED, true),
100103
STARTUP_LOGS(GeneralConfig.STARTUP_LOGS_ENABLED, DEFAULT_STARTUP_LOGS_ENABLED),
104+
CRASH_TRACKING(
105+
CrashTrackingConfig.CRASH_TRACKING_ENABLED,
106+
CrashTrackingConfig.CRASH_TRACKING_ENABLED_DEFAULT),
101107
PROFILING(ProfilingConfig.PROFILING_ENABLED, false),
102108
APPSEC(AppSecConfig.APPSEC_ENABLED, false),
103109
IAST(IastConfig.IAST_ENABLED, false),
@@ -151,9 +157,11 @@ public boolean isEnabledByDefault() {
151157
private static ClassLoader AGENT_CLASSLOADER = null;
152158

153159
private static volatile Runnable PROFILER_INIT_AFTER_JMX = null;
160+
private static volatile Runnable CRASHTRACKER_INIT_AFTER_JMX = null;
154161

155162
private static boolean jmxFetchEnabled = true;
156163
private static boolean profilingEnabled = false;
164+
private static boolean crashTrackingEnabled = false;
157165
private static boolean appSecEnabled;
158166
private static boolean appSecFullyDisabled;
159167
private static boolean remoteConfigEnabled = true;
@@ -283,6 +291,7 @@ public static void start(
283291

284292
jmxFetchEnabled = isFeatureEnabled(AgentFeature.JMXFETCH);
285293
profilingEnabled = isFeatureEnabled(AgentFeature.PROFILING);
294+
crashTrackingEnabled = isFeatureEnabled(AgentFeature.CRASH_TRACKING);
286295
usmEnabled = isFeatureEnabled(AgentFeature.USM);
287296
appSecEnabled = isFeatureEnabled(AgentFeature.APPSEC);
288297
appSecFullyDisabled = isFullyDisabled(AgentFeature.APPSEC);
@@ -329,13 +338,7 @@ public static void start(
329338
// Profiling can not run early on Oracle JDK 8 because it will cause JFR initialization
330339
// deadlock.
331340
// Oracle JDK 8 JFR controller requires JMX so register an 'after-jmx-initialized' callback.
332-
PROFILER_INIT_AFTER_JMX =
333-
new Runnable() {
334-
@Override
335-
public void run() {
336-
startProfilingAgent(false, inst);
337-
}
338-
};
341+
PROFILER_INIT_AFTER_JMX = () -> startProfilingAgent(false, inst);
339342
}
340343
}
341344

@@ -348,6 +351,14 @@ public void run() {
348351
* when it will happen after the class transformers were added.
349352
*/
350353
AgentTaskScheduler.initialize();
354+
355+
// We need to run the crashtracking initialization after all the config has been resolved and
356+
// task scheduler initialized
357+
if (crashTrackingEnabled) {
358+
StaticEventLogger.begin("crashtracking");
359+
startCrashTracking();
360+
StaticEventLogger.end("crashtracking");
361+
}
351362
startDatadogAgent(initTelemetry, inst);
352363

353364
final EnumSet<Library> libraries = detectLibraries(log);
@@ -742,6 +753,29 @@ private static synchronized void installDatadogTracer(
742753
StaticEventLogger.end("GlobalTracer");
743754
}
744755

756+
/**
 * Starts crash tracking, picking the initialization path from the running Java version.
 *
 * <p>On Java 9 and later, initialization runs inline when early start is requested and is
 * otherwise submitted to {@code AgentTaskScheduler.INSTANCE}. On older versions the
 * initialization is only stored in {@code CRASHTRACKER_INIT_AFTER_JMX}; nothing is initialized
 * by this call itself in that case.
 */
private static void startCrashTracking() {
757+
if (isJavaVersionAtLeast(9)) {
758+
// it is safe to initialize crashtracking early
759+
// since it can take 100ms+ to initialize the native library we will defer the initialization
760+
// ... unless we request early start with the debug config flag
761+
// Start from the compiled-in default; an explicitly set property overrides it below.
boolean forceEarlyStart = CrashTrackingConfig.CRASH_TRACKING_START_EARLY_DEFAULT;
762+
String forceEarlyStartStr =
763+
ddGetProperty("dd." + CrashTrackingConfig.CRASH_TRACKING_START_EARLY);
764+
if (forceEarlyStartStr != null) {
765+
// Boolean.parseBoolean yields true only for a case-insensitive "true"; anything else is false.
forceEarlyStart = Boolean.parseBoolean(forceEarlyStartStr);
766+
}
767+
if (forceEarlyStart) {
768+
// Early start: run the initialization on the calling thread.
initializeCrashTrackingDefault();
769+
} else {
770+
// Deferred start: hand the same initialization to the agent task scheduler.
AgentTaskScheduler.INSTANCE.execute(Agent::initializeCrashTrackingDefault);
771+
}
772+
} else {
773+
// for Java 8 we are relying on JMX to give us the process PID
774+
// we need to delay the crash tracking initialization until JMX is available
775+
// Only the callback is recorded here; startJmx() runs it and then clears the field.
CRASHTRACKER_INIT_AFTER_JMX = Agent::initializeDelayedCrashTracking;
776+
}
777+
}
778+
745779
private static void scheduleJmxStart(final int jmxStartDelay) {
746780
if (jmxStartDelay > 0) {
747781
AgentTaskScheduler.INSTANCE.scheduleWithJitter(
@@ -765,25 +799,41 @@ private static synchronized void startJmx() {
765799
if (jmxStarting.getAndSet(true)) {
766800
return; // another thread is already in startJmx
767801
}
768-
// error tracking initialization relies on JMX being available
769-
initializeErrorTracking();
770802
if (jmxFetchEnabled) {
771803
startJmxFetch();
772804
}
773805
initializeJmxSystemAccessProvider(AGENT_CLASSLOADER);
806+
if (crashTrackingEnabled && CRASHTRACKER_INIT_AFTER_JMX != null) {
807+
try {
808+
CRASHTRACKER_INIT_AFTER_JMX.run();
809+
} finally {
810+
CRASHTRACKER_INIT_AFTER_JMX = null;
811+
}
812+
}
774813
if (profilingEnabled) {
775814
registerDeadlockDetectionEvent();
776815
registerSmapEntryEvent();
777816
if (PROFILER_INIT_AFTER_JMX != null) {
778-
if (getJmxStartDelay() == 0) {
779-
log.debug("Waiting for profiler initialization");
780-
AgentTaskScheduler.INSTANCE.scheduleWithJitter(
781-
PROFILER_INIT_AFTER_JMX, 500, TimeUnit.MILLISECONDS);
782-
} else {
783-
log.debug("Initializing profiler");
784-
PROFILER_INIT_AFTER_JMX.run();
817+
try {
818+
/*
819+
When getJmxStartDelay() is set to 0 we will attempt to initialize the JMX subsystem as soon as available.
820+
But, this can cause issues with JFR as it needs some 'grace period' after JMX is ready. That's why we are
821+
re-scheduling the profiler initialization code just a tad later.
822+
823+
If the jmx start delay is set, we are already delayed relative to the jmx init so we can just plainly
824+
run the initialization code.
825+
*/
826+
if (getJmxStartDelay() == 0) {
827+
log.debug("Waiting for profiler initialization");
828+
AgentTaskScheduler.INSTANCE.scheduleWithJitter(
829+
PROFILER_INIT_AFTER_JMX, 500, TimeUnit.MILLISECONDS);
830+
} else {
831+
log.debug("Initializing profiler");
832+
PROFILER_INIT_AFTER_JMX.run();
833+
}
834+
} finally {
835+
PROFILER_INIT_AFTER_JMX = null;
785836
}
786-
PROFILER_INIT_AFTER_JMX = null;
787837
}
788838
}
789839
}
@@ -1043,16 +1093,63 @@ private static void stopTelemetry() {
10431093
}
10441094
}
10451095

1046-
private static void initializeErrorTracking() {
1096+
/**
 * Runs crash tracking initialization as the deferred attempt.
 *
 * <p>Calls {@code initializeCrashTracking} with {@code delayed = true} and with
 * {@code checkNative} taken from {@code isCrashTrackingAutoconfigEnabled()}.
 */
private static void initializeDelayedCrashTracking() {
1097+
initializeCrashTracking(true, isCrashTrackingAutoconfigEnabled());
1098+
}
1099+
1100+
/**
 * Runs crash tracking initialization as the deferred attempt with the native check skipped.
 *
 * <p>Calls {@code initializeCrashTracking} with {@code delayed = true} and
 * {@code checkNative = false}. {@code initializeCrashTracking} installs this method as
 * {@code CRASHTRACKER_INIT_AFTER_JMX} when a non-delayed attempt did not succeed.
 */
private static void initializeDelayedCrashTrackingOnlyJmx() {
1101+
initializeCrashTracking(true, false);
1102+
}
1103+
1104+
/**
 * Runs crash tracking initialization as the first, non-deferred attempt.
 *
 * <p>Calls {@code initializeCrashTracking} with {@code delayed = false} and with
 * {@code checkNative} taken from {@code isCrashTrackingAutoconfigEnabled()}.
 */
private static void initializeCrashTrackingDefault() {
1105+
initializeCrashTracking(false, isCrashTrackingAutoconfigEnabled());
1106+
}
1107+
1108+
/**
 * Decides whether crash tracking autoconfiguration is enabled.
 *
 * <p>An explicitly set {@code dd.}-prefixed autoconfig property wins and is parsed with
 * {@code Boolean.parseBoolean}; when the property is absent the result is the current value of
 * {@code profilingEnabled}.
 *
 * @return the parsed property value if the property is set, otherwise {@code profilingEnabled}
 */
private static boolean isCrashTrackingAutoconfigEnabled() {
1109+
String enabledVal = ddGetProperty("dd." + CrashTrackingConfig.CRASH_TRACKING_ENABLE_AUTOCONFIG);
1110+
// NOTE(review): this default is overwritten on both branches below, so it is never returned.
// Confirm whether the configured default or profilingEnabled is the intended fallback.
boolean enabled = CrashTrackingConfig.CRASH_TRACKING_ENABLE_AUTOCONFIG_DEFAULT;
1111+
if (enabledVal != null) {
1112+
// Boolean.parseBoolean yields true only for a case-insensitive "true"; anything else is false.
enabled = Boolean.parseBoolean(enabledVal);
1113+
} else {
1114+
// If the property is not set, then we check if profiling is enabled
1115+
enabled = profilingEnabled;
1116+
}
1117+
return enabled;
1118+
}
1119+
1120+
/**
 * Initializes crash tracking by reflectively invoking {@code initialize(boolean)} on
 * {@code datadog.crashtracking.Initializer}, loaded through {@code AGENT_CLASSLOADER}.
 *
 * <p>Returns immediately on J9. Nothing is propagated to the caller: any {@code Throwable}
 * raised while loading or invoking is caught and logged at debug level.
 *
 * @param delayed {@code true} when this call is itself the deferred attempt; a first attempt that
 *     was skipped or returned {@code false} is then followed immediately by
 *     {@code initialize(true)}. When {@code false}, the follow-up is instead stored in
 *     {@code CRASHTRACKER_INIT_AFTER_JMX}.
 * @param checkNative {@code true} to attempt {@code initialize(false)} first; {@code false} to
 *     skip that attempt
 */
private static void initializeCrashTracking(boolean delayed, boolean checkNative) {
10471121
if (JavaVirtualMachine.isJ9()) {
10481122
// TODO currently crash tracking is supported only for HotSpot based JVMs
10491123
return;
10501124
}
1125+
log.debug("Initializing crashtracking");
10511126
try {
1052-
Class<?> clz = AGENT_CLASSLOADER.loadClass("com.datadog.crashtracking.ScriptInitializer");
1053-
clz.getMethod("initialize").invoke(null);
1127+
Class<?> clz = AGENT_CLASSLOADER.loadClass("datadog.crashtracking.Initializer");
1128+
// first try to use the JVMAccess using the native library; unless `checkNative` is false
1129+
// rslt is tri-state: TRUE = initialized, FALSE = attempt failed or was skipped,
// null = deferred to the callback. The && short-circuits, so nothing is invoked when
// checkNative is false.
Boolean rslt =
1130+
checkNative && (Boolean) clz.getMethod("initialize", boolean.class).invoke(null, false);
1131+
if (!rslt) {
1132+
if (delayed) {
1133+
// already delayed initialization, so no need to reschedule it again
1134+
// just call initialize and force JMX
1135+
rslt = (Boolean) clz.getMethod("initialize", boolean.class).invoke(null, true);
1136+
} else {
1137+
// delayed initialization, so we need to reschedule it and mark as delayed but do not
1138+
// re-check the native library
1139+
CRASHTRACKER_INIT_AFTER_JMX = Agent::initializeDelayedCrashTrackingOnlyJmx;
1140+
rslt = null; // we will initialize it later
1141+
}
1142+
}
1143+
// Report the outcome; only the failure case is tagged with SEND_TELEMETRY.
if (rslt == null) {
1144+
log.debug("Crashtracking initialization delayed until JMX is available");
1145+
} else if (rslt) {
1146+
log.debug("Crashtracking initialized");
1147+
} else {
1148+
log.debug(
1149+
SEND_TELEMETRY, "Crashtracking failed to initialize. No additional details available.");
1150+
}
10541151
} catch (Throwable t) {
1055-
log.debug("Unable to initialize crash uploader", t);
1152+
// Covers class loading, method lookup, invocation and unboxing failures alike.
log.debug(SEND_TELEMETRY, "Unable to initialize crashtracking", t);
10561153
}
10571154
}
10581155

@@ -1151,8 +1248,11 @@ public void withTracer(TracerAPI tracer) {
11511248
}
11521249
});
11531250
}
1154-
} catch (final Throwable ex) {
1155-
log.error("Throwable thrown while starting profiling agent", ex);
1251+
} catch (final Throwable t) {
1252+
log.error(
1253+
SEND_TELEMETRY,
1254+
"Throwable thrown while starting profiling agent "
1255+
+ Arrays.toString(t.getCause().getStackTrace()));
11561256
} finally {
11571257
Thread.currentThread().setContextClassLoader(contextLoader);
11581258
}

dd-java-agent/agent-crashtracking/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ dependencies {
1616
implementation project(':internal-api')
1717
implementation project(':utils:container-utils')
1818
implementation project(':utils:version-utils')
19+
implementation project(path: ':dd-java-agent:ddprof-lib', configuration: 'shadow')
1920

2021
implementation libs.okhttp
2122
implementation libs.moshi

0 commit comments

Comments
 (0)