Skip to content

Commit

Permalink
Merge branch 'master' into datadog/trace-agent-sampling-feedback-opti…
Browse files Browse the repository at this point in the history
…mization
  • Loading branch information
dougqh authored Jan 27, 2025
2 parents 7d94941 + c58164b commit 7341990
Show file tree
Hide file tree
Showing 31 changed files with 199 additions and 129 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import datadog.remoteconfig.DefaultConfigurationPoller;
import datadog.trace.api.Config;
import datadog.trace.util.AgentTaskScheduler;
import java.security.Security;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -59,6 +60,7 @@ public void createRemaining(Config config) {
}
}

/** Registers a callback to be called when remote communications resume. */
public void whenReady(Runnable callback) {
if (paused) {
synchronized (pausedComponents) {
Expand All @@ -71,8 +73,15 @@ public void whenReady(Runnable callback) {
callback.run(); // not paused, run immediately
}

/** Resumes remote communications including any paused callbacks. */
public void resume() {
paused = false;
// attempt discovery first to avoid potential race condition on IBM Java8
if (null != featuresDiscovery) {
featuresDiscovery.discoverIfOutdated();
} else {
Security.getProviders(); // fallback to preloading provider extensions
}
synchronized (pausedComponents) {
for (Runnable callback : pausedComponents) {
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package datadog.trace.bootstrap;

import static datadog.trace.api.ConfigDefaults.DEFAULT_STARTUP_LOGS_ENABLED;
import static datadog.trace.api.Platform.getRuntimeVendor;
import static datadog.trace.api.Platform.isJavaVersionAtLeast;
import static datadog.trace.api.Platform.isOracleJDK8;
import static datadog.trace.bootstrap.Library.WILDFLY;
Expand Down Expand Up @@ -329,7 +328,7 @@ public void run() {
if (appUsingCustomJMXBuilder) {
log.debug("Custom JMX builder detected. Delaying JMXFetch initialization.");
registerMBeanServerBuilderCallback(new StartJmxCallback(jmxStartDelay));
// one minute fail-safe in case nothing touches JMX and and callback isn't triggered
// one minute fail-safe in case nothing touches JMX and callback isn't triggered
scheduleJmxStart(60 + jmxStartDelay);
} else if (appUsingCustomLogManager) {
log.debug("Custom logger detected. Delaying JMXFetch initialization.");
Expand All @@ -339,20 +338,31 @@ public void run() {
}
}

boolean delayOkHttp = appUsingCustomLogManager && okHttpMayIndirectlyLoadJUL();

/*
* Similar thing happens with DatadogTracer on (at least) zulu-8 because it uses OkHttp which indirectly loads JFR
* events which in turn loads LogManager. This is not a problem on newer JDKs because there JFR uses different
* logging facility. Likewise on IBM JDKs OkHttp may indirectly load 'IBMSASL' which in turn loads LogManager.
*/
boolean delayOkHttp = !ciVisibilityEnabled && okHttpMayIndirectlyLoadJUL();
boolean waitForJUL = appUsingCustomLogManager && delayOkHttp;
int okHttpDelayMillis;
if (waitForJUL) {
okHttpDelayMillis = 1_000;
} else if (delayOkHttp) {
okHttpDelayMillis = 100;
} else {
okHttpDelayMillis = 0;
}

InstallDatadogTracerCallback installDatadogTracerCallback =
new InstallDatadogTracerCallback(initTelemetry, inst, delayOkHttp);
if (delayOkHttp) {
new InstallDatadogTracerCallback(initTelemetry, inst, okHttpDelayMillis);
if (waitForJUL) {
log.debug("Custom logger detected. Delaying Datadog Tracer initialization.");
registerLogManagerCallback(installDatadogTracerCallback);
} else if (okHttpDelayMillis > 0) {
installDatadogTracerCallback.run(); // complete on different thread (after premain)
} else {
installDatadogTracerCallback.execute();
installDatadogTracerCallback.execute(); // complete on primordial thread in premain
}

/*
Expand All @@ -362,7 +372,7 @@ public void run() {
if (profilingEnabled && !isOracleJDK8()) {
StaticEventLogger.begin("Profiling");

if (delayOkHttp) {
if (waitForJUL) {
log.debug("Custom logger detected. Delaying Profiling initialization.");
registerLogManagerCallback(new StartProfilingAgentCallback(inst));
} else {
Expand Down Expand Up @@ -499,18 +509,18 @@ protected static class InstallDatadogTracerCallback extends ClassLoadCallBack {
private final Instrumentation instrumentation;
private final Object sco;
private final Class<?> scoClass;
private final boolean delayOkHttp;
private final int okHttpDelayMillis;

public InstallDatadogTracerCallback(
InitializationTelemetry initTelemetry,
Instrumentation instrumentation,
boolean delayOkHttp) {
this.delayOkHttp = delayOkHttp;
int okHttpDelayMillis) {
this.okHttpDelayMillis = okHttpDelayMillis;
this.instrumentation = instrumentation;
try {
scoClass =
AGENT_CLASSLOADER.loadClass("datadog.communication.ddagent.SharedCommunicationObjects");
sco = scoClass.getConstructor(boolean.class).newInstance(delayOkHttp);
sco = scoClass.getConstructor(boolean.class).newInstance(okHttpDelayMillis > 0);
} catch (ClassNotFoundException
| NoSuchMethodException
| InstantiationException
Expand All @@ -530,7 +540,7 @@ public AgentThread agentThread() {

@Override
public void execute() {
if (delayOkHttp) {
if (okHttpDelayMillis > 0) {
resumeRemoteComponents();
}

Expand All @@ -550,7 +560,7 @@ private void resumeRemoteComponents() {
try {
// remote components were paused for custom log-manager/jmx-builder
// add small delay before resuming remote I/O to help stabilization
Thread.sleep(1_000);
Thread.sleep(okHttpDelayMillis);
scoClass.getMethod("resume").invoke(sco);
} catch (InterruptedException ignore) {
} catch (Throwable e) {
Expand Down Expand Up @@ -1339,15 +1349,25 @@ private static String ddGetEnv(final String sysProp) {
}

private static boolean okHttpMayIndirectlyLoadJUL() {
if ("IBM Corporation".equals(getRuntimeVendor())) {
return true; // IBM JDKs ship with 'IBMSASL' which will load JUL when OkHttp accesses TLS
if (isIBMSASLInstalled() || isACCPInstalled()) {
return true; // 'IBMSASL' and 'ACCP' crypto providers can load JUL when OkHttp accesses TLS
}
if (isJavaVersionAtLeast(9)) {
return false; // JDKs since 9 have reworked JFR to use a different logging facility, not JUL
}
return isJFRSupported(); // assume OkHttp will indirectly load JUL via its JFR events
}

private static boolean isIBMSASLInstalled() {
return ClassLoader.getSystemResource("com/ibm/security/sasl/IBMSASL.class") != null;
}

private static boolean isACCPInstalled() {
return ClassLoader.getSystemResource(
"com/amazon/corretto/crypto/provider/AmazonCorrettoCryptoProvider.class")
!= null;
}

private static boolean isJFRSupported() {
// FIXME: this is quite a hack because there maybe jfr classes on classpath somehow that have
// nothing to do with JDK - but this should be safe because only thing this does is to delay
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,12 @@ TestSuiteImpl testSuiteStart(
boolean isModified(TestSourceData testSourceData);

/**
* Checks if a given test should be skipped with Intelligent Test Runner or not
* Checks if a given test can be skipped with Intelligent Test Runner or not.
*
* @param test Test to be checked
* @return {@code true} if the test can be skipped, {@code false} otherwise
*/
boolean shouldBeSkipped(TestIdentifier test);

/**
* Checks if a given test can be skipped with Intelligent Test Runner or not. If the test is
* considered skippable, the count of skippable tests is incremented.
*
* @param test Test to be checked
* @return {@code true} if the test can be skipped, {@code false} otherwise
*/
boolean skip(TestIdentifier test);
boolean isSkippable(TestIdentifier test);

@Nonnull
TestRetryPolicy retryPolicy(TestIdentifier test, TestSourceData testSource);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import datadog.trace.civisibility.source.LinesResolver;
import datadog.trace.civisibility.source.SourcePathResolver;
import datadog.trace.civisibility.source.SourceResolutionException;
import datadog.trace.civisibility.test.ExecutionResults;
import java.lang.reflect.Method;
import java.util.Collection;
import java.util.Collections;
Expand All @@ -50,6 +51,7 @@ public class TestImpl implements DDTest {
private static final Logger log = LoggerFactory.getLogger(TestImpl.class);

private final CiVisibilityMetricCollector metricCollector;
private final ExecutionResults executionResults;
private final TestFrameworkInstrumentation instrumentation;
private final AgentSpan span;
private final DDTraceId sessionId;
Expand Down Expand Up @@ -78,11 +80,13 @@ public TestImpl(
LinesResolver linesResolver,
Codeowners codeowners,
CoverageStore.Factory coverageStoreFactory,
ExecutionResults executionResults,
Consumer<AgentSpan> onSpanFinish) {
this.instrumentation = instrumentation;
this.metricCollector = metricCollector;
this.sessionId = moduleSpanContext.getTraceId();
this.suiteId = suiteId;
this.executionResults = executionResults;
this.onSpanFinish = onSpanFinish;

this.identifier = new TestIdentifier(testSuiteName, testName, testParameters);
Expand Down Expand Up @@ -258,6 +262,10 @@ public void end(@Nullable Long endTime) {
span.finish();
}

if (InstrumentationBridge.ITR_SKIP_REASON.equals(span.getTag(Tags.TEST_SKIP_REASON))) {
executionResults.incrementTestsSkippedByItr();
}

metricCollector.add(
CiVisibilityCountMetric.EVENT_FINISHED,
1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import datadog.trace.civisibility.source.LinesResolver;
import datadog.trace.civisibility.source.SourcePathResolver;
import datadog.trace.civisibility.source.SourceResolutionException;
import datadog.trace.civisibility.test.ExecutionResults;
import datadog.trace.civisibility.utils.SpanUtils;
import java.lang.reflect.Method;
import java.util.Collection;
Expand Down Expand Up @@ -49,6 +50,7 @@ public class TestSuiteImpl implements DDTestSuite {
private final Codeowners codeowners;
private final LinesResolver linesResolver;
private final CoverageStore.Factory coverageStoreFactory;
private final ExecutionResults executionResults;
private final boolean parallelized;
private final Consumer<AgentSpan> onSpanFinish;

Expand All @@ -69,6 +71,7 @@ public TestSuiteImpl(
Codeowners codeowners,
LinesResolver linesResolver,
CoverageStore.Factory coverageStoreFactory,
ExecutionResults executionResults,
Consumer<AgentSpan> onSpanFinish) {
this.moduleSpanContext = moduleSpanContext;
this.moduleName = moduleName;
Expand All @@ -84,6 +87,7 @@ public TestSuiteImpl(
this.codeowners = codeowners;
this.linesResolver = linesResolver;
this.coverageStoreFactory = coverageStoreFactory;
this.executionResults = executionResults;
this.onSpanFinish = onSpanFinish;

AgentTracer.SpanBuilder spanBuilder =
Expand Down Expand Up @@ -254,6 +258,7 @@ public TestImpl testStart(
linesResolver,
codeowners,
coverageStoreFactory,
executionResults,
SpanUtils.propagateCiVisibilityTagsTo(span));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import datadog.trace.civisibility.ipc.TestFramework;
import datadog.trace.civisibility.source.LinesResolver;
import datadog.trace.civisibility.source.SourcePathResolver;
import datadog.trace.civisibility.test.ExecutionResults;
import datadog.trace.civisibility.test.ExecutionStrategy;
import java.util.Collection;
import java.util.TreeSet;
Expand All @@ -46,6 +47,7 @@ public class ProxyTestModule implements TestFrameworkModule {
private final AgentSpanContext parentProcessModuleContext;
private final String moduleName;
private final ExecutionStrategy executionStrategy;
private final ExecutionResults executionResults;
private final SignalClient.Factory signalClientFactory;
private final ChildProcessCoverageReporter childProcessCoverageReporter;
private final Config config;
Expand Down Expand Up @@ -73,6 +75,7 @@ public ProxyTestModule(
this.parentProcessModuleContext = parentProcessModuleContext;
this.moduleName = moduleName;
this.executionStrategy = executionStrategy;
this.executionResults = new ExecutionResults();
this.signalClientFactory = signalClientFactory;
this.childProcessCoverageReporter = childProcessCoverageReporter;
this.config = config;
Expand Down Expand Up @@ -100,13 +103,8 @@ public boolean isModified(TestSourceData testSourceData) {
}

@Override
public boolean shouldBeSkipped(TestIdentifier test) {
return executionStrategy.shouldBeSkipped(test);
}

@Override
public boolean skip(TestIdentifier test) {
return executionStrategy.skip(test);
public boolean isSkippable(TestIdentifier test) {
return executionStrategy.isSkippable(test);
}

@Override
Expand Down Expand Up @@ -143,7 +141,7 @@ private void sendModuleExecutionResult() {
boolean earlyFlakeDetectionEnabled = earlyFlakeDetectionSettings.isEnabled();
boolean earlyFlakeDetectionFaulty =
earlyFlakeDetectionEnabled && executionStrategy.isEFDLimitReached();
long testsSkippedTotal = executionStrategy.getTestsSkipped();
long testsSkippedTotal = executionResults.getTestsSkippedByItr();

signalClient.send(
new ModuleExecutionResult(
Expand Down Expand Up @@ -185,6 +183,7 @@ public TestSuiteImpl testSuiteStart(
codeowners,
linesResolver,
coverageStoreFactory,
executionResults,
this::propagateTestFrameworkData);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import datadog.trace.civisibility.domain.TestSuiteImpl;
import datadog.trace.civisibility.source.LinesResolver;
import datadog.trace.civisibility.source.SourcePathResolver;
import datadog.trace.civisibility.test.ExecutionResults;
import datadog.trace.civisibility.test.ExecutionStrategy;
import datadog.trace.civisibility.utils.SpanUtils;
import java.util.function.Consumer;
Expand All @@ -39,6 +40,7 @@ public class HeadlessTestModule extends AbstractTestModule implements TestFramew

private final CoverageStore.Factory coverageStoreFactory;
private final ExecutionStrategy executionStrategy;
private final ExecutionResults executionResults;

public HeadlessTestModule(
AgentSpanContext sessionSpanContext,
Expand Down Expand Up @@ -67,6 +69,7 @@ public HeadlessTestModule(
onSpanFinish);
this.coverageStoreFactory = coverageStoreFactory;
this.executionStrategy = executionStrategy;
this.executionResults = new ExecutionResults();
}

@Override
Expand All @@ -85,13 +88,8 @@ public boolean isModified(TestSourceData testSourceData) {
}

@Override
public boolean shouldBeSkipped(TestIdentifier test) {
return executionStrategy.shouldBeSkipped(test);
}

@Override
public boolean skip(TestIdentifier test) {
return executionStrategy.skip(test);
public boolean isSkippable(TestIdentifier test) {
return executionStrategy.isSkippable(test);
}

@Override
Expand All @@ -111,7 +109,7 @@ public void end(@Nullable Long endTime) {
setTag(Tags.TEST_ITR_TESTS_SKIPPING_ENABLED, true);
setTag(Tags.TEST_ITR_TESTS_SKIPPING_TYPE, "test");

long testsSkippedTotal = executionStrategy.getTestsSkipped();
long testsSkippedTotal = executionResults.getTestsSkippedByItr();
setTag(Tags.TEST_ITR_TESTS_SKIPPING_COUNT, testsSkippedTotal);
if (testsSkippedTotal > 0) {
setTag(DDTags.CI_ITR_TESTS_SKIPPED, true);
Expand Down Expand Up @@ -154,6 +152,7 @@ public TestSuiteImpl testSuiteStart(
codeowners,
linesResolver,
coverageStoreFactory,
executionResults,
SpanUtils.propagateCiVisibilityTagsTo(span));
}
}
Loading

0 comments on commit 7341990

Please sign in to comment.