Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize test retry logic #8289

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
import datadog.trace.api.civisibility.domain.TestContext;
import datadog.trace.api.civisibility.telemetry.CiVisibilityCountMetric;
import datadog.trace.api.civisibility.telemetry.CiVisibilityMetricCollector;
import datadog.trace.api.civisibility.telemetry.TagValue;
import datadog.trace.api.civisibility.telemetry.tag.BrowserDriver;
import datadog.trace.api.civisibility.telemetry.tag.EventType;
import datadog.trace.api.civisibility.telemetry.tag.IsModified;
import datadog.trace.api.civisibility.telemetry.tag.IsNew;
import datadog.trace.api.civisibility.telemetry.tag.IsRetry;
import datadog.trace.api.civisibility.telemetry.tag.IsRum;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import datadog.trace.api.civisibility.telemetry.tag.SkipReason;
import datadog.trace.api.civisibility.telemetry.tag.TestFrameworkInstrumentation;
import datadog.trace.api.gateway.RequestContextSlot;
Expand Down Expand Up @@ -263,6 +263,7 @@ public void end(@Nullable Long endTime) {
span.finish();
}

Object retryReason = span.getTag(Tags.TEST_RETRY_REASON);
metricCollector.add(
CiVisibilityCountMetric.EVENT_FINISHED,
1,
Expand All @@ -271,25 +272,13 @@ public void end(@Nullable Long endTime) {
span.getTag(Tags.TEST_IS_NEW) != null ? IsNew.TRUE : null,
span.getTag(Tags.TEST_IS_MODIFIED) != null ? IsModified.TRUE : null,
span.getTag(Tags.TEST_IS_RETRY) != null ? IsRetry.TRUE : null,
getRetryReason(),
retryReason instanceof TagValue ? (TagValue) retryReason : null,
span.getTag(Tags.TEST_IS_RUM_ACTIVE) != null ? IsRum.TRUE : null,
CIConstants.SELENIUM_BROWSER_DRIVER.equals(span.getTag(Tags.TEST_BROWSER_DRIVER))
? BrowserDriver.SELENIUM
: null);
}

private RetryReason getRetryReason() {
String retryReason = (String) span.getTag(Tags.TEST_RETRY_REASON);
if (retryReason != null) {
try {
return RetryReason.valueOf(retryReason.toUpperCase());
} catch (IllegalArgumentException e) {
log.debug("Non-standard retry-reason: {}", retryReason);
}
}
return null;
}

/**
* Tests often perform operations that involve APM instrumentations: sending an HTTP request,
* executing a database query, etc. APM instrumentations create spans that correspond to those
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import datadog.trace.api.civisibility.config.TestSourceData;
import datadog.trace.api.civisibility.events.TestEventsHandler;
import datadog.trace.api.civisibility.retry.TestRetryPolicy;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import datadog.trace.api.civisibility.telemetry.tag.SkipReason;
import datadog.trace.api.civisibility.telemetry.tag.TestFrameworkInstrumentation;
import datadog.trace.bootstrap.ContextStore;
Expand Down Expand Up @@ -57,7 +58,7 @@ public void onTestStart(
@Nullable String testParameters,
@Nullable Collection<String> categories,
@Nonnull TestSourceData testSourceData,
String retryReason,
RetryReason retryReason,
@Nullable Long startTime) {
// do nothing
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import datadog.trace.api.civisibility.telemetry.CiVisibilityCountMetric;
import datadog.trace.api.civisibility.telemetry.CiVisibilityMetricCollector;
import datadog.trace.api.civisibility.telemetry.tag.EventType;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import datadog.trace.api.civisibility.telemetry.tag.SkipReason;
import datadog.trace.api.civisibility.telemetry.tag.TestFrameworkInstrumentation;
import datadog.trace.bootstrap.ContextStore;
Expand Down Expand Up @@ -138,7 +139,7 @@ public void onTestStart(
final @Nullable String testParameters,
final @Nullable Collection<String> categories,
final @Nonnull TestSourceData testSourceData,
final @Nullable String retryReason,
final @Nullable RetryReason retryReason,
final @Nullable Long startTime) {
if (skipTrace(testSourceData.getTestClass())) {
return;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package datadog.trace.civisibility.retry;

import datadog.trace.api.civisibility.retry.TestRetryPolicy;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import org.jetbrains.annotations.Nullable;

public class NeverRetry implements TestRetryPolicy {
Expand All @@ -20,18 +21,13 @@ public boolean suppressFailures() {
}

@Override
public boolean retry(boolean successful, long duration) {
return false;
}

@Override
public boolean currentExecutionIsRetry() {
public boolean retry(boolean successful, long durationMillis) {
return false;
}

@Nullable
@Override
public String currentExecutionRetryReason() {
public RetryReason currentExecutionRetryReason() {
return null;
}
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
package datadog.trace.civisibility.retry;

import datadog.trace.api.civisibility.retry.TestRetryPolicy;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import java.util.concurrent.atomic.AtomicInteger;
import org.jetbrains.annotations.Nullable;

/** Retries a test case if it failed, up to a maximum number of times. */
public class RetryIfFailed implements TestRetryPolicy {

private static final String AUTO_TEST_RETRIES = "atr";

private final int maxExecutions;
private int executions;

Expand All @@ -28,11 +27,13 @@ public boolean retriesLeft() {

@Override
public boolean suppressFailures() {
return true;
// if this isn't the last attempt,
// possible failures should be suppressed
return retriesLeft();
}

@Override
public boolean retry(boolean successful, long duration) {
public boolean retry(boolean successful, long durationMillis) {
if (!successful && ++executions < maxExecutions) {
totalExecutions.incrementAndGet();
return true;
Expand All @@ -41,14 +42,13 @@ public boolean retry(boolean successful, long duration) {
}
}

@Nullable
@Override
public boolean currentExecutionIsRetry() {
return executions > 0;
public RetryReason currentExecutionRetryReason() {
return currentExecutionIsRetry() ? RetryReason.atr : null;
}

@Nullable
@Override
public String currentExecutionRetryReason() {
return currentExecutionIsRetry() ? AUTO_TEST_RETRIES : null;
private boolean currentExecutionIsRetry() {
return executions > 0;
}
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
package datadog.trace.civisibility.retry;

import datadog.trace.api.civisibility.retry.TestRetryPolicy;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import datadog.trace.civisibility.config.EarlyFlakeDetectionSettings;
import org.jetbrains.annotations.Nullable;

/** Retries a test case N times (N depends on test duration) regardless of success or failure. */
public class RetryNTimes implements TestRetryPolicy {

private static final String EARLY_FLAKINESS_DETECTION = "efd";

private final EarlyFlakeDetectionSettings earlyFlakeDetectionSettings;
private int executions;
private int maxExecutions;
Expand All @@ -30,21 +29,20 @@ public boolean suppressFailures() {
}

@Override
public boolean retry(boolean successful, long duration) {
public boolean retry(boolean successful, long durationMillis) {
// adjust maximum retries based on the now known test duration
int maxExecutionsForGivenDuration = earlyFlakeDetectionSettings.getExecutions(duration);
int maxExecutionsForGivenDuration = earlyFlakeDetectionSettings.getExecutions(durationMillis);
maxExecutions = Math.min(maxExecutions, maxExecutionsForGivenDuration);
return ++executions < maxExecutions;
}

@Nullable
@Override
public boolean currentExecutionIsRetry() {
return executions > 0;
public RetryReason currentExecutionRetryReason() {
return currentExecutionIsRetry() ? RetryReason.efd : null;
}

@Nullable
@Override
public String currentExecutionRetryReason() {
return currentExecutionIsRetry() ? EARLY_FLAKINESS_DETECTION : null;
private boolean currentExecutionIsRetry() {
return executions > 0;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public RetryAwareNotifier(TestRetryPolicy retryPolicy, RunNotifier notifier) {
public void fireTestFailure(Failure failure) {
this.failed = true;

if (!retryPolicy.retriesLeft() || !retryPolicy.suppressFailures()) {
if (!retryPolicy.suppressFailures()) {
super.fireTestFailure(failure);
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import datadog.trace.api.civisibility.config.TestIdentifier;
import datadog.trace.api.civisibility.config.TestSourceData;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import datadog.trace.bootstrap.instrumentation.api.AgentScope;
import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
import datadog.trace.bootstrap.instrumentation.api.AgentTracer;
Expand Down Expand Up @@ -185,8 +186,9 @@ public static boolean isParameterizedTest(TestDescriptor testDescriptor) {
return "test-template".equals(lastSegment.getType());
}

public static String retryReason(TestDescriptor testDescriptor) {
return getIDSegmentValue(testDescriptor, RETRY_DESCRIPTOR_REASON_SUFFIX);
public static RetryReason retryReason(TestDescriptor testDescriptor) {
String retryReasonSegment = getIDSegmentValue(testDescriptor, RETRY_DESCRIPTOR_REASON_SUFFIX);
return retryReasonSegment != null ? RetryReason.valueOf(retryReasonSegment) : null;
}

public static boolean isRetry(TestDescriptor testDescriptor) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ public static Boolean execute(@Advice.This HierarchicalTestExecutorService.TestT
int retryAttemptIdx = 0;
boolean retry;
while (true) {
factory.setSuppressFailures(retryPolicy.retriesLeft() && retryPolicy.suppressFailures());
factory.setSuppressFailures(retryPolicy.suppressFailures());

long startTimestamp = System.currentTimeMillis();
CallDepthThreadLocalMap.incrementCallDepth(HierarchicalTestExecutorService.TestTask.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ public TestDescriptorHandle(TestDescriptor testDescriptor) {
}

public TestDescriptor withIdSuffix(
String segmentName, String segmentValue, String otherSegmentName, String otherSegmentValue) {
String segmentName, Object segmentValue, String otherSegmentName, Object otherSegmentValue) {
UniqueId uniqueId = testDescriptor.getUniqueId();
UniqueId updatedId =
uniqueId.append(segmentName, segmentValue).append(otherSegmentName, otherSegmentValue);
uniqueId
.append(segmentName, String.valueOf(segmentValue))
.append(otherSegmentName, String.valueOf(otherSegmentValue));
TestDescriptor descriptorClone = UnsafeUtils.tryShallowClone(testDescriptor);
METHOD_HANDLES.invoke(UNIQUE_ID_SETTER, descriptorClone, updatedId);
return descriptorClone;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ public static void onAddingStepResult(
retryContext.setFailed(true);

TestRetryPolicy retryPolicy = retryContext.getRetryPolicy();
if (retryPolicy.suppressFailures() && retryPolicy.retriesLeft()) {
if (retryPolicy.suppressFailures()) {
stepResult = new StepResult(stepResult.getStep(), KarateUtils.abortedResult());
stepResult.setFailedReason(result.getError());
stepResult.setErrorIgnored(true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import datadog.trace.api.civisibility.config.TestSourceData;
import datadog.trace.api.civisibility.events.TestDescriptor;
import datadog.trace.api.civisibility.events.TestSuiteDescriptor;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import datadog.trace.api.civisibility.telemetry.tag.SkipReason;
import datadog.trace.api.civisibility.telemetry.tag.TestFrameworkInstrumentation;
import datadog.trace.bootstrap.ContextStore;
Expand Down Expand Up @@ -143,7 +144,7 @@ public boolean beforeScenario(ScenarioRuntime sr) {
parameters,
categories,
TestSourceData.UNKNOWN,
(String) sr.magicVariables.get(KarateUtils.RETRY_MAGIC_VARIABLE),
(RetryReason) sr.magicVariables.get(KarateUtils.RETRY_MAGIC_VARIABLE),
null);
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public Outcome apply(SuperEngine<?>.TestLeaf testLeaf) {

if (outcome.isFailed()) {
executionFailed = true;
if (retryPolicy.retriesLeft() && retryPolicy.suppressFailures()) {
if (retryPolicy.suppressFailures()) {
Throwable t = outcome.toOption().get();
return Canceled.apply(
new SuppressedTestFailedException("Test failed and will be retried", t, 0));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datadog.trace.api.civisibility.config.TestSourceData;
import datadog.trace.api.civisibility.events.TestSuiteDescriptor;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import datadog.trace.api.civisibility.telemetry.tag.TestFrameworkInstrumentation;
import datadog.trace.instrumentation.testng.retry.RetryAnalyzer;
import java.util.List;
Expand Down Expand Up @@ -94,7 +95,7 @@ public void onTestStart(final ITestResult result) {
null);
}

private String retryReason(final ITestResult result) {
private RetryReason retryReason(final ITestResult result) {
IRetryAnalyzer retryAnalyzer = TestNGUtils.getRetryAnalyzer(result);
if (retryAnalyzer instanceof RetryAnalyzer) {
RetryAnalyzer datadogAnalyzer = (RetryAnalyzer) retryAnalyzer;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import datadog.trace.api.civisibility.config.TestIdentifier;
import datadog.trace.api.civisibility.config.TestSourceData;
import datadog.trace.api.civisibility.retry.TestRetryPolicy;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import datadog.trace.instrumentation.testng.TestEventsHandlerHolder;
import datadog.trace.instrumentation.testng.TestNGUtils;
import org.testng.IRetryAnalyzer;
Expand Down Expand Up @@ -31,7 +32,7 @@ public boolean retry(ITestResult result) {
return retryPolicy.retry(result.isSuccess(), result.getEndMillis() - result.getStartMillis());
}

public String currentExecutionRetryReason() {
public RetryReason currentExecutionRetryReason() {
return retryPolicy != null ? retryPolicy.currentExecutionRetryReason() : null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ public String[] helperClassNames() {
}

public static class InvokeBeforeClassAdvice {
@SuppressWarnings("bytebuddy-exception-suppression")
@Advice.OnMethodEnter
public static void invokeBeforeClass(
@Advice.FieldValue("m_testContext") final ITestContext testContext,
Expand All @@ -81,6 +82,7 @@ public static String muzzleCheck(final CustomAttribute customAttribute) {
}

public static class InvokeAfterClassAdvice {
@SuppressWarnings("bytebuddy-exception-suppression")
@Advice.OnMethodExit
public static void invokeAfterClass(
@Advice.FieldValue("m_testContext") final ITestContext testContext,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public String[] helperClassNames() {
}

public static class RetryAdvice {
@SuppressWarnings("bytebuddy-exception-suppression")
@SuppressFBWarnings("NP_BOOLEAN_RETURN_NULL")
@Advice.OnMethodExit
public static void shouldRetryTestMethod(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import datadog.trace.api.civisibility.events.TestDescriptor;
import datadog.trace.api.civisibility.events.TestEventsHandler;
import datadog.trace.api.civisibility.events.TestSuiteDescriptor;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import datadog.trace.api.civisibility.telemetry.tag.TestFrameworkInstrumentation;
import datadog.trace.api.time.SystemTimeSource;
import datadog.trace.util.AgentThreadFactory;
Expand Down Expand Up @@ -91,7 +92,7 @@ public static void onTestFinished(TestFinished event, TaskDef taskDef) {
new TestDescriptor(testSuiteName, testClass, testName, testParameters, testQualifier);
String testMethodName = null;
Method testMethod = null;
String retryReason = null;
RetryReason retryReason = null;

// Only test finish is reported, so fake test start timestamp
long endMicros = SystemTimeSource.INSTANCE.getCurrentTimeMicros();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import datadog.trace.api.civisibility.config.TestIdentifier;
import datadog.trace.api.civisibility.config.TestSourceData;
import datadog.trace.api.civisibility.retry.TestRetryPolicy;
import datadog.trace.api.civisibility.telemetry.tag.RetryReason;
import datadog.trace.api.civisibility.telemetry.tag.SkipReason;
import datadog.trace.api.civisibility.telemetry.tag.TestFrameworkInstrumentation;
import datadog.trace.bootstrap.ContextStore;
Expand Down Expand Up @@ -64,7 +65,7 @@ void onTestStart(
@Nullable String testParameters,
@Nullable Collection<String> categories,
@Nonnull TestSourceData testSourceData,
@Nullable String retryReason,
@Nullable RetryReason retryReason,
@Nullable Long startTime);

void onTestSkip(TestKey descriptor, @Nullable String reason);
Expand Down
Loading
Loading