-
Notifications
You must be signed in to change notification settings - Fork 319
Antithesis poc #9993
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Antithesis poc #9993
Changes from 6 commits
74f1592
d7a5e59
17a3dd7
d8dcccb
14f2568
e750f61
83ce669
a56fe56
35bd8be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,9 @@ | |
|
|
||
| import static datadog.communication.http.OkHttpUtils.prepareRequest; | ||
|
|
||
| import com.antithesis.sdk.Assert; | ||
| import com.fasterxml.jackson.databind.node.JsonNodeFactory; | ||
| import com.fasterxml.jackson.databind.node.ObjectNode; | ||
| import com.squareup.moshi.JsonAdapter; | ||
| import com.squareup.moshi.Moshi; | ||
| import com.squareup.moshi.Types; | ||
|
|
@@ -89,11 +92,33 @@ public void addResponseListener(final RemoteResponseListener listener) { | |
|
|
||
| public Response sendSerializedTraces(final Payload payload) { | ||
| final int sizeInBytes = payload.sizeInBytes(); | ||
|
|
||
| // Antithesis: Track that agent API send is being exercised | ||
| log.debug("ANTITHESIS_ASSERT: Verifying DDAgentApi trace sending is exercised (reachable) with {} traces", payload.traceCount()); | ||
| Assert.reachable("DDAgentApi trace sending is exercised", null); | ||
| log.debug("ANTITHESIS_ASSERT: Checking if traces are being sent through DDAgentApi (sometimes) - count: {}", payload.traceCount()); | ||
| Assert.sometimes( | ||
| payload.traceCount() > 0, | ||
| "Traces are being sent through DDAgentApi", | ||
| null); | ||
|
|
||
| String tracesEndpoint = featuresDiscovery.getTraceEndpoint(); | ||
| if (null == tracesEndpoint) { | ||
| featuresDiscovery.discoverIfOutdated(); | ||
| tracesEndpoint = featuresDiscovery.getTraceEndpoint(); | ||
| if (null == tracesEndpoint) { | ||
| // Antithesis: Agent should always be detectable | ||
| ObjectNode agentDetectionDetails = JsonNodeFactory.instance.objectNode(); | ||
| agentDetectionDetails.put("trace_count", payload.traceCount()); | ||
| agentDetectionDetails.put("payload_size_bytes", sizeInBytes); | ||
| agentDetectionDetails.put("agent_url", agentUrl.toString()); | ||
| agentDetectionDetails.put("failure_reason", "agent_not_detected"); | ||
|
|
||
| log.debug("ANTITHESIS_ASSERT: Agent not detected (unreachable) - url: {}, traces: {}", agentUrl, payload.traceCount()); | ||
| Assert.unreachable( | ||
| "Datadog agent should always be detected - agent communication failure", | ||
| agentDetectionDetails); | ||
|
|
||
| log.error("No datadog agent detected"); | ||
| countAndLogFailedSend(payload.traceCount(), sizeInBytes, null, null); | ||
| return Response.failed(404); | ||
|
|
@@ -122,7 +147,36 @@ public Response sendSerializedTraces(final Payload payload) { | |
| try (final Recording recording = sendPayloadTimer.start(); | ||
| final okhttp3.Response response = httpClient.newCall(request).execute()) { | ||
| handleAgentChange(response.header(DATADOG_AGENT_STATE)); | ||
|
|
||
| // Antithesis: Track HTTP response status and assert success | ||
| ObjectNode httpResponseDetails = JsonNodeFactory.instance.objectNode(); | ||
| httpResponseDetails.put("trace_count", payload.traceCount()); | ||
| httpResponseDetails.put("payload_size_bytes", sizeInBytes); | ||
| httpResponseDetails.put("http_status", response.code()); | ||
| httpResponseDetails.put("http_message", response.message()); | ||
| httpResponseDetails.put("success", response.code() == 200); | ||
| httpResponseDetails.put("agent_url", tracesUrl.toString()); | ||
|
|
||
| log.debug("ANTITHESIS_ASSERT: Checking HTTP response status (always) - code: {}, traces: {}", response.code(), payload.traceCount()); | ||
| Assert.always( | ||
| response.code() == 200, | ||
| "HTTP response from Datadog agent should always be 200 - API communication failure", | ||
| httpResponseDetails); | ||
|
|
||
| if (response.code() != 200) { | ||
| // Antithesis: Mark non-200 path as unreachable | ||
| ObjectNode errorDetails = JsonNodeFactory.instance.objectNode(); | ||
| errorDetails.put("trace_count", payload.traceCount()); | ||
| errorDetails.put("payload_size_bytes", sizeInBytes); | ||
| errorDetails.put("http_status", response.code()); | ||
| errorDetails.put("http_message", response.message()); | ||
| errorDetails.put("failure_reason", "http_error_response"); | ||
|
|
||
| log.debug("ANTITHESIS_ASSERT: Non-200 HTTP response (unreachable) - code: {}, message: {}, traces: {}", response.code(), response.message(), payload.traceCount()); | ||
| Assert.unreachable( | ||
| "Non-200 HTTP response from agent indicates API failure - traces may be lost", | ||
| errorDetails); | ||
|
||
|
|
||
| agentErrorCounter.incrementErrorCount(response.message(), payload.traceCount()); | ||
| countAndLogFailedSend(payload.traceCount(), sizeInBytes, response, null); | ||
| return Response.failed(response.code()); | ||
|
|
@@ -146,6 +200,20 @@ public Response sendSerializedTraces(final Payload payload) { | |
| } | ||
| } | ||
| } catch (final IOException e) { | ||
| // Antithesis: Network failures should not occur | ||
| ObjectNode networkErrorDetails = JsonNodeFactory.instance.objectNode(); | ||
| networkErrorDetails.put("trace_count", payload.traceCount()); | ||
| networkErrorDetails.put("payload_size_bytes", sizeInBytes); | ||
| networkErrorDetails.put("exception_type", e.getClass().getName()); | ||
| networkErrorDetails.put("exception_message", e.getMessage()); | ||
| networkErrorDetails.put("agent_url", agentUrl.toString()); | ||
| networkErrorDetails.put("failure_reason", "network_io_exception"); | ||
|
|
||
| log.debug("ANTITHESIS_ASSERT: Network/IO exception (unreachable) - type: {}, message: {}, traces: {}", e.getClass().getName(), e.getMessage(), payload.traceCount()); | ||
| Assert.unreachable( | ||
| "Network/IO exceptions should not occur when sending to agent - indicates connectivity issues", | ||
| networkErrorDetails); | ||
|
|
||
| countAndLogFailedSend(payload.traceCount(), sizeInBytes, null, e); | ||
| return Response.failed(e); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,6 +34,9 @@ dependencies { | |
| implementation(libs.slf4j) | ||
|
|
||
| implementation(project(":internal-api")) | ||
|
|
||
| // Antithesis SDK for assertions and property testing - bundled in tracer JAR | ||
| implementation(group = "com.antithesis", name = "sdk", version = "1.4.5") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. thought: Another concern is how much more weight it adds to the jar.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Right now this PR adds ~20 MB to the final jar... but a lot of that is because it embeds Antithesis, its native FFI wrapper, and various […]. If we move it to the […] […]. That still feels too big to have in the general deliverable for something only used for testing purposes. One option might be to only include the direct dependency in the release (i.e. without […] |
||
|
|
||
| compileOnly(project(":dd-java-agent:agent-tooling")) | ||
| testImplementation(project(":dd-java-agent:agent-tooling")) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thought: I'm very uncomfortable with using the Jackson API in these parts, and the
`Assert` methods rely on it.