Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

breaking: Use sha256 to hash StepFunctions trace id and manually set _dd.p.tid #490

Merged
merged 12 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/input_files/build.yaml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ integration-test ({{ $runtime.name }}-{{ $runtime.arch }}):
before_script:
- *install-node
- EXTERNAL_ID_NAME=integration-test-externalid ROLE_TO_ASSUME=sandbox-integration-test-deployer AWS_ACCOUNT=425362996713 source ./ci/get_secrets.sh
- yarn global add serverless --prefix /usr/local
- yarn global add serverless@^3.38.0 --prefix /usr/local
- cd integration_tests && yarn install && cd ..
script:
- RUNTIME_PARAM={{ $runtime.python_version }} ARCH={{ $runtime.arch }} ./scripts/run_integration_tests.sh
Expand Down
48 changes: 33 additions & 15 deletions datadog_lambda/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
propagator = HTTPPropagator()

DD_TRACE_JAVA_TRACE_ID_PADDING = "00000000"
HIGHER_64_BITS = "HIGHER_64_BITS"
LOWER_64_BITS = "LOWER_64_BITS"


def _convert_xray_trace_id(xray_trace_id):
Expand Down Expand Up @@ -354,14 +356,16 @@ def extract_context_from_kinesis_event(event, lambda_context):
return extract_context_from_lambda_context(lambda_context)


def _deterministic_md5_hash(s: str) -> int:
"""MD5 here is to generate trace_id, not for any encryption."""
hex_number = hashlib.md5(s.encode("ascii")).hexdigest()
binary = bin(int(hex_number, 16))
binary_str = str(binary)
binary_str_remove_0b = binary_str[2:].rjust(128, "0")
most_significant_64_bits_without_leading_1 = "0" + binary_str_remove_0b[1:-64]
result = int(most_significant_64_bits_without_leading_1, 2)
def _deterministic_sha256_hash(s: str, part: str) -> int:
    """Derive a deterministic, non-zero 63-bit integer from the SHA-256 of ``s``.

    SHA-256 is used here only to generate stable ids, not for any encryption.

    Only the first 128 bits of the digest are considered. ``part`` selects
    which 64-bit word of those 128 bits is used:

    * ``HIGHER_64_BITS`` -- the first (most significant) 64-bit word.
    * anything else (normally ``LOWER_64_BITS``) -- the second 64-bit word.

    The most significant bit of the selected word is always cleared, so the
    result fits in 63 bits. A result of 0 is replaced by 1.

    :param s: the string to hash.
    :param part: ``HIGHER_64_BITS`` or ``LOWER_64_BITS``.
    :return: an integer in the range ``[1, 2**63 - 1]``.
    """
    digest = hashlib.sha256(s.encode()).digest()

    # The digest is big-endian: bytes 0-7 are the higher 64 bits of the first
    # 128 bits, bytes 8-15 are the lower 64 bits.
    if part == HIGHER_64_BITS:
        word = digest[:8]
    else:
        word = digest[8:16]

    # Clear the top bit so the value always has a leading zero in 64 bits.
    result = int.from_bytes(word, "big") & 0x7FFFFFFFFFFFFFFF
    if result == 0:
        return 1
    return result
Expand All @@ -376,13 +380,27 @@ def extract_context_from_step_functions(event, lambda_context):
execution_id = event.get("Execution").get("Id")
state_name = event.get("State").get("Name")
state_entered_time = event.get("State").get("EnteredTime")
trace_id = _deterministic_md5_hash(execution_id)
parent_id = _deterministic_md5_hash(
f"{execution_id}#{state_name}#{state_entered_time}"
# returning 128 bits, since a 128-bit trace ID will be broken up into
# the traditional traceId and the _dd.p.tid tag
# https://github.com/DataDog/dd-trace-py/blob/3e34d21cb9b5e1916e549047158cb119317b96ab/ddtrace/propagation/http.py#L232-L240
trace_id = _deterministic_sha256_hash(execution_id, LOWER_64_BITS)

parent_id = _deterministic_sha256_hash(
f"{execution_id}#{state_name}#{state_entered_time}", HIGHER_64_BITS
)

sampling_priority = SamplingPriority.AUTO_KEEP
return Context(
trace_id=trace_id, span_id=parent_id, sampling_priority=sampling_priority
trace_id=trace_id,
span_id=parent_id,
sampling_priority=sampling_priority,
# take the higher 64 bits as _dd.p.tid tag and use hex to encode
# [2:] to remove '0x' in the hex str
meta={
"_dd.p.tid": hex(
_deterministic_sha256_hash(execution_id, HIGHER_64_BITS)
)[2:]
},
)
except Exception as e:
logger.debug("The Step Functions trace extractor returned with error %s", e)
Expand Down Expand Up @@ -1246,9 +1264,9 @@ def create_function_execution_span(
"function_version": function_version,
"request_id": context.aws_request_id,
"resource_names": context.function_name,
"functionname": context.function_name.lower()
if context.function_name
else None,
"functionname": (
context.function_name.lower() if context.function_name else None
),
"datadog_lambda": datadog_lambda_version,
"dd_trace": ddtrace_version,
"span.name": "aws.lambda",
Expand Down
39 changes: 26 additions & 13 deletions tests/test_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
XraySubsegment,
)
from datadog_lambda.tracing import (
_deterministic_md5_hash,
HIGHER_64_BITS,
LOWER_64_BITS,
_deterministic_sha256_hash,
create_inferred_span,
extract_dd_trace_context,
create_dd_dummy_metadata_subsegment,
Expand Down Expand Up @@ -624,17 +626,19 @@ def test_step_function_trace_data(self):
ctx, source, event_source = extract_dd_trace_context(sqs_event, lambda_ctx)
self.assertEqual(source, "event")
expected_context = Context(
trace_id=1074655265866231755,
span_id=4776286484851030060,
trace_id=3675572987363469717,
span_id=6880978411788117524,
sampling_priority=1,
meta={"_dd.p.tid": "e987c84b36b11ab"},
)
self.assertEqual(ctx, expected_context)
self.assertEqual(
get_dd_trace_context(),
{
TraceHeader.TRACE_ID: "1074655265866231755",
TraceHeader.PARENT_ID: fake_xray_header_value_parent_decimal,
TraceHeader.TRACE_ID: "3675572987363469717",
TraceHeader.PARENT_ID: "10713633173203262661",
TraceHeader.SAMPLING_PRIORITY: "1",
"x-datadog-tags": "_dd.p.tid=e987c84b36b11ab",
},
)
create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY)
Expand Down Expand Up @@ -1992,19 +1996,28 @@ def test_mark_trace_as_error_for_5xx_responses_sends_error_metric_and_set_error_

class TestStepFunctionsTraceContext(unittest.TestCase):
def test_deterministic_m5_hash(self):
result = _deterministic_md5_hash("some_testing_random_string")
self.assertEqual(2251275791555400689, result)
result = _deterministic_sha256_hash("some_testing_random_string", LOWER_64_BITS)
self.assertEqual(7456137785171041414, result)

def test_deterministic_m5_hash__result_the_same_as_backend_1(self):
result = _deterministic_sha256_hash(
"arn:aws:states:sa-east-1:425362996713:stateMachine:MyStateMachine-b276uka1j"
"#lambda#1",
HIGHER_64_BITS,
)
self.assertEqual(3711631873188331089, result)
kimi-p marked this conversation as resolved.
Show resolved Hide resolved

def test_deterministic_m5_hash__result_the_same_as_backend(self):
result = _deterministic_md5_hash(
"arn:aws:states:sa-east-1:601427271234:express:DatadogStateMachine:acaf1a67-336a-e854-1599-2a627eb2dd8a"
":c8baf081-31f1-464d-971f-70cb17d01111#step-one#2022-12-08T21:08:19.224Z"
def test_deterministic_m5_hash__result_the_same_as_backend_2(self):
result = _deterministic_sha256_hash(
"arn:aws:states:sa-east-1:425362996713:stateMachine:MyStateMachine-b276uka1j"
"#lambda#2",
HIGHER_64_BITS,
)
self.assertEqual(8034507082463708833, result)
self.assertEqual(5759173372325510050, result)
kimi-p marked this conversation as resolved.
Show resolved Hide resolved

def test_deterministic_m5_hash__always_leading_with_zero(self):
for i in range(100):
result = _deterministic_md5_hash(str(i))
result = _deterministic_sha256_hash(str(i), 64)
result_in_binary = bin(int(result))
# Leading zeros will be omitted, so only test for full 64 bits present
if len(result_in_binary) == 66: # "0b" + 64 bits.
Expand Down
Loading