From fb237c124cd9bae0dda19640578e53d13d5f3e05 Mon Sep 17 00:00:00 2001 From: Hannah Stepanek Date: Tue, 28 Oct 2025 16:25:29 -0700 Subject: [PATCH] Add metrics for tracking partial granularity --- newrelic/core/application.py | 5 + newrelic/core/node_mixin.py | 4 + newrelic/core/stats_engine.py | 14 + newrelic/core/transaction_node.py | 8 +- tests/agent_unittests/test_harvest_loop.py | 341 ++++++++++++--------- 5 files changed, 231 insertions(+), 141 deletions(-) diff --git a/newrelic/core/application.py b/newrelic/core/application.py index 3ba8168d60..46faab5555 100644 --- a/newrelic/core/application.py +++ b/newrelic/core/application.py @@ -1373,6 +1373,11 @@ def harvest(self, shutdown=False, flexible=False): spans_sampled = spans.num_samples internal_count_metric("Supportability/SpanEvent/TotalEventsSeen", spans_seen) internal_count_metric("Supportability/SpanEvent/TotalEventsSent", spans_sampled) + if configuration.distributed_tracing.sampler.partial_granularity.enabled: + internal_count_metric( + f"Supportability/Python/PartialGranularity/{configuration.distributed_tracing.sampler.partial_granularity.type}", + 1, + ) stats.reset_span_events() diff --git a/newrelic/core/node_mixin.py b/newrelic/core/node_mixin.py index 29f5bedbc1..92f0975827 100644 --- a/newrelic/core/node_mixin.py +++ b/newrelic/core/node_mixin.py @@ -162,8 +162,10 @@ def span_events( partial_granularity_sampled=partial_granularity_sampled, ct_exit_spans=ct_exit_spans, ) + ct_exit_spans["instrumented"] += 1 parent_id = parent_guid if span: # span will be None if the span is an inprocess span or repeated exit span. + ct_exit_spans["kept"] += 1 yield span # Compressed spans are always reparented onto the entry span. if not settings.distributed_tracing.sampler.partial_granularity.type == "compact" or span[0].get( @@ -179,7 +181,9 @@ def span_events( partial_granularity_sampled=partial_granularity_sampled, ct_exit_spans=ct_exit_spans, ): + ct_exit_spans["instrumented"] += 1 if event: # event will be None if the span is an inprocess span or repeated exit span. + ct_exit_spans["kept"] += 1 yield event diff --git a/newrelic/core/stats_engine.py b/newrelic/core/stats_engine.py index f44f82fe13..507139dfd4 100644 --- a/newrelic/core/stats_engine.py +++ b/newrelic/core/stats_engine.py @@ -1190,6 +1190,20 @@ def record_transaction(self, transaction): elif transaction.sampled: for event in transaction.span_events(self.__settings): self._span_events.add(event, priority=transaction.priority) + if transaction.partial_granularity_sampled: + partial_gran_type = settings.distributed_tracing.sampler.partial_granularity.type + self.record_custom_metrics( + [ + ( + f"Supportability/DistributedTrace/PartialGranularity/{partial_gran_type}/Span/Instrumented", + {"count": transaction.instrumented}, + ), + ( + f"Supportability/DistributedTrace/PartialGranularity/{partial_gran_type}/Span/Kept", + {"count": transaction.kept}, + ), + ] + ) # Merge in log events diff --git a/newrelic/core/transaction_node.py b/newrelic/core/transaction_node.py index f1c9f1ea7a..eaa3b5f343 100644 --- a/newrelic/core/transaction_node.py +++ b/newrelic/core/transaction_node.py @@ -634,7 +634,7 @@ def span_events(self, settings, attr_class=dict): ("priority", self.priority), ) ) - ct_exit_spans = {} + ct_exit_spans = {"instrumented": 0, "kept": 0} yield from self.root.span_events( settings, base_attrs, @@ -643,3 +643,9 @@ def span_events(self, settings, attr_class=dict): partial_granularity_sampled=self.partial_granularity_sampled, ct_exit_spans=ct_exit_spans, ) + # If this transaction is partial granularity sampled, record the number of spans + # instrumented and the number of spans kept to monitor cost savings of partial + # granularity tracing. + if self.partial_granularity_sampled: + self.instrumented = ct_exit_spans["instrumented"] + self.kept = ct_exit_spans["kept"] diff --git a/tests/agent_unittests/test_harvest_loop.py b/tests/agent_unittests/test_harvest_loop.py index 8447b18eb5..56476d21f9 100644 --- a/tests/agent_unittests/test_harvest_loop.py +++ b/tests/agent_unittests/test_harvest_loop.py @@ -39,136 +39,139 @@ @pytest.fixture(scope="module") def transaction_node(request): - default_capacity = SampledDataSet().capacity - num_events = default_capacity + 1 - - custom_events = SampledDataSet(capacity=num_events) - for _ in range(num_events): - event = create_custom_event("Custom", {}) - custom_events.add(event) - - ml_events = SampledDataSet(capacity=num_events) - for _ in range(num_events): - event = create_custom_event("Custom", {}) - ml_events.add(event) - - log_events = SampledDataSet(capacity=num_events) - for _ in range(num_events): - event = LogEventNode(1653609717, "WARNING", "A", {}) - log_events.add(event) - - error = ErrorNode( - timestamp=0, - type="foo:bar", - message="oh no! your foo had a bar", - expected=False, - span_id=None, - stack_trace="", - error_group_name=None, - custom_params={}, - source=None, - ) - - errors = tuple(error for _ in range(num_events)) - - function = FunctionNode( - group="Function", - name="foo", - children=(), - start_time=0, - end_time=1, - duration=1, - exclusive=1, - label=None, - params=None, - rollup=None, - guid="GUID", - agent_attributes={}, - user_attributes={}, - ) - - children = tuple(function for _ in range(num_events)) - - root = RootNode( - name="Function/main", - children=children, - start_time=1524764430.0, - end_time=1524764430.1, - duration=0.1, - exclusive=0.1, - guid=None, - agent_attributes={}, - user_attributes={}, - path="OtherTransaction/Function/main", - trusted_parent_span=None, - tracing_vendors=None, - ) - - node = TransactionNode( - settings=finalize_application_settings({"agent_run_id": "1234567"}), - path="OtherTransaction/Function/main", - type="OtherTransaction", - group="Function", - base_name="main", - name_for_metric="Function/main", - port=None, - request_uri=None, - queue_start=0.0, - start_time=1524764430.0, - end_time=1524764430.1, - last_byte_time=0.0, - total_time=0.1, - response_time=0.1, - duration=0.1, - exclusive=0.1, - root=root, - errors=errors, - slow_sql=(), - custom_events=custom_events, - ml_events=ml_events, - log_events=log_events, - apdex_t=0.5, - suppress_apdex=False, - custom_metrics=CustomMetrics(), - dimensional_metrics=DimensionalMetrics(), - guid="4485b89db608aece", - cpu_time=0.0, - suppress_transaction_trace=False, - client_cross_process_id=None, - referring_transaction_guid=None, - record_tt=False, - synthetics_resource_id=None, - synthetics_job_id=None, - synthetics_monitor_id=None, - synthetics_header=None, - synthetics_type=None, - synthetics_initiator=None, - synthetics_attributes=None, - synthetics_info_header=None, - is_part_of_cat=False, - trip_id="4485b89db608aece", - path_hash=None, - referring_path_hash=None, - alternate_path_hashes=[], - trace_intrinsics={}, - distributed_trace_intrinsics={}, - agent_attributes=[], - user_attributes=[], - priority=1.0, - parent_transport_duration=None, - parent_span=None, - parent_type=None, - parent_account=None, - parent_app=None, - parent_tx=None, - parent_transport_type=None, - sampled=True, - root_span_guid=None, - trace_id="4485b89db608aece", - loop_time=0.0, - partial_granularity_sampled=False, - ) - return node + def _transaction_node(partial_granularity=False): + default_capacity = SampledDataSet().capacity + num_events = default_capacity + 1 + + custom_events = SampledDataSet(capacity=num_events) + for _ in range(num_events): + event = create_custom_event("Custom", {}) + custom_events.add(event) + + ml_events = SampledDataSet(capacity=num_events) + for _ in range(num_events): + event = create_custom_event("Custom", {}) + ml_events.add(event) + + log_events = SampledDataSet(capacity=num_events) + for _ in range(num_events): + event = LogEventNode(1653609717, "WARNING", "A", {}) + log_events.add(event) + + error = ErrorNode( + timestamp=0, + type="foo:bar", + message="oh no! your foo had a bar", + expected=False, + span_id=None, + stack_trace="", + error_group_name=None, + custom_params={}, + source=None, + ) + + errors = tuple(error for _ in range(num_events)) + + function = FunctionNode( + group="Function", + name="foo", + children=(), + start_time=0, + end_time=1, + duration=1, + exclusive=1, + label=None, + params=None, + rollup=None, + guid="GUID", + agent_attributes={}, + user_attributes={}, + ) + + children = tuple(function for _ in range(num_events)) + + root = RootNode( + name="Function/main", + children=children, + start_time=1524764430.0, + end_time=1524764430.1, + duration=0.1, + exclusive=0.1, + guid=None, + agent_attributes={}, + user_attributes={}, + path="OtherTransaction/Function/main", + trusted_parent_span=None, + tracing_vendors=None, + ) + + node = TransactionNode( + settings=finalize_application_settings({"agent_run_id": "1234567"}), + path="OtherTransaction/Function/main", + type="OtherTransaction", + group="Function", + base_name="main", + name_for_metric="Function/main", + port=None, + request_uri=None, + queue_start=0.0, + start_time=1524764430.0, + end_time=1524764430.1, + last_byte_time=0.0, + total_time=0.1, + response_time=0.1, + duration=0.1, + exclusive=0.1, + root=root, + errors=errors, + slow_sql=(), + custom_events=custom_events, + ml_events=ml_events, + log_events=log_events, + apdex_t=0.5, + suppress_apdex=False, + custom_metrics=CustomMetrics(), + dimensional_metrics=DimensionalMetrics(), + guid="4485b89db608aece", + cpu_time=0.0, + suppress_transaction_trace=False, + client_cross_process_id=None, + referring_transaction_guid=None, + record_tt=False, + synthetics_resource_id=None, + synthetics_job_id=None, + synthetics_monitor_id=None, + synthetics_header=None, + synthetics_type=None, + synthetics_initiator=None, + synthetics_attributes=None, + synthetics_info_header=None, + is_part_of_cat=False, + trip_id="4485b89db608aece", + path_hash=None, + referring_path_hash=None, + alternate_path_hashes=[], + trace_intrinsics={}, + distributed_trace_intrinsics={}, + agent_attributes=[], + user_attributes=[], + priority=1.0, + parent_transport_duration=None, + parent_span=None, + parent_type=None, + parent_account=None, + parent_app=None, + parent_tx=None, + parent_transport_type=None, + sampled=True, + root_span_guid=None, + trace_id="4485b89db608aece", + loop_time=0.0, + partial_granularity_sampled=partial_granularity, + ) + return node + + return _transaction_node def validate_metric_payload(metrics=None, endpoints_called=None): @@ -322,14 +325,32 @@ def test_serverless_application_harvest(): @pytest.mark.parametrize( - "distributed_tracing_enabled,span_events_enabled,spans_created", - [(True, True, 1), (True, True, 15), (True, False, 1), (True, True, 0), (True, False, 0), (False, True, 0)], + "distributed_tracing_enabled,full_granularity_enabled,partial_granularity_enabled,span_events_enabled,spans_created", + [ + (True, True, False, True, 1), + (True, True, True, True, 1), + (True, True, False, True, 15), + (True, True, False, False, 1), + (True, True, False, True, 0), + (True, True, False, False, 0), + (False, True, False, True, 0), + ], ) -def test_application_harvest_with_spans(distributed_tracing_enabled, span_events_enabled, spans_created): +def test_application_harvest_with_spans( + distributed_tracing_enabled, + full_granularity_enabled, + partial_granularity_enabled, + span_events_enabled, + spans_created, +): span_endpoints_called = [] max_samples_stored = 10 - if distributed_tracing_enabled and span_events_enabled: + if ( + distributed_tracing_enabled + and span_events_enabled + and (full_granularity_enabled or partial_granularity_enabled) + ): seen = spans_created sent = min(spans_created, max_samples_stored) else: @@ -341,6 +362,8 @@ def test_application_harvest_with_spans(distributed_tracing_enabled, span_events spans_required_metrics.extend( [("Supportability/SpanEvent/TotalEventsSeen", seen), ("Supportability/SpanEvent/TotalEventsSent", sent)] ) + if partial_granularity_enabled: + spans_required_metrics.extend([("Supportability/Python/PartialGranularity/essential", 1)]) @validate_metric_payload(metrics=spans_required_metrics, endpoints_called=span_endpoints_called) @override_generic_settings( @@ -349,6 +372,8 @@ def test_application_harvest_with_spans(distributed_tracing_enabled, span_events "developer_mode": True, "license_key": "**NOT A LICENSE KEY**", "distributed_tracing.enabled": distributed_tracing_enabled, + "distributed_tracing.sampler.full_granularity.enabled": full_granularity_enabled, + "distributed_tracing.sampler.partial_granularity.enabled": partial_granularity_enabled, "span_events.enabled": span_events_enabled, # Uses the name from post-translation as this is modifying the settings object, not a config file "event_harvest_config.harvest_limits.span_event_data": max_samples_stored, @@ -367,12 +392,12 @@ def _test(): # Verify that the metric_data endpoint is the 2nd to last and # span_event_data is the 3rd to last endpoint called - assert span_endpoints_called[-2] == "metric_data" + assert span_endpoints_called[-2] == "metric_data", span_endpoints_called if span_events_enabled and spans_created > 0: - assert span_endpoints_called[-3] == "span_event_data" + assert span_endpoints_called[-3] == "span_event_data", span_endpoints_called else: - assert span_endpoints_called[-3] != "span_event_data" + assert span_endpoints_called[-3] != "span_event_data", span_endpoints_called _test() @@ -452,10 +477,11 @@ def _test(): }, ) def test_transaction_count(transaction_node): + txn_node = transaction_node() app = Application("Python Agent Test (Harvest Loop)") app.connect_to_data_collector(None) - app.record_transaction(transaction_node) + app.record_transaction(txn_node) # Harvest has not run yet assert app._transaction_count == 1 @@ -466,9 +492,42 @@ def test_transaction_count(transaction_node): assert app._transaction_count == 0 # Record a transaction - app.record_transaction(transaction_node) + app.record_transaction(txn_node) + assert app._transaction_count == 1 + + app.harvest() + + # Harvest resets the transaction count + assert app._transaction_count == 0 + + +@override_generic_settings( + settings, + { + "developer_mode": True, + "license_key": "**NOT A LICENSE KEY**", + "feature_flag": set(), + "collect_custom_events": False, + "application_logging.forwarding.enabled": False, + "distributed_tracing.sampler.full_granularity.enabled": False, + "distributed_tracing.sampler.partial_granularity.enabled": True, + }, +) +def test_partial_granularity_metrics(transaction_node): + txn_node = transaction_node(True) + app = Application("Python Agent Test (Harvest Loop)") + app.connect_to_data_collector(None) + + app.record_transaction(txn_node) + + # Harvest has not run yet assert app._transaction_count == 1 + instrumented = "Supportability/DistributedTrace/PartialGranularity/essential/Span/Instrumented" + kept = "Supportability/DistributedTrace/PartialGranularity/essential/Span/Kept" + assert app._stats_engine.stats_table[(instrumented, "")][0] == 102 + assert app._stats_engine.stats_table[(kept, "")][0] == 1 + app.harvest() # Harvest resets the transaction count @@ -479,6 +538,7 @@ def test_transaction_count(transaction_node): settings, {"developer_mode": True, "license_key": "**NOT A LICENSE KEY**", "feature_flag": set()} ) def test_adaptive_sampling(transaction_node, monkeypatch): + txn_node = transaction_node() app = Application("Python Agent Test (Harvest Loop)") # Should always return false for sampling prior to connect @@ -523,11 +583,12 @@ def test_adaptive_sampling(transaction_node, monkeypatch): }, ) def test_reservoir_sizes(transaction_node): + txn_node = transaction_node() app = Application("Python Agent Test (Harvest Loop)") app.connect_to_data_collector(None) # Record a transaction with events - app.record_transaction(transaction_node) + app.record_transaction(txn_node) # Test that the samples have been recorded assert app._stats_engine.custom_events.num_samples == 101