diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/include/interface.hpp b/ddtrace/internal/datadog/profiling/dd_wrapper/include/interface.hpp index 34a985d1a16..adde7d01c37 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/include/interface.hpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/include/interface.hpp @@ -21,6 +21,7 @@ extern "C" void ddup_config_profiler_version(std::string_view profiler_version); void ddup_config_url(std::string_view url); void ddup_config_max_nframes(int max_nframes); + void ddup_config_timeline(bool enable); void ddup_config_user_tag(std::string_view key, std::string_view val); void ddup_config_sample_type(unsigned int type); @@ -56,6 +57,7 @@ extern "C" std::string_view _filename, uint64_t address, int64_t line); + void ddup_push_monotonic_ns(Datadog::Sample* sample, int64_t monotonic_ns); void ddup_flush_sample(Datadog::Sample* sample); void ddup_drop_sample(Datadog::Sample* sample); diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/include/profile.hpp b/ddtrace/internal/datadog/profiling/dd_wrapper/include/profile.hpp index 3af8251f5dc..d79c99f75f7 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/include/profile.hpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/include/profile.hpp @@ -76,6 +76,6 @@ class Profile const ValueIndex& val(); // collect - bool collect(const ddog_prof_Sample& sample); + bool collect(const ddog_prof_Sample& sample, int64_t endtime_ns); }; } // namespace Datadog diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample.hpp b/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample.hpp index 0a7b8750b6e..f28fdb07cf5 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample.hpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample.hpp @@ -25,6 +25,12 @@ class Sample SampleType type_mask; std::string errmsg; + // Timeline support works by endowing each sample with a timestamp. Collection of this data this data is cheap, but + // due to the underlying pprof format, timeline support increases the sample cardinality. Rather than switching + // around the frontend code too much, we push enablement down to whether or not timestamps get added to samples (a + // 0 value suppresses the tag). However, Sample objects are short-lived, so we make the flag static. + static inline bool timeline_enabled = false; + // Keeps temporary buffer of frames in the stack std::vector locations; size_t dropped_frames = 0; @@ -36,6 +42,9 @@ class Sample // Storage for values std::vector values = {}; + // Additional metadata + int64_t endtime_ns = 0; // end of the event + public: // Helpers bool push_label(ExportLabelKey key, std::string_view val); @@ -62,6 +71,11 @@ class Sample bool push_trace_resource_container(std::string_view trace_resource_container); bool push_exceptioninfo(std::string_view exception_type, int64_t count); bool push_class_name(std::string_view class_name); + bool push_monotonic_ns(int64_t monotonic_ns); + + // Interacts with static Sample state + bool is_timeline_enabled() const; + static void set_timeline(bool enabled); // Assumes frames are pushed in leaf-order void push_frame(std::string_view name, // for ddog_prof_Function diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample_manager.hpp b/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample_manager.hpp index ec6e388b96e..a5be316b8c6 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample_manager.hpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample_manager.hpp @@ -24,6 +24,7 @@ class SampleManager // Configuration static void add_type(unsigned int type); static void set_max_nframes(unsigned int _max_nframes); + static void set_timeline(bool enable); // Sampling entrypoint (this could also be called `build_ptr()`) static Sample* start_sample(); diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/src/interface.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/src/interface.cpp index c0ccae883ee..ed158d7ca83 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/src/interface.cpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/src/interface.cpp @@ -99,12 +99,19 @@ ddup_config_sample_type(unsigned int _type) // cppcheck-suppress unusedFunction { Datadog::SampleManager::add_type(_type); } + void ddup_config_max_nframes(int max_nframes) // cppcheck-suppress unusedFunction { Datadog::SampleManager::set_max_nframes(max_nframes); } +void +ddup_config_timeline(bool enabled) // cppcheck-suppress unusedFunction +{ + Datadog::SampleManager::set_timeline(enabled); +} + bool ddup_is_initialized() // cppcheck-suppress unusedFunction { @@ -247,6 +254,12 @@ ddup_push_frame(Datadog::Sample* sample, // cppcheck-suppress unusedFunction sample->push_frame(_name, _filename, address, line); } +void +ddup_push_monotonic_ns(Datadog::Sample* sample, int64_t monotonic_ns) +{ + sample->push_monotonic_ns(monotonic_ns); +} + void ddup_flush_sample(Datadog::Sample* sample) // cppcheck-suppress unusedFunction { diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/src/profile.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/src/profile.cpp index 086e90ee495..6f986f4896b 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/src/profile.cpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/src/profile.cpp @@ -184,11 +184,10 @@ Datadog::Profile::val() } bool -Datadog::Profile::collect(const ddog_prof_Sample& sample) +Datadog::Profile::collect(const ddog_prof_Sample& sample, int64_t endtime_ns) { - // TODO this should propagate some kind of timestamp for timeline support const std::lock_guard lock(profile_mtx); - auto res = ddog_prof_Profile_add(&cur_profile, sample, 0); + auto res = ddog_prof_Profile_add(&cur_profile, sample, endtime_ns); if (!res.ok) { // NOLINT (cppcoreguidelines-pro-type-union-access) auto err = res.err; // NOLINT (cppcoreguidelines-pro-type-union-access) const std::string errmsg = err_to_msg(&err, "Error adding sample to profile"); diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample.cpp index d2a28562505..eb7cacff295 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample.cpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample.cpp @@ -1,5 +1,6 @@ #include "sample.hpp" +#include #include Datadog::Sample::Sample(SampleType _type_mask, unsigned int _max_nframes) @@ -117,7 +118,7 @@ Datadog::Sample::flush_sample() .labels = { labels.data(), labels.size() }, }; - const bool ret = profile_state.collect(sample); + const bool ret = profile_state.collect(sample, endtime_ns); clear_buffers(); return ret; } @@ -316,6 +317,47 @@ Datadog::Sample::push_class_name(std::string_view class_name) return true; } +bool +Datadog::Sample::push_monotonic_ns(int64_t _monotonic_ns) +{ + // Monotonic times have their epoch at the system start, so they need an + // adjustment to the standard epoch + // Just set a static for now and use a lambda to compute the offset once + const static auto offset = []() { + // Get the current epoch time + using namespace std::chrono; + auto epoch_ns = duration_cast(system_clock::now().time_since_epoch()).count(); + + // Get the current monotonic time. Use clock_gettime directly because the standard underspecifies + // which clock is actually used in std::chrono + timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + auto monotonic_ns = static_cast(ts.tv_sec) * 1'000'000'000LL + ts.tv_nsec; + + // Compute the difference. We're after 1970, so epoch_ns will be larger + return epoch_ns - monotonic_ns; + }(); + + // If timeline is not enabled, then this is a no-op + if (is_timeline_enabled()) { + endtime_ns = _monotonic_ns + offset; + } + + return true; +} + +void +Datadog::Sample::set_timeline(bool enabled) +{ + timeline_enabled = enabled; +} + +bool +Datadog::Sample::is_timeline_enabled() const +{ + return timeline_enabled; +} + ddog_prof_Profile& Datadog::Sample::profile_borrow() { diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp index 72137b9a5d0..d4a864052e5 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp @@ -21,6 +21,12 @@ Datadog::SampleManager::set_max_nframes(unsigned int _max_nframes) } } +void +Datadog::SampleManager::set_timeline(bool enable) +{ + Datadog::Sample::set_timeline(enable); +} + Datadog::Sample* Datadog::SampleManager::start_sample() { diff --git a/ddtrace/internal/datadog/profiling/ddup/_ddup.pyi b/ddtrace/internal/datadog/profiling/ddup/_ddup.pyi index deb1e23ffed..29749138a98 100644 --- a/ddtrace/internal/datadog/profiling/ddup/_ddup.pyi +++ b/ddtrace/internal/datadog/profiling/ddup/_ddup.pyi @@ -12,6 +12,7 @@ def init( tags: Optional[Dict[Union[str, bytes], Union[str, bytes]]], max_nframes: Optional[int], url: Optional[str], + timeline_enabled: Optional[bool], ) -> None: ... def upload() -> None: ... @@ -30,4 +31,5 @@ class SampleHandle: def push_exceptioninfo(self, exc_type: Union[None, bytes, str, type], count: int) -> None: ... def push_class_name(self, class_name: StringType) -> None: ... def push_span(self, span: Optional[Span], endpoint_collection_enabled: bool) -> None: ... + def push_monotonic_ns(self, monotonic_ns: int) -> None: ... def flush_sample(self) -> None: ... diff --git a/ddtrace/internal/datadog/profiling/ddup/_ddup.pyx b/ddtrace/internal/datadog/profiling/ddup/_ddup.pyx index d76a5aecff7..51c6da81dbd 100644 --- a/ddtrace/internal/datadog/profiling/ddup/_ddup.pyx +++ b/ddtrace/internal/datadog/profiling/ddup/_ddup.pyx @@ -40,6 +40,7 @@ cdef extern from "interface.hpp": void ddup_config_profiler_version(string_view profiler_version) void ddup_config_url(string_view url) void ddup_config_max_nframes(int max_nframes) + void ddup_config_timeline(bint enable) void ddup_config_user_tag(string_view key, string_view val) void ddup_config_sample_type(unsigned int type) @@ -64,6 +65,7 @@ cdef extern from "interface.hpp": void ddup_push_exceptioninfo(Sample *sample, string_view exception_type, int64_t count) void ddup_push_class_name(Sample *sample, string_view class_name) void ddup_push_frame(Sample *sample, string_view _name, string_view _filename, uint64_t address, int64_t line) + void ddup_push_monotonic_ns(Sample *sample, int64_t monotonic_ns) void ddup_flush_sample(Sample *sample) void ddup_drop_sample(Sample *sample) void ddup_set_runtime_id(string_view _id) @@ -130,7 +132,8 @@ def init( version: StringType = None, tags: Optional[Dict[Union[str, bytes], Union[str, bytes]]] = None, max_nframes: Optional[int] = None, - url: StringType = None) -> None: + url: StringType = None, + timeline_enabled: Optional[bool] = None) -> None: # Try to provide a ddtrace-specific default service if one is not given service = service or DEFAULT_SERVICE_NAME @@ -156,6 +159,8 @@ def init( for key, val in tags.items(): if key and val: call_ddup_config_user_tag(ensure_binary_or_empty(key), ensure_binary_or_empty(val)) + if timeline_enabled is True: + ddup_config_timeline(True) ddup_init() @@ -234,7 +239,6 @@ cdef class SampleHandle: def push_task_id(self, task_id: Optional[int]) -> None: if self.ptr is not NULL: if task_id is not None: - ddup_push_task_id(self.ptr, task_id) ddup_push_task_id(self.ptr, clamp_to_int64_unsigned(task_id)) def push_task_name(self, task_name: StringType) -> None: @@ -282,6 +286,10 @@ cdef class SampleHandle: string_view(root_service_bytes, len(root_service_bytes)) ) + def push_monotonic_ns(self, monotonic_ns: int) -> None: + if self.ptr is not NULL: + ddup_push_monotonic_ns(self.ptr, monotonic_ns) + def flush_sample(self) -> None: # Flushing the sample consumes it. The user will no longer be able to use # this handle after flushing it. diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp index 6f158aeeac8..dfda9e7e7ac 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp @@ -29,9 +29,18 @@ StackRenderer::render_thread_begin(PyThreadState* tstate, return; } - // #warning stack_v2 should use a C++ interface instead of re-converting intermediates + // Get the current time in ns in a way compatible with python's time.monotonic_ns(), which is backed by + // clock_gettime(CLOCK_MONOTONIC) on linux and mach_absolute_time() on macOS. + // This is not the same as std::chrono::steady_clock, which is backed by clock_gettime(CLOCK_MONOTONIC_RAW) + // (although this is underspecified in the standard) + timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) { + auto now_ns = static_cast(ts.tv_sec) * 1'000'000'000LL + static_cast(ts.tv_nsec); + ddup_push_monotonic_ns(sample, now_ns); + } + ddup_push_threadinfo(sample, static_cast(thread_id), static_cast(native_id), name); - ddup_push_walltime(sample, 1000 * wall_time_us, 1); + ddup_push_walltime(sample, 1000LL * wall_time_us, 1); } void diff --git a/ddtrace/profiling/collector/_lock.py b/ddtrace/profiling/collector/_lock.py index 35a673d52c5..3c6e32545fb 100644 --- a/ddtrace/profiling/collector/_lock.py +++ b/ddtrace/profiling/collector/_lock.py @@ -11,6 +11,7 @@ from ddtrace._trace.tracer import Tracer from ddtrace.internal import compat from ddtrace.internal.datadog.profiling import ddup +from ddtrace.internal.logger import get_logger from ddtrace.profiling import _threading from ddtrace.profiling import collector from ddtrace.profiling import event @@ -21,6 +22,9 @@ from ddtrace.vendor import wrapt +LOG = get_logger(__name__) + + @event.event_class class LockEventBase(event.StackBasedEvent): """Base Lock event.""" @@ -118,6 +122,7 @@ def acquire(self, *args, **kwargs): thread_native_id = _threading.get_thread_native_id(thread_id) handle = ddup.SampleHandle() + handle.push_monotonic_ns(end) handle.push_lock_name(self._self_name) handle.push_acquire(end - start, 1) # AFAICT, capture_pct does not adjust anything here handle.push_threadinfo(thread_id, thread_native_id, thread_name) @@ -146,7 +151,8 @@ def acquire(self, *args, **kwargs): event.set_trace_info(self._self_tracer.current_span(), self._self_endpoint_collection_enabled) self._self_recorder.push_event(event) - except Exception: + except Exception as e: + LOG.warning("Error recording lock acquire event: %s", e) pass # nosec def release(self, *args, **kwargs): @@ -172,6 +178,7 @@ def release(self, *args, **kwargs): thread_native_id = _threading.get_thread_native_id(thread_id) handle = ddup.SampleHandle() + handle.push_monotonic_ns(end) handle.push_lock_name(self._self_name) handle.push_release( end - self._self_acquired_at, 1 @@ -208,7 +215,8 @@ def release(self, *args, **kwargs): self._self_recorder.push_event(event) finally: del self._self_acquired_at - except Exception: + except Exception as e: + LOG.warning("Error recording lock release event: %s", e) pass # nosec acquire_lock = acquire diff --git a/ddtrace/profiling/collector/memalloc.py b/ddtrace/profiling/collector/memalloc.py index 06dd16463c0..9de3cb73136 100644 --- a/ddtrace/profiling/collector/memalloc.py +++ b/ddtrace/profiling/collector/memalloc.py @@ -13,6 +13,7 @@ except ImportError: _memalloc = None # type: ignore[assignment] +from ddtrace.internal import compat from ddtrace.internal.datadog.profiling import ddup from ddtrace.profiling import _threading from ddtrace.profiling import collector @@ -167,6 +168,7 @@ def collect(self): if thread_id in thread_id_ignore_set: continue handle = ddup.SampleHandle() + handle.push_monotonic_ns(compat.monotonic_ns()) handle.push_alloc(int((ceil(size) * alloc_count) / count), count) # Roundup to help float precision handle.push_threadinfo( thread_id, _threading.get_thread_native_id(thread_id), _threading.get_thread_name(thread_id) diff --git a/ddtrace/profiling/collector/stack.pyx b/ddtrace/profiling/collector/stack.pyx index 3ba4e8af103..6164f477191 100644 --- a/ddtrace/profiling/collector/stack.pyx +++ b/ddtrace/profiling/collector/stack.pyx @@ -290,7 +290,7 @@ cdef collect_threads(thread_id_ignore_list, thread_time, thread_span_links) with ) -cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_time, thread_span_links, collect_endpoint): +cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_time, thread_span_links, collect_endpoint, now_ns = 0): # Do not use `threading.enumerate` to not mess with locking (gevent!) # Also collect the native threads, that are not registered with the built-in # threading module, to keep backward compatibility with the previous @@ -331,6 +331,7 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim if nframes: if use_libdd: handle = ddup.SampleHandle() + handle.push_monotonic_ns(now_ns) handle.push_walltime(wall_time, 1) handle.push_threadinfo(thread_id, thread_native_id, thread_name) handle.push_task_id(task_id) @@ -358,6 +359,7 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim if nframes: if use_libdd: handle = ddup.SampleHandle() + handle.push_monotonic_ns(now_ns) handle.push_cputime( cpu_time, 1) handle.push_walltime( wall_time, 1) handle.push_threadinfo(thread_id, thread_native_id, thread_name) @@ -390,6 +392,7 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim if nframes: if use_libdd: handle = ddup.SampleHandle() + handle.push_monotonic_ns(now_ns) handle.push_threadinfo(thread_id, thread_native_id, thread_name) handle.push_exceptioninfo(exc_type, 1) handle.push_class_name(frames[0].class_name) @@ -541,6 +544,7 @@ class StackCollector(collector.PeriodicCollector): wall_time, self._thread_span_links, self.endpoint_collection_enabled, + now_ns=now, ) used_wall_time_ns = compat.monotonic_ns() - now diff --git a/ddtrace/profiling/profiler.py b/ddtrace/profiling/profiler.py index 4f5a893ba50..acd16b68469 100644 --- a/ddtrace/profiling/profiler.py +++ b/ddtrace/profiling/profiler.py @@ -217,6 +217,7 @@ def _build_default_exporters(self): tags=self.tags, # type: ignore max_nframes=config.max_frames, url=endpoint, + timeline_enabled=config.timeline_enabled, ) return [] except Exception as e: diff --git a/ddtrace/settings/profiling.py b/ddtrace/settings/profiling.py index 71ba2dac9cc..74f02d07f57 100644 --- a/ddtrace/settings/profiling.py +++ b/ddtrace/settings/profiling.py @@ -170,6 +170,15 @@ class ProfilingConfig(En): help="The timeout in seconds before dropping events if the HTTP API does not reply", ) + timeline_enabled = En.v( + bool, + "timeline_enabled", + default=False, + help_type="Boolean", + help="Whether to add timestamp information to captured samples. Adds a small amount of " + "overhead to the profiler, but enables the use of the Timeline view in the UI.", + ) + tags = En.v( dict, "tags",