Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
ae17990
Add preliminary timeline support
sanchda Mar 11, 2024
b95cd17
Adjust so that timestamp is set at sample creation
sanchda Mar 11, 2024
5577a57
Remove unneeded method
sanchda Mar 11, 2024
7dfa808
Oops, wrong sample
sanchda Mar 11, 2024
f847c08
Merge branch 'main' into sanchda/add_timeline_ddup
sanchda Mar 11, 2024
72599a9
Merge branch 'main' into sanchda/add_timeline_ddup
sanchda Mar 12, 2024
004841d
Add lock profiling support to ddup
sanchda Mar 13, 2024
cddad12
Oops
sanchda Mar 13, 2024
89ff5da
OK, fixed
sanchda Mar 13, 2024
8454ede
Flush samples
sanchda Mar 13, 2024
c55c24e
Fixups
sanchda Mar 13, 2024
9d2fe43
Merge branch 'main' into sanchda/add_timeline_ddup
sanchda Mar 13, 2024
2470b33
Merge branch 'main' into sanchda/add_timeline_ddup
sanchda Mar 14, 2024
07ba938
Try to standardize clock sources for timeline
sanchda Mar 14, 2024
59bd2d2
Merge branch 'sanchda/add_timeline_ddup' of github.com:DataDog/dd-tra…
sanchda Mar 14, 2024
5ea3022
Remove logging
sanchda Mar 14, 2024
7a46a18
Fixup
sanchda Mar 14, 2024
a8506cf
Adjust for monotonic clock epoch
sanchda Mar 14, 2024
39268c2
Why is lock profiling broken
sanchda Mar 14, 2024
caa6dce
Name fixups
sanchda Mar 14, 2024
b6c9daa
Merge branch 'main' into sanchda/add_timeline_ddup
sanchda May 29, 2024
e66499f
Merge branch 'main' into sanchda/add_timeline_ddup
sanchda May 29, 2024
34c2fad
Allow timeline to be disabled
sanchda May 29, 2024
4e311ce
Better formatting on long ints
sanchda Jun 7, 2024
7470a58
Merge branch 'main' into sanchda/add_timeline_ddup
sanchda Jun 7, 2024
ba290ca
Fixups
sanchda Jun 7, 2024
524cad3
Change default parity
sanchda Jun 7, 2024
bf0da81
Merge branch 'main' into sanchda/add_timeline_ddup
sanchda Jun 7, 2024
5f313c7
Merge branch 'main' into sanchda/add_timeline_ddup
sanchda Jun 10, 2024
dd80928
Merge branch 'main' into sanchda/add_timeline_ddup
sanchda Jun 10, 2024
fc58ef9
Merge branch 'main' into sanchda/add_timeline_ddup
emmettbutler Jun 10, 2024
a7d72f8
Slight refactors
sanchda Jun 10, 2024
1796faa
Merge branch 'sanchda/add_timeline_ddup' of github.com:DataDog/dd-tra…
sanchda Jun 10, 2024
0d270e7
Fixup
sanchda Jun 10, 2024
c9771e3
Style
sanchda Jun 10, 2024
bbbb845
Merge branch 'main' into sanchda/add_timeline_ddup
emmettbutler Jun 10, 2024
aecbeba
Merge branch 'main' into sanchda/add_timeline_ddup
emmettbutler Jun 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ extern "C"
void ddup_config_profiler_version(std::string_view profiler_version);
void ddup_config_url(std::string_view url);
void ddup_config_max_nframes(int max_nframes);
void ddup_config_timeline(bool enable);

void ddup_config_user_tag(std::string_view key, std::string_view val);
void ddup_config_sample_type(unsigned int type);
Expand Down Expand Up @@ -56,6 +57,7 @@ extern "C"
std::string_view _filename,
uint64_t address,
int64_t line);
void ddup_push_monotonic_ns(Datadog::Sample* sample, int64_t monotonic_ns);
void ddup_flush_sample(Datadog::Sample* sample);
void ddup_drop_sample(Datadog::Sample* sample);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,6 @@ class Profile
const ValueIndex& val();

// collect
bool collect(const ddog_prof_Sample& sample);
bool collect(const ddog_prof_Sample& sample, int64_t endtime_ns);
};
} // namespace Datadog
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ class Sample
SampleType type_mask;
std::string errmsg;

// Timeline support works by endowing each sample with a timestamp. Collection of this data this data is cheap, but
// due to the underlying pprof format, timeline support increases the sample cardinality. Rather than switching
// around the frontend code too much, we push enablement down to whether or not timestamps get added to samples (a
// 0 value suppresses the tag). However, Sample objects are short-lived, so we make the flag static.
static inline bool timeline_enabled = false;

// Keeps temporary buffer of frames in the stack
std::vector<ddog_prof_Location> locations;
size_t dropped_frames = 0;
Expand All @@ -36,6 +42,9 @@ class Sample
// Storage for values
std::vector<int64_t> values = {};

// Additional metadata
int64_t endtime_ns = 0; // end of the event

public:
// Helpers
bool push_label(ExportLabelKey key, std::string_view val);
Expand All @@ -62,6 +71,11 @@ class Sample
bool push_trace_resource_container(std::string_view trace_resource_container);
bool push_exceptioninfo(std::string_view exception_type, int64_t count);
bool push_class_name(std::string_view class_name);
bool push_monotonic_ns(int64_t monotonic_ns);

// Interacts with static Sample state
bool is_timeline_enabled() const;
static void set_timeline(bool enabled);

// Assumes frames are pushed in leaf-order
void push_frame(std::string_view name, // for ddog_prof_Function
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class SampleManager
// Configuration
static void add_type(unsigned int type);
static void set_max_nframes(unsigned int _max_nframes);
static void set_timeline(bool enable);

// Sampling entrypoint (this could also be called `build_ptr()`)
static Sample* start_sample();
Expand Down
13 changes: 13 additions & 0 deletions ddtrace/internal/datadog/profiling/dd_wrapper/src/interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,19 @@ ddup_config_sample_type(unsigned int _type) // cppcheck-suppress unusedFunction
{
Datadog::SampleManager::add_type(_type);
}

void
ddup_config_max_nframes(int max_nframes) // cppcheck-suppress unusedFunction
{
Datadog::SampleManager::set_max_nframes(max_nframes);
}

void
ddup_config_timeline(bool enabled) // cppcheck-suppress unusedFunction
{
Datadog::SampleManager::set_timeline(enabled);
}

bool
ddup_is_initialized() // cppcheck-suppress unusedFunction
{
Expand Down Expand Up @@ -247,6 +254,12 @@ ddup_push_frame(Datadog::Sample* sample, // cppcheck-suppress unusedFunction
sample->push_frame(_name, _filename, address, line);
}

void
ddup_push_monotonic_ns(Datadog::Sample* sample, int64_t monotonic_ns)
{
sample->push_monotonic_ns(monotonic_ns);
}

void
ddup_flush_sample(Datadog::Sample* sample) // cppcheck-suppress unusedFunction
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,10 @@ Datadog::Profile::val()
}

bool
Datadog::Profile::collect(const ddog_prof_Sample& sample)
Datadog::Profile::collect(const ddog_prof_Sample& sample, int64_t endtime_ns)
{
// TODO this should propagate some kind of timestamp for timeline support
const std::lock_guard<std::mutex> lock(profile_mtx);
auto res = ddog_prof_Profile_add(&cur_profile, sample, 0);
auto res = ddog_prof_Profile_add(&cur_profile, sample, endtime_ns);
if (!res.ok) { // NOLINT (cppcoreguidelines-pro-type-union-access)
auto err = res.err; // NOLINT (cppcoreguidelines-pro-type-union-access)
const std::string errmsg = err_to_msg(&err, "Error adding sample to profile");
Expand Down
44 changes: 43 additions & 1 deletion ddtrace/internal/datadog/profiling/dd_wrapper/src/sample.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "sample.hpp"

#include <chrono>
#include <thread>

Datadog::Sample::Sample(SampleType _type_mask, unsigned int _max_nframes)
Expand Down Expand Up @@ -117,7 +118,7 @@ Datadog::Sample::flush_sample()
.labels = { labels.data(), labels.size() },
};

const bool ret = profile_state.collect(sample);
const bool ret = profile_state.collect(sample, endtime_ns);
clear_buffers();
return ret;
}
Expand Down Expand Up @@ -316,6 +317,47 @@ Datadog::Sample::push_class_name(std::string_view class_name)
return true;
}

bool
Datadog::Sample::push_monotonic_ns(int64_t _monotonic_ns)
{
// Monotonic times have their epoch at the system start, so they need an
// adjustment to the standard epoch
// Just set a static for now and use a lambda to compute the offset once
const static auto offset = []() {
// Get the current epoch time
using namespace std::chrono;
auto epoch_ns = duration_cast<nanoseconds>(system_clock::now().time_since_epoch()).count();

// Get the current monotonic time. Use clock_gettime directly because the standard underspecifies
// which clock is actually used in std::chrono
timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
auto monotonic_ns = static_cast<int64_t>(ts.tv_sec) * 1'000'000'000LL + ts.tv_nsec;

// Compute the difference. We're after 1970, so epoch_ns will be larger
return epoch_ns - monotonic_ns;
}();

// If timeline is not enabled, then this is a no-op
if (is_timeline_enabled()) {
endtime_ns = _monotonic_ns + offset;
}

return true;
}

void
Datadog::Sample::set_timeline(bool enabled)
{
timeline_enabled = enabled;
}

bool
Datadog::Sample::is_timeline_enabled() const
{
return timeline_enabled;
}

ddog_prof_Profile&
Datadog::Sample::profile_borrow()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ Datadog::SampleManager::set_max_nframes(unsigned int _max_nframes)
}
}

void
Datadog::SampleManager::set_timeline(bool enable)
{
Datadog::Sample::set_timeline(enable);
}

Datadog::Sample*
Datadog::SampleManager::start_sample()
{
Expand Down
2 changes: 2 additions & 0 deletions ddtrace/internal/datadog/profiling/ddup/_ddup.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def init(
tags: Optional[Dict[Union[str, bytes], Union[str, bytes]]],
max_nframes: Optional[int],
url: Optional[str],
timeline_enabled: Optional[bool],
) -> None: ...
def upload() -> None: ...

Expand All @@ -30,4 +31,5 @@ class SampleHandle:
def push_exceptioninfo(self, exc_type: Union[None, bytes, str, type], count: int) -> None: ...
def push_class_name(self, class_name: StringType) -> None: ...
def push_span(self, span: Optional[Span], endpoint_collection_enabled: bool) -> None: ...
def push_monotonic_ns(self, monotonic_ns: int) -> None: ...
def flush_sample(self) -> None: ...
12 changes: 10 additions & 2 deletions ddtrace/internal/datadog/profiling/ddup/_ddup.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ cdef extern from "interface.hpp":
void ddup_config_profiler_version(string_view profiler_version)
void ddup_config_url(string_view url)
void ddup_config_max_nframes(int max_nframes)
void ddup_config_timeline(bint enable)

void ddup_config_user_tag(string_view key, string_view val)
void ddup_config_sample_type(unsigned int type)
Expand All @@ -64,6 +65,7 @@ cdef extern from "interface.hpp":
void ddup_push_exceptioninfo(Sample *sample, string_view exception_type, int64_t count)
void ddup_push_class_name(Sample *sample, string_view class_name)
void ddup_push_frame(Sample *sample, string_view _name, string_view _filename, uint64_t address, int64_t line)
void ddup_push_monotonic_ns(Sample *sample, int64_t monotonic_ns)
void ddup_flush_sample(Sample *sample)
void ddup_drop_sample(Sample *sample)
void ddup_set_runtime_id(string_view _id)
Expand Down Expand Up @@ -130,7 +132,8 @@ def init(
version: StringType = None,
tags: Optional[Dict[Union[str, bytes], Union[str, bytes]]] = None,
max_nframes: Optional[int] = None,
url: StringType = None) -> None:
url: StringType = None,
timeline_enabled: Optional[bool] = None) -> None:

# Try to provide a ddtrace-specific default service if one is not given
service = service or DEFAULT_SERVICE_NAME
Expand All @@ -156,6 +159,8 @@ def init(
for key, val in tags.items():
if key and val:
call_ddup_config_user_tag(ensure_binary_or_empty(key), ensure_binary_or_empty(val))
if timeline_enabled is True:
ddup_config_timeline(True)
ddup_init()


Expand Down Expand Up @@ -234,7 +239,6 @@ cdef class SampleHandle:
def push_task_id(self, task_id: Optional[int]) -> None:
if self.ptr is not NULL:
if task_id is not None:
ddup_push_task_id(self.ptr, task_id)
ddup_push_task_id(self.ptr, clamp_to_int64_unsigned(task_id))

def push_task_name(self, task_name: StringType) -> None:
Expand Down Expand Up @@ -282,6 +286,10 @@ cdef class SampleHandle:
string_view(<const char*>root_service_bytes, len(root_service_bytes))
)

def push_monotonic_ns(self, monotonic_ns: int) -> None:
if self.ptr is not NULL:
ddup_push_monotonic_ns(self.ptr, <int64_t>monotonic_ns)

def flush_sample(self) -> None:
# Flushing the sample consumes it. The user will no longer be able to use
# this handle after flushing it.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,18 @@ StackRenderer::render_thread_begin(PyThreadState* tstate,
return;
}

// #warning stack_v2 should use a C++ interface instead of re-converting intermediates
// Get the current time in ns in a way compatible with python's time.monotonic_ns(), which is backed by
// clock_gettime(CLOCK_MONOTONIC) on linux and mach_absolute_time() on macOS.
// This is not the same as std::chrono::steady_clock, which is backed by clock_gettime(CLOCK_MONOTONIC_RAW)
// (although this is underspecified in the standard)
timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
auto now_ns = static_cast<int64_t>(ts.tv_sec) * 1'000'000'000LL + static_cast<int64_t>(ts.tv_nsec);
ddup_push_monotonic_ns(sample, now_ns);
}

ddup_push_threadinfo(sample, static_cast<int64_t>(thread_id), static_cast<int64_t>(native_id), name);
ddup_push_walltime(sample, 1000 * wall_time_us, 1);
ddup_push_walltime(sample, 1000LL * wall_time_us, 1);
}

void
Expand Down
12 changes: 10 additions & 2 deletions ddtrace/profiling/collector/_lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ddtrace._trace.tracer import Tracer
from ddtrace.internal import compat
from ddtrace.internal.datadog.profiling import ddup
from ddtrace.internal.logger import get_logger
from ddtrace.profiling import _threading
from ddtrace.profiling import collector
from ddtrace.profiling import event
Expand All @@ -21,6 +22,9 @@
from ddtrace.vendor import wrapt


LOG = get_logger(__name__)


@event.event_class
class LockEventBase(event.StackBasedEvent):
"""Base Lock event."""
Expand Down Expand Up @@ -118,6 +122,7 @@ def acquire(self, *args, **kwargs):
thread_native_id = _threading.get_thread_native_id(thread_id)

handle = ddup.SampleHandle()
handle.push_monotonic_ns(end)
handle.push_lock_name(self._self_name)
handle.push_acquire(end - start, 1) # AFAICT, capture_pct does not adjust anything here
handle.push_threadinfo(thread_id, thread_native_id, thread_name)
Expand Down Expand Up @@ -146,7 +151,8 @@ def acquire(self, *args, **kwargs):
event.set_trace_info(self._self_tracer.current_span(), self._self_endpoint_collection_enabled)

self._self_recorder.push_event(event)
except Exception:
except Exception as e:
LOG.warning("Error recording lock acquire event: %s", e)
pass # nosec

def release(self, *args, **kwargs):
Expand All @@ -172,6 +178,7 @@ def release(self, *args, **kwargs):
thread_native_id = _threading.get_thread_native_id(thread_id)

handle = ddup.SampleHandle()
handle.push_monotonic_ns(end)
handle.push_lock_name(self._self_name)
handle.push_release(
end - self._self_acquired_at, 1
Expand Down Expand Up @@ -208,7 +215,8 @@ def release(self, *args, **kwargs):
self._self_recorder.push_event(event)
finally:
del self._self_acquired_at
except Exception:
except Exception as e:
LOG.warning("Error recording lock release event: %s", e)
pass # nosec

acquire_lock = acquire
Expand Down
2 changes: 2 additions & 0 deletions ddtrace/profiling/collector/memalloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
except ImportError:
_memalloc = None # type: ignore[assignment]

from ddtrace.internal import compat
from ddtrace.internal.datadog.profiling import ddup
from ddtrace.profiling import _threading
from ddtrace.profiling import collector
Expand Down Expand Up @@ -167,6 +168,7 @@ def collect(self):
if thread_id in thread_id_ignore_set:
continue
handle = ddup.SampleHandle()
handle.push_monotonic_ns(compat.monotonic_ns())
handle.push_alloc(int((ceil(size) * alloc_count) / count), count) # Roundup to help float precision
handle.push_threadinfo(
thread_id, _threading.get_thread_native_id(thread_id), _threading.get_thread_name(thread_id)
Expand Down
6 changes: 5 additions & 1 deletion ddtrace/profiling/collector/stack.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ cdef collect_threads(thread_id_ignore_list, thread_time, thread_span_links) with
)


cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_time, thread_span_links, collect_endpoint):
cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_time, thread_span_links, collect_endpoint, now_ns = 0):
# Do not use `threading.enumerate` to not mess with locking (gevent!)
# Also collect the native threads, that are not registered with the built-in
# threading module, to keep backward compatibility with the previous
Expand Down Expand Up @@ -331,6 +331,7 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim
if nframes:
if use_libdd:
handle = ddup.SampleHandle()
handle.push_monotonic_ns(now_ns)
handle.push_walltime(wall_time, 1)
handle.push_threadinfo(thread_id, thread_native_id, thread_name)
handle.push_task_id(task_id)
Expand Down Expand Up @@ -358,6 +359,7 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim
if nframes:
if use_libdd:
handle = ddup.SampleHandle()
handle.push_monotonic_ns(now_ns)
handle.push_cputime( cpu_time, 1)
handle.push_walltime( wall_time, 1)
handle.push_threadinfo(thread_id, thread_native_id, thread_name)
Expand Down Expand Up @@ -390,6 +392,7 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim
if nframes:
if use_libdd:
handle = ddup.SampleHandle()
handle.push_monotonic_ns(now_ns)
handle.push_threadinfo(thread_id, thread_native_id, thread_name)
handle.push_exceptioninfo(exc_type, 1)
handle.push_class_name(frames[0].class_name)
Expand Down Expand Up @@ -541,6 +544,7 @@ class StackCollector(collector.PeriodicCollector):
wall_time,
self._thread_span_links,
self.endpoint_collection_enabled,
now_ns=now,
)

used_wall_time_ns = compat.monotonic_ns() - now
Expand Down
1 change: 1 addition & 0 deletions ddtrace/profiling/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ def _build_default_exporters(self):
tags=self.tags, # type: ignore
max_nframes=config.max_frames,
url=endpoint,
timeline_enabled=config.timeline_enabled,
)
return []
except Exception as e:
Expand Down
Loading