Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ddprof-lib/src/main/cpp/arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@

#include <stddef.h>

// LP64_ONLY(x) expands to x when the compiler predefines _LP64 and to nothing
// otherwise, so a member or statement can be compiled in for LP64 builds only.
#if !defined(_LP64)
#  define LP64_ONLY(stmt)
#else // _LP64
#  define LP64_ONLY(stmt) stmt
#endif // !_LP64

// Short aliases for the built-in unsigned integer types.
// NOTE(review): the bit widths suggested by the names (8/16/32) are not
// enforced here; they rely on the target ABI's sizes for these types.
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
Expand Down
5 changes: 3 additions & 2 deletions ddprof-lib/src/main/cpp/callTraceStorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "os.h"
#include <string.h>

// Expands to a bare comma. Lets a comma be passed inside the single argument
// of a function-like macro, e.g. LP64_ONLY(0 COMMA), where a literal ','
// would be parsed as an argument separator.
#define COMMA ,

// Capacity passed to LongHashTable::allocate() when the CallTraceStorage
// constructor creates its first table.
static const u32 INITIAL_CAPACITY = 65536;
// Value handed to the _allocator member by the CallTraceStorage constructor
// (presumably a chunk size in bytes, i.e. 8 MiB — TODO confirm).
static const u32 CALL_TRACE_CHUNK = 8 * 1024 * 1024;
// NOTE(review): presumably the id reported for the storage-overflow trace;
// its use is not visible in this excerpt — confirm.
static const u32 OVERFLOW_TRACE_ID = 0x7fffffff;
Expand Down Expand Up @@ -81,8 +83,7 @@ class LongHashTable {
}
};

CallTrace CallTraceStorage::_overflow_trace = {
false, 1, {BCI_ERROR, (jmethodID) "storage_overflow"}};
CallTrace CallTraceStorage::_overflow_trace = {false, 1, {BCI_ERROR, LP64_ONLY(0 COMMA) (jmethodID)"storage_overflow"}};

CallTraceStorage::CallTraceStorage() : _allocator(CALL_TRACE_CHUNK), _lock(0) {
_current_table = LongHashTable::allocate(NULL, INITIAL_CAPACITY);
Expand Down
28 changes: 5 additions & 23 deletions ddprof-lib/src/main/cpp/livenessTracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,22 +67,14 @@ void LivenessTracker::cleanup_table(bool forced) {
if (target != i) {
_table[target] = _table[i]; // will clone TrackingEntry at 'i'
_table[i].ref = nullptr; // will nullify the original ref
assert(_table[i].frames == _table[target].frames);
_table[i].frames = nullptr; // will nullify the original frames
assert(_table[target].frames != nullptr);
_table[i].call_trace_id = 0;
}
assert(_table[target].ref != nullptr &&
_table[target].frames != nullptr);
_table[target].age += epoch_diff;
} else {
jweak tmpRef = _table[i].ref;
_table[i].ref = nullptr;
env->DeleteWeakGlobalRef(tmpRef);

jvmtiFrameInfo *tmpFrames = _table[i].frames;
_table[i].frames = nullptr;
assert(_table[i].ref == nullptr && _table[i].frames == nullptr);
delete[] tmpFrames;
_table[i].call_trace_id = 0;
}
}

Expand Down Expand Up @@ -119,8 +111,6 @@ void LivenessTracker::flush_table(std::set<int> *tracked_thread_ids) {
for (int i = 0; i < (sz = _table_size); i++) {
jobject ref = env->NewLocalRef(_table[i].ref);
if (ref != nullptr) {
assert(_table[i].frames != nullptr);

if (tracked_thread_ids != nullptr) {
tracked_thread_ids->insert(_table[i].tid);
}
Expand All @@ -141,9 +131,7 @@ void LivenessTracker::flush_table(std::set<int> *tracked_thread_ids) {
: 0;
env->ReleaseStringUTFChars(name_str, name);

Profiler::instance()->recordExternalSample(
1, _table[i].tid, _table[i].frames, _table[i].frames_size,
/*truncated=*/false, BCI_LIVENESS, &event);
Profiler::instance()->recordDeferredSample(_table[i].tid, _table[i].call_trace_id, BCI_LIVENESS, &event);
}

env->DeleteLocalRef(ref);
Expand Down Expand Up @@ -292,8 +280,7 @@ Error LivenessTracker::initialize(Arguments &args) {
}

void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
jobject object, int num_frames,
jvmtiFrameInfo *frames) {
jobject object, u32 call_trace_id) {
if (!_enabled) {
// disabled
return;
Expand Down Expand Up @@ -340,12 +327,7 @@ void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
_table[idx].alloc = event;
_table[idx].skipped = skipped;
_table[idx].age = 0;
_table[idx].frames_size = num_frames;
_table[idx].frames = new jvmtiFrameInfo[_table[idx].frames_size];
if (frames != nullptr) {
memcpy(_table[idx].frames, frames,
sizeof(jvmtiFrameInfo) * _table[idx].frames_size);
}
_table[idx].call_trace_id = call_trace_id;
_table[idx].ctx = Contexts::get(tid);
}

Expand Down
6 changes: 2 additions & 4 deletions ddprof-lib/src/main/cpp/livenessTracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ typedef struct TrackingEntry {
jweak ref;
AllocEvent alloc;
double skipped;
jint frames_size;
jvmtiFrameInfo *frames;
u32 call_trace_id;
jint tid;
jlong time;
jlong age;
Expand Down Expand Up @@ -100,8 +99,7 @@ class LivenessTracker {

Error start(Arguments &args);
void stop();
void track(JNIEnv *env, AllocEvent &event, jint tid, jobject object,
int num_frames, jvmtiFrameInfo *frames);
void track(JNIEnv *env, AllocEvent &event, jint tid, jobject object, u32 call_trace_id);
void flush(std::set<int> &tracked_thread_ids);

static void JNICALL GarbageCollectionFinish(jvmtiEnv *jvmti_env);
Expand Down
34 changes: 5 additions & 29 deletions ddprof-lib/src/main/cpp/objectSampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,7 @@ void ObjectSampler::recordAllocation(jvmtiEnv *jvmti, JNIEnv *jni,
event._id = id;
}

jint frames_size = 0;
jvmtiFrameInfo *frames = nullptr;

u32 call_trace_id = 0;
// we do record the details and stacktraces only for when recording
// allocations or liveness
if (_record_allocations || _record_liveness) {
Expand All @@ -78,31 +76,14 @@ void ObjectSampler::recordAllocation(jvmtiEnv *jvmti, JNIEnv *jni,
? 1
: 1 / (1 - exp(-size / (double)_interval)));

frames = new jvmtiFrameInfo[_max_stack_depth];
call_trace_id = Profiler::instance()->recordJVMTISample(size, tid, thread, BCI_ALLOC, &event, !_record_allocations);

if (jvmti->GetStackTrace(thread, 0, _max_stack_depth, frames,
&frames_size) != JVMTI_ERROR_NONE ||
frames_size <= 0) {
delete[] frames;
if (call_trace_id == 0) {
return;
}

if (frames_size > 0) {
std::set<jclass> classes;
jclass method_class;
for (int i = 0; i < frames_size; i++) {
if (jvmti->GetMethodDeclaringClass(frames[i].method, &method_class) ==
0) {
classes.insert(method_class);
}
}
}
}

if (_record_allocations) {
Profiler::instance()->recordExternalSample(
size, tid, frames, frames_size, /*truncated=*/false, BCI_ALLOC, &event);

u64 current_samples = __sync_add_and_fetch(&_alloc_event_count, 1);
// in order to lower the number of atomic reads from the timestamp variable
// the check will be performed only each N samples
Expand Down Expand Up @@ -130,15 +111,10 @@ void ObjectSampler::recordAllocation(jvmtiEnv *jvmti, JNIEnv *jni,
}

// Either we are recording liveness or tracking GC generations (lightweight
// livenss samples)
// liveness samples)
if (_gc_generations || _record_liveness) {
LivenessTracker::instance()->track(jni, event, tid, object, frames_size,
frames);
LivenessTracker::instance()->track(jni, event, tid, object, call_trace_id);
}

// it's safe to delete frames - the liveness tracker keeps a full copy of the
// frames and manages its own memory
delete[] frames;
}

Error ObjectSampler::check(Arguments &args) {
Expand Down
106 changes: 39 additions & 67 deletions ddprof-lib/src/main/cpp/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "profiler.h"
#include "asyncSampleMutex.h"
#include "common.h"
#include "context.h"
#include "counters.h"
#include "ctimer.h"
Expand Down Expand Up @@ -548,51 +549,6 @@ int Profiler::getJavaTraceAsync(void *ucontext, ASGCT_CallFrame *frames,
return trace.frames - frames + 1;
}

// Fetches a stack trace via JVM TI GetStackTrace (thread argument NULL),
// starting at start_depth, and converts it into ASGCT_CallFrame entries.
//
// Returns the number of frames written to `frames`, or 0 when the JVM TI call
// fails or reports no frames.
//
// Note: max_depth is accepted but not consulted; the frame limit passed to
// GetStackTrace is the _max_stack_depth member.
int Profiler::getJavaTraceJvmti(jvmtiFrameInfo *jvmti_frames,
                                ASGCT_CallFrame *frames, int start_depth,
                                int max_depth) {
  int captured;
  const bool ok = VM::jvmti()->GetStackTrace(NULL, start_depth,
                                             _max_stack_depth, jvmti_frames,
                                             &captured) == 0;
  // `captured` is only inspected once the call is known to have succeeded.
  if (!ok || captured <= 0) {
    return 0;
  }
  return convertFrames(jvmti_frames, frames, captured);
}

// Walks the Java stack through VMStructs::_get_stack_trace for the thread
// derived from the JNI environment returned by VM::jni(), then converts the
// result into ASGCT_CallFrame entries.
//
// Returns the number of frames written to `frames`, or 0 when no JNI
// environment is available, the lookup fails, or no frames are reported.
int Profiler::getJavaTraceInternal(jvmtiFrameInfo *jvmti_frames,
                                   ASGCT_CallFrame *frames, int max_depth) {
  // Pure JVM TI cannot be used here: it assumes the _thread_in_native state,
  // whereas allocation events are raised in the _thread_in_vm state, see
  // https://github.com/jvm-profiling-tools/java-profiler/issues/64
  JNIEnv *env = VM::jni();
  if (env == NULL) {
    return 0;
  }

  // Local object created before the stack walk; it stays in scope for the
  // frame conversion as well.
  JitWriteProtection jit(false);
  VMThread *current = VMThread::fromEnv(env);

  int captured;
  const bool ok = VMStructs::_get_stack_trace(NULL, current, 0, max_depth,
                                              jvmti_frames, &captured) == 0;
  // `captured` is only inspected once the call is known to have succeeded.
  if (!ok || captured <= 0) {
    return 0;
  }
  return convertFrames(jvmti_frames, frames, captured);
}

// Rewrites num_frames entries of jvmti_frames into frames: each entry's
// `method` is stored as method_id and its `location` is stored as a jint bci.
// Returns num_frames unchanged.
inline int Profiler::convertFrames(jvmtiFrameInfo *jvmti_frames,
ASGCT_CallFrame *frames, int num_frames) {
// Convert to AsyncGetCallTrace format.
// Note: jvmti_frames and frames may overlap.
for (int i = 0; i < num_frames; i++) {
// Keep this statement order: location is read into a local before the first
// store to frames[i], so the conversion still works when both arrays share
// storage and a store into frames[i] would overwrite jvmti_frames[i].
jint bci = jvmti_frames[i].location;
frames[i].method_id = jvmti_frames[i].method;
frames[i].bci = bci;
}
return num_frames;
}

void Profiler::fillFrameTypes(ASGCT_CallFrame *frames, int num_frames,
NMethod *nmethod) {
if (nmethod->isNMethod() && nmethod->isAlive()) {
Expand Down Expand Up @@ -634,10 +590,7 @@ void Profiler::fillFrameTypes(ASGCT_CallFrame *frames, int num_frames,
}
}

void Profiler::recordExternalSample(u64 counter, int tid,
jvmtiFrameInfo *jvmti_frames,
jint num_jvmti_frames, bool truncated,
jint event_type, Event *event) {
u32 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event_type, Event *event, bool deferred) {
atomicInc(_total_samples);

u32 lock_index = getLockIndex(tid);
Expand All @@ -647,29 +600,50 @@ void Profiler::recordExternalSample(u64 counter, int tid,
// Too many concurrent signals already
atomicInc(_failures[-ticks_skipped]);

if (event_type == BCI_CPU && _cpu_engine == &perf_events) {
// Need to reset PerfEvents ring buffer, even though we discard the
// collected trace
PerfEvents::resetBuffer(tid);
}
return;
return 0;
}
u32 call_trace_id = 0;
if (!_omit_stacktraces && jvmti_frames != nullptr) {
if (!_omit_stacktraces) {
ASGCT_CallFrame *frames = _calltrace_buffer[lock_index]->_asgct_frames;
jvmtiFrameInfo *jvmti_frames = _calltrace_buffer[lock_index]->_jvmti_frames;

int num_frames = 0;
if (!_jfr.active() && BCI_ALLOC >= event_type && event_type >= BCI_PARK &&
event->_id) {
num_frames = makeFrame(frames, event_type, event->_id);

if (VM::jvmti()->GetStackTrace(thread, 0, _max_stack_depth, jvmti_frames, &num_frames) == JVMTI_ERROR_NONE && num_frames > 0) {
// Convert to AsyncGetCallTrace format.
// Note: jvmti_frames and frames may overlap.
for (int i = 0; i < num_frames; i++) {
jint bci = jvmti_frames[i].location;
jmethodID mid = jvmti_frames[i].method;
frames[i].method_id = mid;
frames[i].bci = bci;
// see https://github.com/async-profiler/async-profiler/pull/1090
LP64_ONLY(frames[i].padding = 0;)
}
}

num_frames +=
convertFrames(jvmti_frames, frames + num_frames, num_jvmti_frames);
call_trace_id = _call_trace_storage.put(num_frames, frames, false, counter);
}
if (!deferred) {
_jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event);
}

_locks[lock_index].unlock();
return call_trace_id;
}

call_trace_id =
_call_trace_storage.put(num_frames, frames, truncated, counter);
void Profiler::recordDeferredSample(int tid, u32 call_trace_id, jint event_type, Event *event) {
atomicInc(_total_samples);

u32 lock_index = getLockIndex(tid);
if (!_locks[lock_index].tryLock() &&
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
// Too many concurrent signals already
atomicInc(_failures[-ticks_skipped]);
return;
}

_jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event);

_locks[lock_index].unlock();
Expand Down Expand Up @@ -1153,13 +1127,11 @@ Error Profiler::start(Arguments &args, bool reset) {
// (Re-)allocate calltrace buffers
if (_max_stack_depth != args._jstackdepth) {
_max_stack_depth = args._jstackdepth;
size_t buffer_size =
(_max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES) *
sizeof(CallTraceBuffer);
size_t nelem = _max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES;

for (int i = 0; i < CONCURRENCY_LEVEL; i++) {
free(_calltrace_buffer[i]);
_calltrace_buffer[i] = (CallTraceBuffer *)malloc(buffer_size);
_calltrace_buffer[i] = (CallTraceBuffer*)calloc(nelem, sizeof(CallTraceBuffer));
if (_calltrace_buffer[i] == NULL) {
_max_stack_depth = 0;
return Error("Not enough memory to allocate stack trace buffers (try "
Expand Down
14 changes: 4 additions & 10 deletions ddprof-lib/src/main/cpp/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ const int RESERVED_FRAMES = 4;

// Event-kind flags; every enumerator is a distinct single bit.
enum EventMask {
  EM_CPU = 0x1,
  EM_WALL = 0x2,
  EM_ALLOC = 0x4
};

struct CallTraceBuffer {
union CallTraceBuffer {
ASGCT_CallFrame _asgct_frames[1];
jvmtiFrameInfo _jvmti_frames[1];
};

class FrameName;
Expand Down Expand Up @@ -138,12 +139,6 @@ class Profiler {
int tid, StackContext *java_ctx, bool *truncated);
int getJavaTraceAsync(void *ucontext, ASGCT_CallFrame *frames, int max_depth,
StackContext *java_ctx, bool *truncated);
int getJavaTraceJvmti(jvmtiFrameInfo *jvmti_frames, ASGCT_CallFrame *frames,
int start_depth, int max_depth);
int getJavaTraceInternal(jvmtiFrameInfo *jvmti_frames,
ASGCT_CallFrame *frames, int max_depth);
int convertFrames(jvmtiFrameInfo *jvmti_frames, ASGCT_CallFrame *frames,
int num_frames);
void fillFrameTypes(ASGCT_CallFrame *frames, int num_frames,
NMethod *nmethod);
void updateThreadName(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread,
Expand Down Expand Up @@ -223,9 +218,8 @@ class Profiler {
ASGCT_CallFrame *frames);
void recordSample(void *ucontext, u64 weight, int tid, jint event_type,
u32 call_trace_id, Event *event);
void recordExternalSample(u64 weight, int tid, jvmtiFrameInfo *jvmti_frames,
jint num_jvmti_frames, bool truncated,
jint event_type, Event *event);
u32 recordJVMTISample(u64 weight, int tid, jthread thread, jint event_type, Event *event, bool deferred);
void recordDeferredSample(int tid, u32 call_trace_id, jint event_type, Event *event);
void recordExternalSample(u64 weight, int tid, int num_frames,
ASGCT_CallFrame *frames, bool truncated,
jint event_type, Event *event);
Expand Down
4 changes: 4 additions & 0 deletions ddprof-lib/src/main/cpp/vmEntry.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <jvmti.h>

#include "arch.h"
#include "codeCache.h"
#include "frame.h"

Expand Down Expand Up @@ -62,9 +63,12 @@ enum ASGCT_Failure {

// One stack frame in AsyncGetCallTrace format: a bci value plus the jmethodID
// of the method it belongs to.
typedef struct {
jint bci;
// see https://github.com/async-profiler/async-profiler/pull/1090
// On LP64 builds an explicit jint member is declared between bci and
// method_id. NOTE(review): presumably this names the alignment gap so it can
// be zero-initialised — confirm against the linked PR.
LP64_ONLY(jint padding;)
jmethodID method_id;
} ASGCT_CallFrame;


typedef struct {
JNIEnv *env;
jint num_frames;
Expand Down
Loading
Loading