Skip to content

Commit

Permalink
Add ITTAPI timing functions to JL_TIMING (#49448)
Browse files Browse the repository at this point in the history
  • Loading branch information
pchintalapudi authored Apr 22, 2023
1 parent 5ea1a41 commit a34261f
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 55 deletions.
6 changes: 3 additions & 3 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8435,7 +8435,7 @@ jl_llvm_functions_t jl_emit_code(
jl_value_t *jlrettype,
jl_codegen_params_t &params)
{
JL_TIMING(CODEGEN, CODEGEN);
JL_TIMING(CODEGEN, CODEGEN_LLVM);
jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_CURRENT_BLOCK);
// caller must hold codegen_lock
jl_llvm_functions_t decls = {};
Expand Down Expand Up @@ -8478,7 +8478,7 @@ jl_llvm_functions_t jl_emit_codeinst(
jl_code_info_t *src,
jl_codegen_params_t &params)
{
JL_TIMING(CODEGEN, CODEGEN);
JL_TIMING(CODEGEN, CODEGEN_Codeinst);
jl_timing_show_method_instance(codeinst->def, JL_TIMING_CURRENT_BLOCK);
JL_GC_PUSH1(&src);
if (!src) {
Expand Down Expand Up @@ -8558,7 +8558,7 @@ void jl_compile_workqueue(
Module &original,
jl_codegen_params_t &params, CompilationPolicy policy)
{
JL_TIMING(CODEGEN, CODEGEN);
JL_TIMING(CODEGEN, CODEGEN_Workqueue);
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
while (!params.workqueue.empty()) {
Expand Down
8 changes: 4 additions & 4 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ NOINLINE uintptr_t gc_get_stack_ptr(void)

void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads)
{
JL_TIMING(GC, Stop);
JL_TIMING(GC, GC_Stop);
#ifdef USE_TRACY
TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx);
TracyCZoneColor(ctx, 0x696969);
Expand Down Expand Up @@ -3043,7 +3043,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
uint64_t start_mark_time = jl_hrtime();
JL_PROBE_GC_MARK_BEGIN();
{
JL_TIMING(GC, Mark);
JL_TIMING(GC, GC_Mark);

// 1. fix GC bits of objects in the remset.
assert(gc_n_threads);
Expand Down Expand Up @@ -3204,7 +3204,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
uint64_t start_sweep_time = jl_hrtime();
JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
{
JL_TIMING(GC, Sweep);
JL_TIMING(GC, GC_Sweep);
#ifdef USE_TRACY
if (sweep_full) {
TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx);
Expand Down Expand Up @@ -3405,7 +3405,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
// Doing this on all threads is racy (it's impossible to check
// or wait for finalizers on other threads without dead lock).
if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
JL_TIMING(GC, Finalizers);
JL_TIMING(GC, GC_Finalizers);
run_finalizers(ct);
}
JL_PROBE_GC_FINALIZER();
Expand Down
4 changes: 2 additions & 2 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ static jl_callptr_t _jl_compile_codeinst(
"invalid world for method-instance");
assert(src && jl_is_code_info(src));

JL_TIMING(LLVM_MODULE_FINISH, LLVM_MODULE_FINISH);
JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE);

jl_callptr_t fptr = NULL;
// emit the code in LLVM IR form
Expand Down Expand Up @@ -1435,7 +1435,7 @@ void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr)

void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
{
JL_TIMING(LLVM_MODULE_FINISH, LLVM_MODULE_FINISH);
JL_TIMING(LLVM_ORC, LLVM_ORC);
++ModulesAdded;
orc::SymbolLookupSet NewExports;
TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT {
Expand Down
1 change: 1 addition & 0 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,7 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
lock->count++;
return;
}
JL_TIMING(LOCK_SPIN, LOCK_SPIN);
jl_profile_lock_start_wait(lock);
while (1) {
if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
Expand Down
34 changes: 23 additions & 11 deletions src/timing.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,26 @@ extern "C" {
#endif

#ifdef ENABLE_TIMINGS
#include "timing.h"

#ifndef HAVE_TIMING_SUPPORT
#error Timings are not supported on your compiler
#endif

static uint64_t t0;
#ifdef USE_TRACY
#if defined(USE_TRACY) || defined(USE_ITTAPI)
/**
* These sources often generate millions of events / minute. Although Tracy
* can generally keep up with that, those events also bloat the saved ".tracy"
* files, so we disable them by default.
**/
JL_DLLEXPORT uint64_t jl_timing_enable_mask = 0xFFFFFFFFFFFFFFFF &
~(1ull << JL_TIMING_ROOT) &
~(1ull << JL_TIMING_TYPE_CACHE_LOOKUP) &
~(1ull << JL_TIMING_METHOD_MATCH) &
~(1ull << JL_TIMING_METHOD_LOOKUP_FAST) &
~(1ull << JL_TIMING_AST_COMPRESS) &
~(1ull << JL_TIMING_AST_UNCOMPRESS);
JL_DLLEXPORT uint64_t jl_timing_enable_mask = ~((1ull << JL_TIMING_ROOT) |
(1ull << JL_TIMING_TYPE_CACHE_LOOKUP) |
(1ull << JL_TIMING_METHOD_MATCH) |
(1ull << JL_TIMING_METHOD_LOOKUP_FAST) |
(1ull << JL_TIMING_AST_COMPRESS) |
(1ull << JL_TIMING_AST_UNCOMPRESS));
#else
JL_DLLEXPORT uint64_t jl_timing_enable_mask = 0xFFFFFFFFFFFFFFFF;
JL_DLLEXPORT uint64_t jl_timing_enable_mask = ~0ull;
#endif

JL_DLLEXPORT uint64_t jl_timing_counts[(int)JL_TIMING_LAST] = {0};
Expand All @@ -43,11 +41,15 @@ JL_DLLEXPORT uint32_t jl_timing_print_limit = 10;

const char *jl_timing_names[(int)JL_TIMING_LAST] =
{
#define X(name) #name
#define X(name) #name,
JL_TIMING_OWNERS
#undef X
};

#ifdef USE_ITTAPI
JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST];
#endif

void jl_print_timings(void)
{
uint64_t total_time = cycleclock() - t0;
Expand All @@ -66,6 +68,16 @@ void jl_print_timings(void)
void jl_init_timing(void)
{
t0 = cycleclock();

_Static_assert(JL_TIMING_EVENT_LAST < sizeof(uint64_t) * CHAR_BIT, "Too many timing events!");
_Static_assert((int)JL_TIMING_LAST <= (int)JL_TIMING_EVENT_LAST, "More owners than events!");

int i = 0;
#ifdef USE_ITTAPI
#define X(name) jl_timing_ittapi_events[i++] = __itt_event_create(#name, strlen(#name));
JL_TIMING_EVENTS
#undef X
#endif
}

void jl_destroy_timing(void)
Expand Down
117 changes: 82 additions & 35 deletions src/timing.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#ifndef JL_TIMING_H
#define JL_TIMING_H

#include "julia.h"

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -106,39 +108,60 @@ void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...);
#define JL_TIMING_CURRENT_BLOCK (&__timing_block)
#endif

#define JL_TIMING_OWNERS \
X(ROOT), \
X(GC), \
X(LOWERING), \
X(PARSING), \
X(INFERENCE), \
X(CODEGEN), \
X(METHOD_LOOKUP_SLOW), \
X(METHOD_LOOKUP_FAST), \
X(LLVM_OPT), \
X(LLVM_MODULE_FINISH), \
X(METHOD_MATCH), \
X(TYPE_CACHE_LOOKUP), \
X(TYPE_CACHE_INSERT), \
X(STAGED_FUNCTION), \
X(MACRO_INVOCATION), \
X(AST_COMPRESS), \
X(AST_UNCOMPRESS), \
X(SYSIMG_LOAD), \
X(SYSIMG_DUMP), \
X(NATIVE_DUMP), \
X(ADD_METHOD), \
X(LOAD_MODULE), \
X(SAVE_MODULE), \
X(INIT_MODULE),
#define JL_TIMING_OWNERS \
X(ROOT) \
X(GC) \
X(LOWERING) \
X(PARSING) \
X(INFERENCE) \
X(CODEGEN) \
X(METHOD_LOOKUP_SLOW) \
X(METHOD_LOOKUP_FAST) \
X(CODEINST_COMPILE) \
X(LLVM_OPT) \
X(LLVM_ORC) \
X(METHOD_MATCH) \
X(TYPE_CACHE_LOOKUP) \
X(TYPE_CACHE_INSERT) \
X(STAGED_FUNCTION) \
X(MACRO_INVOCATION) \
X(AST_COMPRESS) \
X(AST_UNCOMPRESS) \
X(SYSIMG_LOAD) \
X(SYSIMG_DUMP) \
X(NATIVE_DUMP) \
X(ADD_METHOD) \
X(LOAD_MODULE) \
X(SAVE_MODULE) \
X(INIT_MODULE) \
X(LOCK_SPIN) \


#define JL_TIMING_EVENTS \
JL_TIMING_OWNERS \
X(GC_Stop) \
X(GC_Mark) \
X(GC_Sweep) \
X(GC_Finalizers) \
X(CODEGEN_LLVM) \
X(CODEGEN_Codeinst) \
X(CODEGEN_Workqueue) \


enum jl_timing_owners {
#define X(name) JL_TIMING_ ## name
#define X(name) JL_TIMING_ ## name,
JL_TIMING_OWNERS
#undef X
JL_TIMING_LAST
};

enum jl_timing_events {
#define X(name) JL_TIMING_EVENT_ ## name,
JL_TIMING_EVENTS
#undef X
JL_TIMING_EVENT_LAST
};

/**
* Timing back-ends differ in terms of whether they support nested
* and asynchronous events.
Expand Down Expand Up @@ -177,6 +200,18 @@ enum jl_timing_owners {
#define _TRACY_DESTROY(block)
#endif

#ifdef USE_ITTAPI
#define _ITTAPI_CTX_MEMBER int event;
#define _ITTAPI_CTOR(block, event) block->event = event
#define _ITTAPI_START(block) if (_jl_timing_enabled(block->event)) __itt_event_start(jl_timing_ittapi_events[block->event])
#define _ITTAPI_STOP(block) if (_jl_timing_enabled(block->event)) __itt_event_end(jl_timing_ittapi_events[block->event])
#else
#define _ITTAPI_CTX_MEMBER
#define _ITTAPI_CTOR(block, event)
#define _ITTAPI_START(block)
#define _ITTAPI_STOP(block)
#endif

/**
* Implementation: Aggregated counts back-end
**/
Expand Down Expand Up @@ -225,16 +260,27 @@ STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block) JL_NOTSA

extern uint64_t jl_timing_enable_mask;
extern const char *jl_timing_names[(int)JL_TIMING_LAST];
#ifdef USE_ITTAPI
extern __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST];
#endif

struct _jl_timing_block_t { // typedef in julia.h
struct _jl_timing_block_t *prev;
_TRACY_CTX_MEMBER
_ITTAPI_CTX_MEMBER
_COUNTS_CTX_MEMBER
};

STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) JL_NOTSAFEPOINT {
STATIC_INLINE int _jl_timing_enabled(int event) JL_NOTSAFEPOINT {
return !!(jl_timing_enable_mask & (1 << event));
}

STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner, int event) JL_NOTSAFEPOINT {
uint64_t t = cycleclock(); (void)t;
_COUNTS_CTOR(&block->counts_ctx, owner);
_COUNTS_START(&block->counts_ctx, t);
_ITTAPI_CTOR(block, event);
_ITTAPI_START(block);

jl_task_t *ct = jl_current_task;
jl_timing_block_t **prevp = &ct->ptls->timing_stack;
Expand All @@ -248,6 +294,7 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) JL
STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFEPOINT {
uint64_t t = cycleclock(); (void)t;

_ITTAPI_STOP(block);
_COUNTS_STOP(&block->counts_ctx, t);
_COUNTS_DESTROY(&block->counts_ctx);
_TRACY_DESTROY(block->tracy_ctx);
Expand Down Expand Up @@ -281,8 +328,8 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N
#ifdef __cplusplus
struct jl_timing_block_cpp_t {
jl_timing_block_t block;
jl_timing_block_cpp_t(int owner) JL_NOTSAFEPOINT {
_jl_timing_block_ctor(&block, owner);
jl_timing_block_cpp_t(int owner, int event) JL_NOTSAFEPOINT {
_jl_timing_block_ctor(&block, owner, event);
}
~jl_timing_block_cpp_t() JL_NOTSAFEPOINT {
_jl_timing_block_destroy(&block);
Expand All @@ -292,13 +339,13 @@ struct jl_timing_block_cpp_t {
jl_timing_block_cpp_t& operator=(const jl_timing_block_cpp_t &) = delete;
jl_timing_block_cpp_t& operator=(const jl_timing_block_cpp_t &&) = delete;
};
#define JL_TIMING(subsystem, event) jl_timing_block_cpp_t __timing_block(JL_TIMING_ ## subsystem); \
#define JL_TIMING(subsystem, event) jl_timing_block_cpp_t __timing_block(JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \
_TRACY_CTOR(__timing_block.block.tracy_ctx, #event, (jl_timing_enable_mask >> (JL_TIMING_ ## subsystem)) & 1)
#else
#define JL_TIMING(subsystem, event) \
__attribute__((cleanup(_jl_timing_block_destroy))) \
jl_timing_block_t __timing_block; \
_jl_timing_block_ctor(&__timing_block, JL_TIMING_ ## subsystem); \
_jl_timing_block_ctor(&__timing_block, JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \
_TRACY_CTOR(__timing_block.tracy_ctx, #event, (jl_timing_enable_mask >> (JL_TIMING_ ## subsystem)) & 1)
#endif

Expand All @@ -311,10 +358,10 @@ struct jl_timing_suspend_cpp_t {
~jl_timing_suspend_cpp_t() JL_NOTSAFEPOINT {
_jl_timing_suspend_destroy(&suspend);
}
jl_timing_suspend_cpp_t(const jl_timing_block_cpp_t&) = delete;
jl_timing_suspend_cpp_t(const jl_timing_block_cpp_t&&) = delete;
jl_timing_suspend_cpp_t& operator=(const jl_timing_block_cpp_t &) = delete;
jl_timing_suspend_cpp_t& operator=(const jl_timing_block_cpp_t &&) = delete;
jl_timing_suspend_cpp_t(const jl_timing_suspend_cpp_t &) = delete;
jl_timing_suspend_cpp_t(jl_timing_suspend_cpp_t &&) = delete;
jl_timing_suspend_cpp_t& operator=(const jl_timing_suspend_cpp_t &) = delete;
jl_timing_suspend_cpp_t& operator=(jl_timing_suspend_cpp_t &&) = delete;
};
#define JL_TIMING_SUSPEND(subsystem, ct) jl_timing_suspend_cpp_t __suspend_block(#subsystem, ct)
#else
Expand Down

2 comments on commit a34261f

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your package evaluation job has completed - possible new issues were detected.
A full report can be found here.

Please sign in to comment.