Skip to content

Commit

Permalink
Profile: Thread and task-specific profiling (#41742)
Browse files Browse the repository at this point in the history
- Adds thread and task ids to profile samples
- Implements thread and task selection for Profile.print()
- Implements thread and task groupby options for Profile.print()
- Add include_meta to Profile.fetch() which defaults to false to ensure backwards compat with external profiling tooling
- store time of each profile sample (cycleclock)
- add sleep_check_state to metadata and show % utilization
  • Loading branch information
IanButterworth authored Sep 2, 2021
1 parent 3aada59 commit c5af061
Show file tree
Hide file tree
Showing 9 changed files with 361 additions and 82 deletions.
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ Standard library changes
#### Printf
* Now uses `textwidth` for formatting `%s` and `%c` widths ([#41085]).

#### Profile
* Profiling now records sample metadata including thread and task. `Profile.print()` has a new `groupby` kwarg that allows
grouping by thread, task, or nested thread/task, task/thread, and `threads` and `tasks` kwargs to allow filtering.
Further, percent utilization is now reported as a total or per-thread, based on whether the thread is idle or not at
each sample. `Profile.fetch()` by default strips out the new metadata to ensure backwards compatibility with external
profiling data consumers, but can be included with the `include_meta` kwarg. ([#41742])

#### Random

#### REPL
Expand Down
6 changes: 5 additions & 1 deletion contrib/generate_precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,11 @@ Profile = get(Base.loaded_modules,
nothing)
if Profile !== nothing
hardcoded_precompile_statements *= """
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol})
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt})
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}})
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UInt})
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UnitRange{UInt}})
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Vector{Int}, Vector{UInt}})
"""
end

Expand Down
4 changes: 2 additions & 2 deletions src/signal-handling.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64

JL_DLLEXPORT int jl_profile_is_buffer_full(void)
{
// the latter `+ 1` is for the block terminator `0`.
return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 1 > bt_size_max;
// the `+ 5` is for the block terminator `0` plus 4 metadata entries
return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 5 > bt_size_max;
}

static uint64_t jl_last_sigint_trigger = 0;
Expand Down
13 changes: 13 additions & 0 deletions src/signals-mach.c
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,19 @@ void *mach_profile_listener(void *arg)
#else
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL);
#endif
jl_ptls_t ptls = jl_all_tls_states[i];

// store threadid but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;

// store task id
bt_data_prof[bt_size_cur++].uintptr = ptls->current_task;

// store cpu cycle clock
bt_data_prof[bt_size_cur++].uintptr = cycleclock();

// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;

// Mark the end of this block with 0
bt_data_prof[bt_size_cur++].uintptr = 0;
Expand Down
14 changes: 14 additions & 0 deletions src/signals-unix.c
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,20 @@ static void *signal_listener(void *arg)
}
jl_set_safe_restore(old_buf);

jl_ptls_t ptls = jl_all_tls_states[i];

// store threadid but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;

// store task id
bt_data_prof[bt_size_cur++].uintptr = ptls->current_task;

// store cpu cycle clock
bt_data_prof[bt_size_cur++].uintptr = cycleclock();

// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;

// Mark the end of this block with 0
bt_data_prof[bt_size_cur++].uintptr = 0;
}
Expand Down
15 changes: 15 additions & 0 deletions src/signals-win.c
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,21 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
// Get backtrace data
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
bt_size_max - bt_size_cur - 1, &ctxThread, NULL);

jl_ptls_t ptls = jl_all_tls_states[0]; // given only profiling hMainThread

// store threadid but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;

// store task id
bt_data_prof[bt_size_cur++].uintptr = ptls->current_task;

// store cpu cycle clock
bt_data_prof[bt_size_cur++].uintptr = cycleclock();

// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;

// Mark the end of this block with 0
bt_data_prof[bt_size_cur++].uintptr = 0;
}
Expand Down
5 changes: 3 additions & 2 deletions stdlib/Profile/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "Serialization"]
test = ["Logging", "Serialization", "Test"]
Loading

0 comments on commit c5af061

Please sign in to comment.