Merge #831

831: Add flame graph for callbacks r=charleskawczynski a=charleskawczynski This PR adds a flame graph for a run in which `step!` is called and then all of the callbacks are called. Co-authored-by: Charles Kawczynski <[email protected]>
CliMA · Sep 9, 2022 · ca53d2a · ca53d2a
2 parents 339a12e + 53f00f0
commit ca53d2a
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 5 deletions.
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -196,6 +196,8 @@ steps:
       - label: ":computer: held suarez (ρe_tot) equilmoist FCT Zalesak on moisture"
         command: "julia --color=yes --project=examples examples/hybrid/driver.jl --vert_diff true --surface_scheme bulk --moist equil --forcing held_suarez --microphy 0M --tracer_upwinding zalesak --kappa_4 4e17 --job_id sphere_held_suarez_rhoe_equilmoist_zalesak --dt 200secs --t_end 3days"
         artifact_paths: "sphere_held_suarez_rhoe_equilmoist_zalesak/*"
+        agents:
+          slurm_mem: 20GB
 
   - group: "Configs"
     steps:
@@ -373,6 +375,12 @@ steps:
           slurm_ntasks_per_node: 8
           slurm_mem: 20GB
 
+      - label: ":rocket: flame graph: perf target callbacks"
+        command: "julia --color=yes --project=perf perf/flame.jl --job_id flame_perf_target_rhoe_callbacks --dt_save_to_sol 1secs --dt_save_restart 1secs --dt_rad 1secs"
+        artifact_paths: "flame_perf_target_rhoe_callbacks/*"
+        agents:
+          slurm_mem: 20GB
+
       - label: ":rocket: benchmark: baroclinic wave (ρe_tot)"
         command: "julia --color=yes --project=perf perf/benchmark.jl --job_id bm_sphere_baroclinic_wave_rhoe"
 

diff --git a/perf/flame.jl b/perf/flame.jl
@@ -11,6 +11,8 @@ parsed_args_prescribed = parsed_args_from_ARGS(ARGS)
 parsed_args_target = dict["perf_target_unthreaded"];
 parsed_args = merge(parsed_args_target, parsed_args_prescribed);
 
+# The callbacks flame graph is very expensive, so only do 2 steps.
+const n_samples = occursin("callbacks", parsed_args["job_id"]) ? 2 : 20
 
 try # capture integrator
     include(filename)
@@ -21,7 +23,7 @@ catch err
 end
 
 function do_work!(integrator)
-    for _ in 1:20
+    for _ in 1:n_samples
         OrdinaryDiffEq.step!(integrator)
     end
 end
@@ -55,16 +57,20 @@ end
 # We're grouping allocation tests here for convenience.
 
 using Test
-allocs_limit = Dict()
-allocs_limit["flame_perf_target_rhoe"] = 10357712
-allocs_limit["flame_perf_target_rhoe_threaded"] = 90909168
-
 # Threaded allocations are not deterministic, so let's add a buffer
 # TODO: remove buffer, and threaded tests, when
 #       threaded/unthreaded functions are unified
 buffer = occursin("threaded", job_id) ? 1.4 : 1
 
 allocs = @allocated OrdinaryDiffEq.step!(integrator)
+@timev OrdinaryDiffEq.step!(integrator)
+@info "`allocs ($job_id)`: $(allocs)"
+
+allocs_limit = Dict()
+allocs_limit["flame_perf_target_rhoe"] = 10357712
+allocs_limit["flame_perf_target_rhoe_threaded"] = 90909168
+allocs_limit["flame_perf_target_rhoe_callbacks"] = 21523784
+
 if allocs < allocs_limit[job_id] * buffer
     @info "TODO: lower `allocs_limit[$job_id]` to: $(allocs)"
 end