-
Notifications
You must be signed in to change notification settings - Fork 37
Parthenon legacy scaling instructions
Philipp Grete edited this page Feb 5, 2021
·
1 revision
Built using the latest `develop`
branch with the Summit machine file.
# Input deck for the advection example used in the legacy scaling runs.
<parthenon/job>
problem_id = advection
# Base mesh: 256^3 cells on [-0.5, 0.5]^3, periodic in all three
# directions, with adaptive refinement and 2 levels total.
# nx1/nx2/nx3 are overridden on the command line (MX/MY/MZ) for scaling.
<parthenon/mesh>
refinement = adaptive
numlevel = 2
nx1 = 256
x1min = -0.5
x1max = 0.5
ix1_bc = periodic
ox1_bc = periodic
nx2 = 256
x2min = -0.5
x2max = 0.5
ix2_bc = periodic
ox2_bc = periodic
nx3 = 256
x3min = -0.5
x3max = 0.5
ix3_bc = periodic
ox3_bc = periodic
# Mesh is decomposed into 32^3-cell meshblocks.
<parthenon/meshblock>
nx1 = 32
nx2 = 32
nx3 = 32
<parthenon/time>
tlim = 1.0
integrator = rk1
nlim = 10 # stop after a fixed 10 cycles (reached long before tlim)
perf_cycle_offset = 2 # presumably skips startup cycles in the perf numbers — confirm against Parthenon docs
ncycle_out_mesh=-10 # NOTE(review): negative value — semantics not evident from this file; verify against the Parthenon time/output documentation
# Advection package parameters (velocity, initial profile, AMR tagging).
<Advection>
cfl = 0.30
vx = 1.0
vy = 1.0
vz = 1.0
profile = smooth_gaussian
ang_2 = 0.0
ang_3 = 0.0
ang_2_vert = false
ang_3_vert = false
amp = 1.0
refine_tol = 1.01 # control the package specific refinement tagging function
derefine_tol = 1.001
compute_error = false
num_vars = 1 # number of variables in variable vector
# Communication options: exercise the packed-buffer code paths.
buffer_send_pack = true # send all buffers using packs
buffer_recv_pack = true # receive buffers using packs
buffer_set_pack = true # set received buffers using packs
The runs below try to mirror Sierra, i.e., the jsrun launch commands
and environment are set up to match 4 GPUs per node (rather than the 6 GPUs a Summit node actually has).
Adapt the launch command and environment as necessary.
# Legacy weak-scaling runs for the advection example on Summit: each time
# the GPU count doubles, the global mesh (MX/MY/MZ) doubles along one axis
# so the work per GPU stays constant.
# ensure that only two GPUs are used in each resource set [to mirror Sierra]
export KOKKOS_NUM_DEVICES=2
# Global mesh dimensions; passed to the binary via parthenon/mesh/nx* below,
# overriding the values in parthinput.legacy.
export MX=256
export MY=256
export MZ=256
# >1 means we need MPS and to oversubscribe the GPU
export TASKS_PER_GPU=1
export NODES=1
# 1 node, 1 gpu
# NOTE(review): --gpu_per_rs 3 reserves 3 GPUs per resource set, while
# KOKKOS_NUM_DEVICES=2 (above) limits each resource set to two usable
# devices — presumably intentional for mirroring Sierra; confirm.
jsrun --nrs 1 --tasks_per_rs $((1*TASKS_PER_GPU)) --cpu_per_rs 21 --gpu_per_rs 3 --rs_per_host 1 --smpiargs=-gpu ./example/advection/advection-example -i parthinput.legacy parthenon/mesh/nx1=$MX parthenon/mesh/nx2=$MY parthenon/mesh/nx3=$MZ | tee parth.legacy.out.nodes_${NODES}-gpu_1-mps_${TASKS_PER_GPU}
# 1 node, 2 gpu (mesh doubled in x: 512x256x256)
export MX=512
jsrun --nrs 1 --tasks_per_rs $((2*TASKS_PER_GPU)) --cpu_per_rs 21 --gpu_per_rs 3 --rs_per_host 1 --smpiargs=-gpu ./example/advection/advection-example -i parthinput.legacy parthenon/mesh/nx1=$MX parthenon/mesh/nx2=$MY parthenon/mesh/nx3=$MZ | tee parth.legacy.out.nodes_${NODES}-gpu_2-mps_${TASKS_PER_GPU}
# 1 node, 4 gpu (two resource sets per node; mesh doubled in y: 512x512x256)
export MY=512
jsrun --nrs $((2*NODES)) --tasks_per_rs $((2*TASKS_PER_GPU)) --cpu_per_rs 21 --gpu_per_rs 3 --rs_per_host 2 --smpiargs=-gpu ./example/advection/advection-example -i parthinput.legacy parthenon/mesh/nx1=$MX parthenon/mesh/nx2=$MY parthenon/mesh/nx3=$MZ | tee parth.legacy.out.nodes_${NODES}-gpu_4-mps_${TASKS_PER_GPU}
# and now continue by updating NODES and MX, MY, MZ accordingly
# 2 nodes, 4 gpu per node (mesh doubled in z: 512x512x512)
export NODES=2
export MZ=512
jsrun --nrs $((2*NODES)) --tasks_per_rs $((2*TASKS_PER_GPU)) --cpu_per_rs 21 --gpu_per_rs 3 --rs_per_host 2 --smpiargs=-gpu ./example/advection/advection-example -i parthinput.legacy parthenon/mesh/nx1=$MX parthenon/mesh/nx2=$MY parthenon/mesh/nx3=$MZ | tee parth.legacy.out.nodes_${NODES}-gpu_4-mps_${TASKS_PER_GPU}
##### Now repeat the runs above using 4 ranks per GPU (requires MPS — see note above)
export TASKS_PER_GPU=4