diff --git a/bin/select_gpu_chicoma b/bin/select_gpu_chicoma
new file mode 100755
index 00000000..e4033c85
--- /dev/null
+++ b/bin/select_gpu_chicoma
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Pin each MPI rank to one GPU: device number = node-local Slurm rank
+export CUDA_VISIBLE_DEVICES=$SLURM_LOCALID
+exec "$@"
diff --git a/bin/mpi_gpu_wrap b/bin/select_gpu_polaris
similarity index 100%
rename from bin/mpi_gpu_wrap
rename to bin/select_gpu_polaris
diff --git a/machines/chicoma.sh b/machines/chicoma.sh
index 41fc59b6..8f6de402 100644
--- a/machines/chicoma.sh
+++ b/machines/chicoma.sh
@@ -1,7 +1,7 @@
 # LANL Machines: HPC and IC
 
 # Chicoma
-if [[ "$HOST" == "ch-fe"* ]]; then
+if [[ "$HOST" == "ch-fe"* || "$HOST" == "nid00"* ]]; then
     HOST_ARCH="ZEN2"
 
     # Cray environments get confused easy
@@ -11,8 +11,6 @@ if [[ "$HOST" == "ch-fe"* ]]; then
     export CRAY_CPU_TARGET="x86-64"
     if [[ "$ARGS" == *"cuda"* ]]; then
         DEVICE_ARCH="AMPERE80"
-        # System HDF5 can't use compression
-        EXTRA_FLAGS="-DPARTHENON_DISABLE_HDF5_COMPRESSION=ON $EXTRA_FLAGS"
         # Runtime
         MPI_NUM_PROCS=4
         if [[ "$ARGS" == *"gnu"* ]]; then
@@ -20,21 +18,24 @@ if [[ "$HOST" == "ch-fe"* ]]; then
         elif [[ "$ARGS" == *"intel"* ]]; then
             module load PrgEnv-intel
         elif [[ "$ARGS" == *"nvc++"* ]]; then
-            module load PrgEnv-nvhpc cray-hdf5-parallel
+            module load PrgEnv-nvhpc
             EXTRA_FLAGS="-DCMAKE_CUDA_COMPILER=$HOME/bin/nvc++-wrapper -DCMAKE_CUDA_COMPILER_ID=NVHPC -DCMAKE_CUDA_COMPILER_VERSION=11.6 $EXTRA_FLAGS"
         else
-            module load PrgEnv-nvhpc cray-hdf5-parallel
+            module load PrgEnv-nvhpc
         fi
+        # GPU runtime opts
+        MPI_NUM_PROCS=4
+        MPI_EXTRA_ARGS="--cpu-bind=mask_cpu:0x0*16,0x1*16,0x2*16,0x3*16 $SOURCE_DIR/bin/select_gpu_chicoma"
+        unset OMP_NUM_THREADS
+        unset OMP_PROC_BIND
+        unset OMP_PLACES
     else
         module load PrgEnv-aocc
     fi
-    module load cmake
+    module load cray-hdf5-parallel cmake
+    # System HDF5 can't use compression
+    EXTRA_FLAGS="-DPARTHENON_DISABLE_HDF5_COMPRESSION=ON $EXTRA_FLAGS"
 
-    # Runtime
-    MPI_NUM_PROCS=4
+    # Runtime opts
     MPI_EXE=srun
-    MPI_EXTRA_ARGS="--cpu-bind=mask_cpu:0x0*16,0x1*16,0x2*16,0x3*16 ~/bin/select-gpu"
-    unset OMP_NUM_THREADS
-    unset OMP_PROC_BIND
-    unset OMP_PLACES
 fi
diff --git a/scripts/batch/polaris.qsub b/scripts/batch/polaris.qsub
index e2cb07ec..5cb698d5 100644
--- a/scripts/batch/polaris.qsub
+++ b/scripts/batch/polaris.qsub
@@ -8,7 +8,7 @@
 #PBS -l filesystems=home:grand
 
 KHARMA_DIR=~/kharma-dev
-WRAPPER=$KHARMA_DIR/bin/mpi_gpu_wrap
+WRAPPER=$KHARMA_DIR/bin/select_gpu_polaris
 KHARMA_ARGS="-i $KHARMA_DIR/pars/sane_perf.par"
 
 # Print ranks
diff --git a/scripts/batch/scaling_polaris.qsub b/scripts/batch/scaling_polaris.qsub
index efda6705..3e975aac 100755
--- a/scripts/batch/scaling_polaris.qsub
+++ b/scripts/batch/scaling_polaris.qsub
@@ -17,6 +17,7 @@ DO_STRONG=true
 DO_WEAK=true
 
 KHARMA_DIR=~/kharma-dev
+WRAPPER=$KHARMA_DIR/bin/select_gpu_polaris
 # Gotta specify this inline since bsub doesn't do arguments
 PARFILE=~/kharma-dev/pars/scaling_torus.par
 
@@ -78,7 +79,7 @@ if [[ $DO_STRONG == "true" ]]; then
 
             echo "cycle=100 Running ${size}x${size}x${size} cubed problem with KHARMA on $gpus GPUs (blocksize ${msize1}x${msize2}x${msize3})"
-            mpiexec -n $gpus --ppn $NRANKS --depth 8 --cpu-bind depth --env OMP_NUM_THREADS=1 -env OMP_PLACES=threads $KHARMA_DIR/bin/mpi_gpu_wrap \
+            mpiexec -n $gpus --ppn $NRANKS --depth 8 --cpu-bind depth --env OMP_NUM_THREADS=1 -env OMP_PLACES=threads $WRAPPER \
                 $KHARMA_DIR/kharma.cuda -i $PARFILE parthenon/time/nlim=102 \
                 parthenon/mesh/nx1=$size parthenon/mesh/nx2=$size parthenon/mesh/nx3=$size \
                 parthenon/meshblock/nx1=$msize1 parthenon/meshblock/nx2=$msize2 parthenon/meshblock/nx3=$msize3
 
@@ -150,7 +151,7 @@ if [[ $DO_WEAK == "true" ]]; then
             nblock=$(( $mul1 * $mul2 * $mul3 ))
 
             echo "cycle=100 Running $size per node problem with KHARMA on $gpus GPUs (total size ${tsize1}x${tsize2}x${tsize3}, $nblock blocks)"
-            mpiexec -n $gpus --ppn $NRANKS --depth 8 --cpu-bind depth --env OMP_NUM_THREADS=1 -env OMP_PLACES=threads $KHARMA_DIR/bin/mpi_gpu_wrap \
+            mpiexec -n $gpus --ppn $NRANKS --depth 8 --cpu-bind depth --env OMP_NUM_THREADS=1 -env OMP_PLACES=threads $WRAPPER \
                 $KHARMA_DIR/kharma.cuda -i $PARFILE parthenon/time/nlim=102 \
                 parthenon/mesh/nx1=$tsize1 parthenon/mesh/nx2=$tsize2 parthenon/mesh/nx3=$tsize3 \
                 parthenon/meshblock/nx1=$size parthenon/meshblock/nx2=$size parthenon/meshblock/nx3=$size
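
Both wrappers implement the usual one-rank-per-GPU pattern: the launcher starts one wrapper process per rank, the wrapper exports a rank-dependent CUDA_VISIBLE_DEVICES, then execs the real binary, so each rank sees only its own GPU. As a minimal sketch (illustrative, not part of the patch), the Chicoma options above should compose into a launch line like the following, assuming the run script concatenates $MPI_EXE, $MPI_NUM_PROCS, and $MPI_EXTRA_ARGS in the usual way, and with "pars/sane.par" standing in for any parameter file:

    # 4 ranks per node, one CPU mask per rank; each rank sees only GPU $SLURM_LOCALID
    srun -n 4 --cpu-bind=mask_cpu:0x0*16,0x1*16,0x2*16,0x3*16 \
        $SOURCE_DIR/bin/select_gpu_chicoma ./kharma.cuda -i pars/sane.par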