diff --git a/bin/select_gpu_chicoma b/bin/select_gpu_chicoma
new file mode 100755
index 00000000..e4033c85
--- /dev/null
+++ b/bin/select_gpu_chicoma
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Pin each MPI rank to one GPU: device number = node-local Slurm rank
+export CUDA_VISIBLE_DEVICES=$SLURM_LOCALID
+exec "$@"
diff --git a/bin/mpi_gpu_wrap b/bin/select_gpu_polaris
similarity index 100%
rename from bin/mpi_gpu_wrap
rename to bin/select_gpu_polaris
diff --git a/machines/chicoma.sh b/machines/chicoma.sh
index 41fc59b6..8f6de402 100644
--- a/machines/chicoma.sh
+++ b/machines/chicoma.sh
@@ -1,7 +1,7 @@
 # LANL Machines: HPC and IC
 
 # Chicoma
-if [[ "$HOST" == "ch-fe"* ]]; then
+if [[ "$HOST" == "ch-fe"* || "$HOST" == "nid00"* ]]; then
     HOST_ARCH="ZEN2"
 
     # Cray environments get confused easy
@@ -11,8 +11,6 @@ if [[ "$HOST" == "ch-fe"* ]]; then
     export CRAY_CPU_TARGET="x86-64"
     if [[ "$ARGS" == *"cuda"* ]]; then
         DEVICE_ARCH="AMPERE80"
-        # System HDF5 can't use compression
-        EXTRA_FLAGS="-DPARTHENON_DISABLE_HDF5_COMPRESSION=ON $EXTRA_FLAGS"
         # Runtime
         MPI_NUM_PROCS=4
         if [[ "$ARGS" == *"gnu"* ]]; then
@@ -20,21 +18,24 @@ if [[ "$HOST" == "ch-fe"* ]]; then
         elif [[ "$ARGS" == *"intel"* ]]; then
             module load PrgEnv-intel
         elif [[ "$ARGS" == *"nvc++"* ]]; then
-            module load PrgEnv-nvhpc cray-hdf5-parallel
+            module load PrgEnv-nvhpc
             EXTRA_FLAGS="-DCMAKE_CUDA_COMPILER=$HOME/bin/nvc++-wrapper -DCMAKE_CUDA_COMPILER_ID=NVHPC -DCMAKE_CUDA_COMPILER_VERSION=11.6 $EXTRA_FLAGS"
         else
-            module load PrgEnv-nvhpc cray-hdf5-parallel
+            module load PrgEnv-nvhpc
         fi
+        # GPU runtime opts
+        MPI_NUM_PROCS=4
+        MPI_EXTRA_ARGS="--cpu-bind=mask_cpu:0x0*16,0x1*16,0x2*16,0x3*16 $SOURCE_DIR/bin/select_gpu_chicoma"
+        unset OMP_NUM_THREADS
+        unset OMP_PROC_BIND
+        unset OMP_PLACES
     else
         module load PrgEnv-aocc
     fi
-    module load cmake
+    module load cray-hdf5-parallel cmake
+    # System HDF5 can't use compression
+    EXTRA_FLAGS="-DPARTHENON_DISABLE_HDF5_COMPRESSION=ON $EXTRA_FLAGS"
 
-    # Runtime
-    MPI_NUM_PROCS=4
+    # Runtime opts
     MPI_EXE=srun
-    MPI_EXTRA_ARGS="--cpu-bind=mask_cpu:0x0*16,0x1*16,0x2*16,0x3*16 ~/bin/select-gpu"
-    unset OMP_NUM_THREADS
-    unset OMP_PROC_BIND
-    unset OMP_PLACES
 fi
diff --git a/scripts/batch/polaris.qsub b/scripts/batch/polaris.qsub
index e2cb07ec..5cb698d5 100644
--- a/scripts/batch/polaris.qsub
+++ b/scripts/batch/polaris.qsub
@@ -8,7 +8,7 @@
 #PBS -l filesystems=home:grand
 
 KHARMA_DIR=~/kharma-dev
-WRAPPER=$KHARMA_DIR/bin/mpi_gpu_wrap
+WRAPPER=$KHARMA_DIR/bin/select_gpu_polaris
 KHARMA_ARGS="-i $KHARMA_DIR/pars/sane_perf.par"
 
 # Print ranks
diff --git a/scripts/batch/scaling_polaris.qsub b/scripts/batch/scaling_polaris.qsub
index efda6705..3e975aac 100755
--- a/scripts/batch/scaling_polaris.qsub
+++ b/scripts/batch/scaling_polaris.qsub
@@ -17,6 +17,7 @@ DO_STRONG=true
 DO_WEAK=true
 
 KHARMA_DIR=~/kharma-dev
+WRAPPER=$KHARMA_DIR/bin/select_gpu_polaris
 # Gotta specify this inline since bsub doesn't do arguments
 PARFILE=~/kharma-dev/pars/scaling_torus.par
 
@@ -78,7 +79,7 @@ if [[ $DO_STRONG == "true" ]]; then
 
             echo "cycle=100 Running ${size}x${size}x${size} cubed problem with KHARMA on $gpus GPUs (blocksize ${msize1}x${msize2}x${msize3})"
-            mpiexec -n $gpus --ppn $NRANKS --depth 8 --cpu-bind depth --env OMP_NUM_THREADS=1 -env OMP_PLACES=threads $KHARMA_DIR/bin/mpi_gpu_wrap \
+            mpiexec -n $gpus --ppn $NRANKS --depth 8 --cpu-bind depth --env OMP_NUM_THREADS=1 -env OMP_PLACES=threads $WRAPPER \
                 $KHARMA_DIR/kharma.cuda -i $PARFILE parthenon/time/nlim=102 \
                 parthenon/mesh/nx1=$size parthenon/mesh/nx2=$size parthenon/mesh/nx3=$size \
                 parthenon/meshblock/nx1=$msize1 parthenon/meshblock/nx2=$msize2 parthenon/meshblock/nx3=$msize3
 
@@ -150,7 +151,7 @@ if [[ $DO_WEAK == "true" ]]; then
             nblock=$(( $mul1 * $mul2 * $mul3 ))
 
             echo "cycle=100 Running $size per node problem with KHARMA on $gpus GPUs (total size ${tsize1}x${tsize2}x${tsize3}, $nblock blocks)"
-            mpiexec -n $gpus --ppn $NRANKS --depth 8 --cpu-bind depth --env OMP_NUM_THREADS=1 -env OMP_PLACES=threads $KHARMA_DIR/bin/mpi_gpu_wrap \
+            mpiexec -n $gpus --ppn $NRANKS --depth 8 --cpu-bind depth --env OMP_NUM_THREADS=1 -env OMP_PLACES=threads $WRAPPER \
                 $KHARMA_DIR/kharma.cuda -i $PARFILE parthenon/time/nlim=102 \
                 parthenon/mesh/nx1=$tsize1 parthenon/mesh/nx2=$tsize2 parthenon/mesh/nx3=$tsize3 \
                 parthenon/meshblock/nx1=$size parthenon/meshblock/nx2=$size parthenon/meshblock/nx3=$size
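
Both wrappers implement the usual one-rank-per-GPU pattern: the launcher starts one wrapper process per rank, the wrapper exports a rank-dependent CUDA_VISIBLE_DEVICES, then execs the real binary, so each rank sees only its own GPU. As a minimal sketch (illustrative, not part of the patch), the Chicoma options above should compose into a launch line like the following, assuming the run script concatenates $MPI_EXE, $MPI_NUM_PROCS, and $MPI_EXTRA_ARGS in the usual way, and with "pars/sane.par" standing in for any parameter file:

    # 4 ranks per node, one CPU mask per rank; each rank sees only GPU $SLURM_LOCALID
    srun -n 4 --cpu-bind=mask_cpu:0x0*16,0x1*16,0x2*16,0x3*16 \
        $SOURCE_DIR/bin/select_gpu_chicoma ./kharma.cuda -i pars/sane.par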