File tree Expand file tree Collapse file tree 4 files changed +15
-7
lines changed
examples/disaggregated/slurm/benchmark Expand file tree Collapse file tree 4 files changed +15
-7
lines changed Original file line number Diff line number Diff line change 77# SBATCH --job-name=${job_name} # add your job name here or specify in the sbatch command
88# SBATCH --time=02:00:00
99
10+ set -u
11+ set -e
12+ set -x
13+
1014# Context servers arguments
1115num_ctx_servers=${1}
1216ctx_tp_size=${2}
@@ -227,9 +231,10 @@ srun -l --container-name=${container_name} \
227231
228232# start benchmarking
229233srun -l --container-name=${container_name} \
230- --container-mounts=${mounts} \
231- --mpi=pmix --overlap -N 1 -n 1 \
232- bash ${workdir}/run_benchmark.sh ${isl} ${osl} ${multi_round} ${model_dir} "${concurrency}" ${streaming} ${full_logdir} > ${full_logdir}/benchmark.log 2>&1
234+ --container-mounts=${mounts} \
235+ --mpi=pmix --overlap -N 1 -n 1 \
236+ bash ${workdir}/run_benchmark.sh ${isl} ${osl} ${multi_round} ${model_dir} "${concurrency}" ${streaming} ${full_logdir} \
237+ &> ${full_logdir}/benchmark.log 2>&1
233238
234239# try to kill the server and workers
235240srun -l --container-name=${container_name} \
Original file line number Diff line number Diff line change 11#! /bin/bash
2-
3- # Add error handling
4- set -e
52set -u
3+ set -e
4+ set -x
65trap 'echo "Error occurred at line $LINENO"; exit 1' ERR
76
87# Add parameter validation
@@ -26,7 +25,6 @@ if [[ ${SLURM_PROCID} != "0" ]]; then
2625 exit 0
2726fi
2827
29- set -x
3028config_file=${log_path}/server_config.yaml
3129
3230# check if the config file exists every 10 seconds timeout 1800 seconds
Original file line number Diff line number Diff line change 11#! /bin/bash
2+ set -u
3+ set -e
4+ set -x
25
36num_ctx_servers=$1
47num_gen_servers=$2
Original file line number Diff line number Diff line change 11#! /bin/bash
2+ set -u
3+ set -e
24set -x
35
46role=$1
You can’t perform that action at this time.
0 commit comments