diff --git a/configs/fix-timeouts.sh b/configs/fix-timeouts.sh new file mode 100644 index 00000000..9c2c9937 --- /dev/null +++ b/configs/fix-timeouts.sh @@ -0,0 +1,16 @@ +#!/bin/bash +BRANCH="fastdg" + +# v0.5.8 + cherry-pick https://github.com/sgl-project/sglang/pull/18111 +# Make sure to set SGLANG_JIT_DEEPGEMM_FAST_WARMUP=1 +cd /sgl-workspace/sglang +git remote remove origin +git remote add origin https://github.com/trevor-m/sglang.git +git fetch origin +git checkout origin/${BRANCH} + +# Increase device timeout from 100s -> 1000s +cd /sgl-workspace/DeepEP +sed -i 's/#define NUM_TIMEOUT_CYCLES 200000000000ull/#define NUM_TIMEOUT_CYCLES 2000000000000ull/' csrc/kernels/configs.cuh +TORCH_CUDA_ARCH_LIST="10.0;10.3" MAX_JOBS=$(nproc) pip install --force-reinstall --no-build-isolation . +