Patch summary (free-form text placed before the first "diff --git" header;
it is not part of any hunk):

  * scripts/build_flashinfer_jit_cache_whl.sh
      MAX_JOBS is now computed as MEM_AVAILABLE_GB / 8 unconditionally. The
      previous expression, which picked the divisor from `uname -m` (8 on
      aarch64, 4 otherwise), is kept only as a commented-out line.

  * scripts/task_test_blackwell_kernels.sh
      When DRY_RUN is not "true", the script now:
        - prints CUDA_VERSION and maps it to CUDA_STREAM (values starting
          with "cu" are used as-is, "12.9.0" -> cu129, anything else -> cu130);
        - if JIT_ARCH is set, derives JIT_ARCH_EFFECTIVE ("12.0" becomes
          "12.0a" on cu129 and "12.0f" otherwise; other values pass through),
          lists the artifact directories under ../dist for debugging, and
          pip-installs any *.whl found in the cubin and jit-cache directories,
          printing an ERROR line to stderr when a wheel is missing;
        - installs the local sources in editable mode (unchanged line);
        - runs `python -m flashinfer show-config` from /tmp.

diff --git a/scripts/build_flashinfer_jit_cache_whl.sh b/scripts/build_flashinfer_jit_cache_whl.sh
index 4d00ae67f0..ad35fbf640 100755
--- a/scripts/build_flashinfer_jit_cache_whl.sh
+++ b/scripts/build_flashinfer_jit_cache_whl.sh
@@ -11,7 +11,8 @@ echo "=========================================="
 # MAX_JOBS = min(nproc, max(1, MemAvailable_GB/4))
 MEM_AVAILABLE_GB=$(free -g | awk '/^Mem:/ {print $7}')
 NPROC=$(nproc)
-MAX_JOBS=$(( MEM_AVAILABLE_GB / $([ "$(uname -m)" = "aarch64" ] && echo 8 || echo 4) ))
+# MAX_JOBS=$(( MEM_AVAILABLE_GB / $([ "$(uname -m)" = "aarch64" ] && echo 8 || echo 4) ))
+MAX_JOBS=$(( MEM_AVAILABLE_GB / 8 ))
 if (( MAX_JOBS < 1 )); then
   MAX_JOBS=1
 elif (( NPROC < MAX_JOBS )); then
diff --git a/scripts/task_test_blackwell_kernels.sh b/scripts/task_test_blackwell_kernels.sh
index 312cf12eb1..0d7b0b1f4a 100644
--- a/scripts/task_test_blackwell_kernels.sh
+++ b/scripts/task_test_blackwell_kernels.sh
@@ -25,7 +25,75 @@ if [[ "$1" == "--dry-run" ]] || [[ "${DRY_RUN}" == "true" ]]; then
 fi
 
 if [ "$DRY_RUN" != "true" ]; then
+    echo "Using CUDA version: ${CUDA_VERSION}"
+    echo ""
+
+    # Install precompiled kernels (require CI build artifacts)
+    JIT_ARCH_EFFECTIVE=""
+    # Map CUDA_VERSION to CUDA_STREAM for artifact lookup
+    if [[ "${CUDA_VERSION}" == cu* ]]; then
+        CUDA_STREAM="${CUDA_VERSION}"
+    elif [ "${CUDA_VERSION}" = "12.9.0" ]; then
+        CUDA_STREAM="cu129"
+    else
+        CUDA_STREAM="cu130"
+    fi
+    echo "Using CUDA stream: ${CUDA_STREAM}"
+    echo ""
+    if [ -n "${JIT_ARCH}" ]; then
+        # 12.0a for CUDA 12.9.0, 12.0f for CUDA 13.0.0
+        if [ "${JIT_ARCH}" = "12.0" ]; then
+            if [ "${CUDA_STREAM}" = "cu129" ]; then
+                JIT_ARCH_EFFECTIVE="12.0a"
+            else
+                JIT_ARCH_EFFECTIVE="12.0f"
+            fi
+        else
+            JIT_ARCH_EFFECTIVE="${JIT_ARCH}"
+        fi
+
+        echo "Using JIT_ARCH from environment: ${JIT_ARCH_EFFECTIVE}"
+        DIST_CUBIN_DIR="../dist/${CUDA_STREAM}/${JIT_ARCH_EFFECTIVE}/cubin"
+        DIST_JIT_CACHE_DIR="../dist/${CUDA_STREAM}/${JIT_ARCH_EFFECTIVE}/jit-cache"
+
+        echo "==== Debug: listing artifact directories ===="
+        echo "Tree under ../dist:"
+        (cd .. && ls -al dist) || true
+        echo ""
+        echo "Tree under ../dist/${CUDA_STREAM}:"
+        (cd .. && ls -al "dist/${CUDA_STREAM}") || true
+        echo ""
+        echo "Contents of ${DIST_CUBIN_DIR}:"
+        ls -al "${DIST_CUBIN_DIR}" || true
+        echo ""
+        echo "Contents of ${DIST_JIT_CACHE_DIR}:"
+        ls -al "${DIST_JIT_CACHE_DIR}" || true
+        echo "============================================="
+
+        if [ -d "${DIST_CUBIN_DIR}" ] && ls "${DIST_CUBIN_DIR}"/*.whl >/dev/null 2>&1; then
+            echo "Installing flashinfer-cubin from ${DIST_CUBIN_DIR} ..."
+            pip install -q "${DIST_CUBIN_DIR}"/*.whl
+        else
+            echo "ERROR: flashinfer-cubin wheel not found in ${DIST_CUBIN_DIR}. Ensure the CI build stage produced the artifact." >&2
+        fi
+
+        if [ -d "${DIST_JIT_CACHE_DIR}" ] && ls "${DIST_JIT_CACHE_DIR}"/*.whl >/dev/null 2>&1; then
+            echo "Installing flashinfer-jit-cache from ${DIST_JIT_CACHE_DIR} ..."
+            pip install -q "${DIST_JIT_CACHE_DIR}"/*.whl
+        else
+            echo "ERROR: flashinfer-jit-cache wheel not found in ${DIST_JIT_CACHE_DIR} for ${CUDA_VERSION}. Ensure the CI build stage produced the artifact." >&2
+        fi
+        echo ""
+    fi
+
+    # Install local python sources
     pip install -e . -v --no-deps
+    echo ""
+
+    # Verify installation
+    echo "Verifying installation..."
+    (cd /tmp && python -m flashinfer show-config)
+    echo ""
 fi
 
 EXIT_CODE=0