From 79db7968b32a65c8da16d56ef063b8edb11e7ab4 Mon Sep 17 00:00:00 2001 From: Simo Lin Date: Mon, 8 Dec 2025 17:02:09 -0800 Subject: [PATCH] [ci] fix docker release ci and add it to pr test --- .github/workflows/pr-test-rust.yml | 25 +++++++++++++++++++- .github/workflows/release-docker-gateway.yml | 2 +- sgl-model-gateway/README.md | 2 +- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr-test-rust.yml b/.github/workflows/pr-test-rust.yml index a4324bb38660..c2e0df2d6400 100644 --- a/.github/workflows/pr-test-rust.yml +++ b/.github/workflows/pr-test-rust.yml @@ -330,8 +330,31 @@ jobs: docker rm oracle-db || true + docker-build-test: + if: | + github.event_name != 'pull_request' || + (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) || + (github.event.action == 'labeled' && github.event.label.name == 'run-ci') + runs-on: ubuntu-24.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build Docker image (no push) + uses: docker/build-push-action@v5 + with: + context: . + file: docker/gateway.Dockerfile + push: false + tags: sglang-router:test + cache-from: type=gha + cache-to: type=gha,mode=max + finish: - needs: [maturin-build-test, router-unit-tests, router-http-tests, router-grpc-response-api-tests] + needs: [maturin-build-test, router-unit-tests, router-http-tests, router-grpc-response-api-tests, docker-build-test] runs-on: ubuntu-latest steps: - name: Finish diff --git a/.github/workflows/release-docker-gateway.yml b/.github/workflows/release-docker-gateway.yml index afb9705ce13f..c8c2b7d02b5e 100644 --- a/.github/workflows/release-docker-gateway.yml +++ b/.github/workflows/release-docker-gateway.yml @@ -4,7 +4,7 @@ on: branches: - main paths: - - "sgl-model-gateway/bindings/python/sglang_router/version.py" + - sgl-model-gateway/bindings/python/pyproject.toml workflow_dispatch: jobs: diff --git a/sgl-model-gateway/README.md b/sgl-model-gateway/README.md index f695b2c87f95..acb83ba600b5 100644 --- a/sgl-model-gateway/README.md +++ b/sgl-model-gateway/README.md @@ -27,7 +27,7 @@ High-performance model routing control and data plane for large-scale LLM deploy - Advanced load balancing with cache-aware request reuse, load-aware (power-of-two) selection, and per-model policy overrides. ## Feature Highlights -- Multiple load balancing strategies (`random`, `round_robin`, `cache_aware`, `power_of_two`) with DP-aware scheduling. +- Multiple load balancing strategies (`random`, `round_robin`, `cache_aware`, `power_of_two`, `bucket`) with DP-aware scheduling. - Multi-model HTTP serving and inference gateway routing with model-specific policies. - Prefill/decode disaggregation, including bootstrap port handling and cache-aware merging. - gRPC routing with fully Rust tokenizer loading, reasoning parser selection, and tool parser integration for OpenAI-compatible endpoints—supporting streaming and non-streaming modes across DeepSeek, Llama, Kimi K2, Qwen, GPT-OSS, Mistral, Step-3, GLM4, and other reasoning-capable models.