sgl-project · slin1237 · Dec 9, 2025 · Dec 9, 2025 · Dec 9, 2025
@@ -330,8 +330,31 @@ jobs:
           docker rm oracle-db || true
 
 
+  docker-build-test:  # Build-only check of the gateway image; on PRs it runs only with the 'run-ci' label
+    if: |
+      github.event_name != 'pull_request' ||
+      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
+      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
+    runs-on: ubuntu-24.04
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Docker image (no push)
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: docker/gateway.Dockerfile
+          push: false  # build only; the image is never published by this job
+          tags: sglang-router:test
+          cache-from: type=gha  # read layer cache from the GitHub Actions cache backend
+          cache-to: type=gha,mode=max  # mode=max also exports intermediate layers (per Docker cache docs)
+
   finish:
-    needs: [maturin-build-test, router-unit-tests, router-http-tests, router-grpc-response-api-tests]
+    needs: [maturin-build-test, router-unit-tests, router-http-tests, router-grpc-response-api-tests, docker-build-test]
     runs-on: ubuntu-latest
     steps:
       - name: Finish

@@ -4,7 +4,7 @@ on:
     branches:
       - main
     paths:
-      - "sgl-model-gateway/bindings/python/sglang_router/version.py"
+      - sgl-model-gateway/bindings/python/pyproject.toml
   workflow_dispatch:
 
 jobs:

@@ -27,7 +27,7 @@ High-performance model routing control and data plane for large-scale LLM deploy
 - Advanced load balancing with cache-aware request reuse, load-aware (power-of-two) selection, and per-model policy overrides.
 
 ## Feature Highlights
-- Multiple load balancing strategies (`random`, `round_robin`, `cache_aware`, `power_of_two`) with DP-aware scheduling.
+- Multiple load balancing strategies (`random`, `round_robin`, `cache_aware`, `power_of_two`, `bucket`) with DP-aware scheduling.
 - Multi-model HTTP serving and inference gateway routing with model-specific policies.
 - Prefill/decode disaggregation, including bootstrap port handling and cache-aware merging.
 - gRPC routing with fully Rust tokenizer loading, reasoning parser selection, and tool parser integration for OpenAI-compatible endpoints—supporting streaming and non-streaming modes across DeepSeek, Llama, Kimi K2, Qwen, GPT-OSS, Mistral, Step-3, GLM4, and other reasoning-capable models.