sgl-project · Fridge003 · Nov 22, 2025 · Aug 20, 2025 · Nov 19, 2025 · Nov 22, 2025
@@ -787,12 +787,13 @@ def forward(
         gemm_output_zero_allocator: BumpAllocator = None,
     ) -> torch.Tensor:
         if not self._enable_a2a_moe:
-            DUAL_STREAM_TOKEN_THRESHOLD = 1024
+            from sglang.srt.model_executor.cuda_graph_runner import get_is_capture_mode
+
             if (
                 self.alt_stream is not None
                 and self.num_fused_shared_experts == 0
                 and hidden_states.shape[0] > 0
-                and hidden_states.shape[0] <= DUAL_STREAM_TOKEN_THRESHOLD
+                and get_is_capture_mode()
             ):
                 return self.forward_normal_dual_stream(
                     hidden_states,