Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions python/sglang/srt/model_executor/breakable_cuda_graph_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,17 @@ def __init__(self, model_runner: ModelRunner):
language_model = getattr(
model_runner.model, "language_model", model_runner.model
)
self.layer_model = (
language_model.model
if hasattr(language_model, "model")
and hasattr(language_model.model, "layers")
else language_model
)
if hasattr(language_model, "model") and hasattr(language_model.model, "layers"):
self.layer_model = language_model.model
else:
# If we can't find the inner layer_model, disable BCG.
self.layer_model = None
logger.warning(
"[BCG] Could not resolve inner layer_model on %s. BCG is "
"disabled for this model; prefill will fall back to eager.",
type(language_model).__name__,
)
return

# Memory pool
if get_global_graph_memory_pool() is None:
Expand Down Expand Up @@ -330,6 +335,8 @@ def _capture_all(self):
self.output_buffers[num_tokens] = output

def can_run(self, forward_batch: "ForwardBatch"):
if self.layer_model is None:
return False
if forward_batch.forward_mode.is_target_verify():
return False
if forward_batch.input_embeds is not None:
Expand Down
Loading