From d04bb6b2bab86d2c207a28dfbc8c071efa00eb29 Mon Sep 17 00:00:00 2001
From: Liangsheng Yin <lsyincs@gmail.com>
Date: Thu, 11 Dec 2025 00:19:02 +0900
Subject: [PATCH 1/8] tp_worker: always set dllm_algorithm and drop hasattr check in is_dllm

---
 python/sglang/srt/managers/tp_worker.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py
index f37138a72749..9822ef285e5c 100644
--- a/python/sglang/srt/managers/tp_worker.py
+++ b/python/sglang/srt/managers/tp_worker.py
@@ -239,8 +239,11 @@ def __init__(
             is_draft_model=is_draft_worker,
         )
 
+        # Init DLLM algorithm
         if server_args.dllm_algorithm is not None:
             self.dllm_algorithm = DllmAlgorithm.from_server_args(server_args)
+        else:
+            self.dllm_algorithm = None
 
         self._model_runner = ModelRunner(
             model_config=self.model_config,
@@ -349,7 +352,7 @@ def get_worker_info(self):
         )
 
     def is_dllm(self):
-        return hasattr(self, "dllm_algorithm")
+        return self.dllm_config is not None
 
     def forward_batch_generation(
         self,

From cf4000518f4f940510ff764a25860f89c7de6a6f Mon Sep 17 00:00:00 2001
From: Liangsheng Yin <lsyincs@gmail.com>
Date: Thu, 11 Dec 2025 00:27:01 +0900
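Subject: [PATCH 2/8] schedule_batch: extract DLLM fill_ids construction into a helper

Remove the per-request dllm_ids attribute and move the DLLM branch of
init_next_round_input into _get_fill_ids_for_dllm.

Note: the helper's else branch appends to the local name dllm_ids, which
is only assigned in the if branch. PATCH 8/8 reworks the helper to
operate on self.fill_ids instead.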

---
 python/sglang/srt/managers/schedule_batch.py | 28 +++++++++-----------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
index f712fe0164e4..72dbec7a0ae3 100644
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -710,7 +710,6 @@ def __init__(
         self.dimensions = dimensions
 
         # For diffusion LLM
-        self.dllm_ids = []
         self.dllm_block_offset = 0
         self.dllm_config = dllm_config
 
@@ -786,22 +785,21 @@ def finished(self) -> bool:
     def is_dllm(self):
         return self.dllm_config is not None
 
+    def _get_fill_ids_for_dllm(self):
+        if not self.fill_ids:
+            dllm_ids = (
+                self.origin_input_ids
+                + [self.dllm_config.mask_id] * self.dllm_config.block_size
+            )
+        else:
+            self.dllm_block_offset += self.dllm_config.block_size
+            dllm_ids += [self.dllm_config.mask_id] * self.dllm_config.block_size
+
+        return dllm_ids
+
     def init_next_round_input(self, tree_cache: Optional[BasePrefixCache] = None):
         if self.is_dllm():
-            if not self.fill_ids:
-                self.dllm_ids = (
-                    self.origin_input_ids
-                    + [
-                        self.dllm_config.mask_id,
-                    ]
-                    * self.dllm_config.block_size
-                )
-            else:
-                self.dllm_block_offset += self.dllm_config.block_size
-                self.dllm_ids += [
-                    self.dllm_config.mask_id
-                ] * self.dllm_config.block_size
-            self.fill_ids = self.dllm_ids
+            self.fill_ids = self._get_fill_ids_for_dllm()
         else:
             self.fill_ids = self.origin_input_ids + self.output_ids
 

From 38d6f3c2c21af1a30d750a20e183fdc9bf241adf Mon Sep 17 00:00:00 2001
From: Liangsheng Yin <lsyincs@gmail.com>
Date: Thu, 11 Dec 2025 00:28:05 +0900
Subject: [PATCH 3/8] tp_worker: make is_dllm check dllm_algorithm instead of dllm_config

---
 python/sglang/srt/managers/tp_worker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py
index 9822ef285e5c..215728bb9800 100644
--- a/python/sglang/srt/managers/tp_worker.py
+++ b/python/sglang/srt/managers/tp_worker.py
@@ -352,7 +352,7 @@ def get_worker_info(self):
         )
 
     def is_dllm(self):
-        return self.dllm_config is not None
+        return self.dllm_algorithm is not None
 
     def forward_batch_generation(
         self,

From 87d99fd5b6b8efe54dc7719e4e04a4365e822e9c Mon Sep 17 00:00:00 2001
From: Liangsheng Yin <lsyincs@gmail.com>
Date: Thu, 11 Dec 2025 00:29:19 +0900
Subject: [PATCH 4/8] schedule_batch: select DLLM_EXTEND with an explicit branch in prepare_for_extend

---
 python/sglang/srt/managers/schedule_batch.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
index 72dbec7a0ae3..db438f73bca2 100644
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -1320,9 +1320,11 @@ def prepare_encoder_info_extend(self, input_ids: List[int], seq_lens: List[int])
         ), f"Expected {len(self.out_cache_loc)}, got {self.extend_num_tokens}"
 
     def prepare_for_extend(self):
-        self.forward_mode = (
-            ForwardMode.DLLM_EXTEND if self.is_dllm() else ForwardMode.EXTEND
-        )
+        self.forward_mode = ForwardMode.EXTEND
+
+        if self.is_dllm():
+            # For DLLM, we use a separate forward mode
+            self.forward_mode = ForwardMode.DLLM_EXTEND
 
         # Init tensors
         reqs = self.reqs

From 3784037d05509a5b8ffe34192575880db3d2343e Mon Sep 17 00:00:00 2001
From: Liangsheng Yin <lsyincs@gmail.com>
Date: Thu, 11 Dec 2025 00:37:50 +0900
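Subject: [PATCH 5/8] cuda_graph_runner: hoist logits selection out of the LogitsProcessorOutput call in replay

Note: this also changes which logits are returned. full_logits becomes
None when is_dllm is set and is sliced otherwise, the opposite of the
previous condition. PATCH 7/8 restores the previous selection.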

---
 .../srt/model_executor/cuda_graph_runner.py     | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py
index 1fd483da3752..9d67023ceed9 100644
--- a/python/sglang/srt/model_executor/cuda_graph_runner.py
+++ b/python/sglang/srt/model_executor/cuda_graph_runner.py
@@ -832,16 +832,17 @@ def replay(
             graph_key = self.bs
         self.graphs[graph_key].replay()
         output = self.output_buffers[graph_key]
+
         if isinstance(output, LogitsProcessorOutput):
+            if self.is_dllm:
+                next_token_logits = full_logits = None
+            else:
+                next_token_logits = output.next_token_logits[: self.raw_num_token]
+                full_logits = output.full_logits[: self.raw_num_token]
+
             return LogitsProcessorOutput(
-                next_token_logits=(
-                    output.next_token_logits[: self.raw_num_token]
-                    if not self.is_dllm
-                    else None
-                ),
-                full_logits=(
-                    output.full_logits[: self.raw_num_token] if self.is_dllm else None
-                ),
+                next_token_logits=next_token_logits,
+                full_logits=full_logits,
                 hidden_states=(
                     output.hidden_states[: self.raw_num_token]
                     if output.hidden_states is not None

From c974c5277e45b545aa522c907b86cf599cd82d2c Mon Sep 17 00:00:00 2001
From: Liangsheng Yin <lsyincs@gmail.com>
Date: Thu, 11 Dec 2025 00:44:50 +0900
Subject: [PATCH 6/8] tp_worker: move the DLLM generation path into _forward_batch_generation_dllm

---
 python/sglang/srt/managers/tp_worker.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py
index 215728bb9800..758f0ffc9571 100644
--- a/python/sglang/srt/managers/tp_worker.py
+++ b/python/sglang/srt/managers/tp_worker.py
@@ -354,6 +354,18 @@ def get_worker_info(self):
     def is_dllm(self):
         return self.dllm_algorithm is not None
 
+    def _forward_batch_generation_dllm(
+        self, forward_batch: ForwardBatch
+    ) -> GenerationBatchResult:
+        logits_output, next_token_ids, can_run_cuda_graph = self.dllm_algorithm.run(
+            self.model_runner, forward_batch
+        )
+        return GenerationBatchResult(
+            logits_output=logits_output,
+            next_token_ids=next_token_ids,
+            can_run_cuda_graph=can_run_cuda_graph,
+        )
+
     def forward_batch_generation(
         self,
         model_worker_batch: ModelWorkerBatch,
@@ -383,14 +395,7 @@ def forward_batch_generation(
 
         if self.pp_group.is_last_rank:
             if self.is_dllm():
-                logits_output, next_token_ids, can_run_cuda_graph = (
-                    self.dllm_algorithm.run(self.model_runner, forward_batch)
-                )
-                return GenerationBatchResult(
-                    logits_output=logits_output,
-                    next_token_ids=next_token_ids,
-                    can_run_cuda_graph=can_run_cuda_graph,
-                )
+                return self._forward_batch_generation_dllm(forward_batch)
 
             logits_output, can_run_cuda_graph = self.model_runner.forward(
                 forward_batch,

From a95a5a43ec0e2c21f195ac630c2f8b326c87d8ca Mon Sep 17 00:00:00 2001
From: Liangsheng Yin <lsyincs@gmail.com>
Date: Thu, 11 Dec 2025 00:56:45 +0900
Subject: [PATCH 7/8] cuda_graph_runner: return full_logits for DLLM and next_token_logits otherwise in replay

---
 python/sglang/srt/model_executor/cuda_graph_runner.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py
index 9d67023ceed9..06726733aaeb 100644
--- a/python/sglang/srt/model_executor/cuda_graph_runner.py
+++ b/python/sglang/srt/model_executor/cuda_graph_runner.py
@@ -835,10 +835,11 @@ def replay(
 
         if isinstance(output, LogitsProcessorOutput):
             if self.is_dllm:
-                next_token_logits = full_logits = None
+                next_token_logits = None
+                full_logits = output.full_logits[: self.raw_num_token]
             else:
+                full_logits = None
                 next_token_logits = output.next_token_logits[: self.raw_num_token]
-                full_logits = output.full_logits[: self.raw_num_token]
 
             return LogitsProcessorOutput(
                 next_token_logits=next_token_logits,

From 8558e4979661389dafc730ef49c3e26d02ce8069 Mon Sep 17 00:00:00 2001
From: Liangsheng Yin <lsyincs@gmail.com>
Date: Thu, 11 Dec 2025 01:16:14 +0900
Subject: [PATCH 8/8] schedule_batch: rename DLLM helper to _init_fill_ids_for_dllm and update self.fill_ids in place

---
 python/sglang/srt/managers/schedule_batch.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
index db438f73bca2..baeba5298182 100644
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -785,21 +785,19 @@ def finished(self) -> bool:
     def is_dllm(self):
         return self.dllm_config is not None
 
-    def _get_fill_ids_for_dllm(self):
+    def _init_fill_ids_for_dllm(self):
         if not self.fill_ids:
-            dllm_ids = (
+            self.fill_ids = (
                 self.origin_input_ids
                 + [self.dllm_config.mask_id] * self.dllm_config.block_size
             )
         else:
             self.dllm_block_offset += self.dllm_config.block_size
-            dllm_ids += [self.dllm_config.mask_id] * self.dllm_config.block_size
-
-        return dllm_ids
+            self.fill_ids += [self.dllm_config.mask_id] * self.dllm_config.block_size
 
     def init_next_round_input(self, tree_cache: Optional[BasePrefixCache] = None):
         if self.is_dllm():
-            self.fill_ids = self._get_fill_ids_for_dllm()
+            self._init_fill_ids_for_dllm()
         else:
             self.fill_ids = self.origin_input_ids + self.output_ids