[AutoParallel] fix pp step return (#74913)

Xing-lil · web-flow · commit 2fd8a7edc570 · 2025-08-27T21:14:56.000+08:00
diff --git a/python/paddle/distributed/auto_parallel/pipelining/schedules.py b/python/paddle/distributed/auto_parallel/pipelining/schedules.py
@@ -225,7 +225,14 @@ def _step_microbatches(
         raise NotImplementedError
 
     @abstractmethod
-    def step(self, *args, target=None, losses: list | None = None, **kwargs):
+    def step(
+        self,
+        *args,
+        target=None,
+        losses: list | None = None,
+        return_output: bool = False,
+        **kwargs,
+    ):
         """
         Run one iteration of the pipeline schedule with *whole-batch* input.
         Will chunk the input into microbatches automatically, and go through the
@@ -362,7 +369,14 @@ def _initialize_stage(self, args, kwargs, labels):
             self._stage._prepare_backward_infra(self._n_microbatches, loss)
         self._stage_initialized = True
 
-    def step(self, *args, target=None, losses: list | None = None, **kwargs):
+    def step(
+        self,
+        *args,
+        target=None,
+        losses: list | None = None,
+        return_output: bool = False,
+        **kwargs,
+    ):
         """
         Run one iteration of the pipeline schedule with *whole-batch* input.
         Will chunk the input into microbatches automatically, and go through the
@@ -390,10 +404,10 @@ def step(self, *args, target=None, losses: list | None = None, **kwargs):
         self._step_microbatches(args_split, kwargs_split, targets_split, losses)
 
         # Return merged results per original format
-        if self._stage.is_last:
-            return self._merge_outputs(self._stage.output_chunks)
-        else:
-            return None
+        if return_output:
+            if self._stage.is_last:
+                return self._merge_outputs(self._stage.output_chunks)
+        return None
 
 
 def _batch_p2p(p2p_ops: list[dist.P2POp], desc: str | None = None):
@@ -879,7 +893,14 @@ def _initialize_stages(self, args: tuple[Any, ...], kwargs, labels):
                     )
         self._stages_initialized = True
 
-    def step(self, *args, target=None, losses: list | None = None, **kwargs):
+    def step(
+        self,
+        *args,
+        target=None,
+        losses: list | None = None,
+        return_output: bool = False,
+        **kwargs,
+    ):
         """
         Run one iteration of the pipeline schedule with *whole-batch* input.
         Will chunk the input into microbatches automatically, and go through the
@@ -906,9 +927,10 @@ def step(self, *args, target=None, losses: list | None = None, **kwargs):
         self._step_microbatches(args_split, kwargs_split, targets_split, losses)
 
         # Return merged results per original format
-        for stage in self._stages:
-            if stage.is_last:
-                return self._merge_outputs(stage.output_chunks)
+        if return_output:
+            for stage in self._stages:
+                if stage.is_last:
+                    return self._merge_outputs(stage.output_chunks)
         # Does not contain the last stage
         return None