diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
new file mode 100644
index 00000000000..ff07c07fc39
--- /dev/null
+++ b/.github/workflows/doc.yml
@@ -0,0 +1,52 @@
+name: doc_test
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main and v0.* branches
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - "docs/**"
+ - .github/workflows/doc.yml
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Token permissions: read repository contents; pages and id-token writes are required by deploy-pages.
+permissions:
+ contents: read # for checkout
+ pages: write # for deploy-pages
+ id-token: write # for deploy-pages
+
+jobs:
+ doc_test:
+ runs-on: ubuntu-latest
+ timeout-minutes: 5 # Increase this timeout value as needed
+ strategy:
+ matrix:
+ python-version: ["3.10"]
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install the current repository
+ run: |
+ pip install -e .[test]
+ pip install -r docs/requirements-docs.txt
+
+ - name: Run doc make html
+ run: |
+ cd docs
+ make clean
+ make html
diff --git a/docs/data.rst b/docs/api/data.rst
similarity index 100%
rename from docs/data.rst
rename to docs/api/data.rst
diff --git a/docs/api/single_controller.rst b/docs/api/single_controller.rst
new file mode 100644
index 00000000000..f10b6521c87
--- /dev/null
+++ b/docs/api/single_controller.rst
@@ -0,0 +1,26 @@
+Single Controller interface
+============================
+
+The Single Controller provides a unified interface for managing distributed workers
+using Ray or other backends and executing functions across them.
+It simplifies the process of dispatching tasks and collecting results, particularly
+when dealing with data parallelism or model parallelism.
+
+
+Core APIs
+~~~~~~~~~~~~~~~~~
+
+.. autoclass:: verl.single_controller.Worker
+ :members: __init__, __new__, get_master_addr_port, get_cuda_visible_devices, world_size, rank
+
+.. autoclass:: verl.single_controller.WorkerGroup
+ :members: __init__, world_size
+
+.. autoclass:: verl.single_controller.ClassWithInitArgs
+ :members: __init__, __call__
+
+.. autoclass:: verl.single_controller.ResourcePool
+ :members: __init__, world_size, local_world_size_list, local_rank_list
+
+.. automodule:: verl.single_controller.ray
+ :members: RayWorkerGroup, create_colocated_worker_cls
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 041730a2a25..9f456bebf1e 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -106,8 +106,9 @@ verl is fast with:
:maxdepth: 1
:caption: API References
- data
+ api/data
api/utils
+ api/single_controller
.. toctree::
diff --git a/docs/start/install.rst b/docs/start/install.rst
index 81cae89d089..0b6f415b87e 100644
--- a/docs/start/install.rst
+++ b/docs/start/install.rst
@@ -214,11 +214,10 @@ Install with AMD GPUs - ROCM kernel support
------------------------------------------------------------------
When you run on AMD GPUs (MI300) with ROCM platform, you cannot use the previous quickstart to run verl. You should follow the following steps to build a docker and run it.
-
If you encounter any issues in using AMD GPUs running verl, feel free to contact me - `Yusheng Su `_.
Find the docker for AMD ROCm: `docker/Dockerfile.rocm `_
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.. code-block:: bash
@@ -267,15 +266,15 @@ Find the docker for AMD ROCm: `docker/Dockerfile.rocm None:
+ """Initialize the worker with environment settings and device configuration.
+
+ Args:
+ cuda_visible_devices (str, optional):
+ CUDA visible devices configuration. Defaults to None.
+ """
# construct a meta from environment variable. Note that the import must be inside the class because it is executed remotely
import os
@@ -175,6 +195,12 @@ def __init__(self, cuda_visible_devices=None) -> None:
self.fused_worker_dict = {}
def get_fused_worker_by_name(self, worker_name: str):
+ """Get a fused worker by its name.
+
+ Args:
+ worker_name (str):
+ Name of the worker to retrieve
+ """
return self.fused_worker_dict.get(worker_name, None)
def _configure_with_store(self, store: Dict):
@@ -192,9 +218,11 @@ def _configure_with_store(self, store: Dict):
os.environ["REDIS_STORE_SERVER_HOST"] = str(self._master_addr).replace("[", "").replace("]", "") if self._master_addr else ""
def get_master_addr_port(self):
+ """Get the master address and port for distributed communication."""
return self._master_addr, self._master_port
def get_cuda_visible_devices(self):
+ """Get the CUDA visible devices configuration."""
import os
cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "not set")
@@ -202,18 +230,40 @@ def get_cuda_visible_devices(self):
@property
def world_size(self):
+ """Get the total number of workers in the distributed setup."""
return self._world_size
@property
def rank(self):
+ """Get the rank of this worker in the distributed setup."""
return self._rank
@register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO_WITH_FUNC)
def execute_with_func_generator(self, func, *args, **kwargs):
+ """Execute a function with function generator dispatch mode.
+
+ Args:
+ func:
+ Function to execute
+ *args:
+ Positional arguments for the function
+ **kwargs:
+ Keyword arguments for the function
+ """
ret_proto = func(self, *args, **kwargs)
return ret_proto
@register(dispatch_mode=Dispatch.ALL_TO_ALL, execute_mode=Execute.RANK_ZERO)
def execute_func_rank_zero(self, func, *args, **kwargs):
+ """Execute a function in rank zero execution mode.
+
+ Args:
+ func:
+ Function to execute
+ *args:
+ Positional arguments for the function
+ **kwargs:
+ Keyword arguments for the function
+ """
result = func(*args, **kwargs)
return result
diff --git a/verl/single_controller/base/worker_group.py b/verl/single_controller/base/worker_group.py
index d7761c40613..04c4f15bede 100644
--- a/verl/single_controller/base/worker_group.py
+++ b/verl/single_controller/base/worker_group.py
@@ -25,9 +25,20 @@
class ResourcePool:
- """The resource pool with meta info such as world_size."""
+ """
+ Manages a pool of resources across multiple nodes, tracking process counts and GPU allocations.
+ The class provides methods to calculate world size, local world sizes, and local ranks
+ across all nodes in the pool.
+ """
def __init__(self, process_on_nodes=None, max_colocate_count: int = 10, n_gpus_per_node=8) -> None:
+ """Initialize the ResourcePool with node processes and GPU configuration.
+
+ Args:
+ process_on_nodes (List[int], optional): List of process counts per node. Defaults to empty list.
+ max_colocate_count (int, optional): Maximum number of processes that can be colocated. Defaults to 10.
+ n_gpus_per_node (int, optional): Number of GPUs available per node. Defaults to 8.
+ """
if process_on_nodes is None:
process_on_nodes = []
self._store = process_on_nodes
@@ -39,6 +50,7 @@ def add_node(self, process_count):
@property
def world_size(self):
+ """Total number of processes across all nodes in the pool."""
return sum(self._store)
def __call__(self) -> Any:
@@ -49,38 +61,53 @@ def store(self):
return self._store
def local_world_size_list(self) -> List[int]:
+ """Returns a flat list where each process has its local world size."""
nested_local_world_size_list = [[local_world_size for _ in range(local_world_size)] for local_world_size in self._store]
return [item for row in nested_local_world_size_list for item in row]
def local_rank_list(self) -> List[int]:
+ """Returns a flat list of local ranks for all processes across all nodes."""
nested_local_rank_list = [[i for i in range(local_world_size)] for local_world_size in self._store]
return [item for row in nested_local_rank_list for item in row]
class ClassWithInitArgs:
"""
- This class stores a class constructor and the args/kwargs to construct the class.
- It is used to instantiate the remote class.
+ Wrapper class that stores constructor arguments for deferred instantiation.
+ This class is particularly useful for remote class instantiation where
+ the actual construction needs to happen at a different time or location.
"""
def __init__(self, cls, *args, **kwargs) -> None:
+ """Initialize the ClassWithInitArgs instance.
+
+ Args:
+ cls: The class to be instantiated later
+ *args: Positional arguments for the class constructor
+ **kwargs: Keyword arguments for the class constructor
+ """
self.cls = cls
self.args = args
self.kwargs = kwargs
self.fused_worker_used = False
- # def add_arg(self, arg):
- # self.args += (arg,)
-
- # def add_kwarg(self, key, value):
- # self.kwargs[key] = value
-
def __call__(self) -> Any:
+ """Instantiate the stored class with the stored arguments."""
return self.cls(*self.args, **self.kwargs)
def check_workers_alive(workers: List, is_alive: Callable, gap_time: float = 1) -> None:
+ """Continuously monitors worker processes and raises SIGABRT if any worker dies.
+
+ Args:
+ workers (List):
+ List of worker objects to monitor
+ is_alive (Callable):
+ Function to check if a worker is alive
+ gap_time (float):
+ Time interval between checks
+ """
import time
while True:
@@ -92,7 +119,10 @@ def check_workers_alive(workers: List, is_alive: Callable, gap_time: float = 1)
class WorkerGroup:
- """A group of workers"""
+ """
+ Base class for managing a group of workers in a distributed system.
+ The class provides methods for worker management, aliveness checking, and method binding.
+ """
fused_worker_execute_fn_name = "_fuw_execute"
@@ -116,9 +146,11 @@ def __init__(self, resource_pool: ResourcePool, **kwargs) -> None:
self._checker_thread: threading.Thread = None
def _is_worker_alive(self, worker):
+ """Check if a worker is alive. Must be implemented by derived classes."""
raise NotImplementedError("WorkerGroup._is_worker_alive called, should be implemented in derived class.")
def _block_until_all_workers_alive(self) -> None:
+ """Blocks until all workers in the group are alive."""
while True:
all_state = [self._is_worker_alive(worker) for worker in self._workers]
if False in all_state:
@@ -127,6 +159,11 @@ def _block_until_all_workers_alive(self) -> None:
break
def start_worker_aliveness_check(self, every_n_seconds=1) -> None:
+ """Starts a background thread to monitor worker aliveness.
+
+ Args:
+ every_n_seconds (int): Interval between aliveness checks
+ """
# before starting checking worker aliveness, make sure all workers are already alive
self._block_until_all_workers_alive()
@@ -135,16 +172,19 @@ def start_worker_aliveness_check(self, every_n_seconds=1) -> None:
@property
def world_size(self):
+ """Number of workers in the group."""
return len(self._workers)
- # execute_all_async and execute_rank_zero_async should be implemented by RayWorkerGroup, TorchRPCWorkerGroup,
- # MegatronWorkerGroup, XperfWorkerGroup should skip
-
def _bind_worker_method(self, user_defined_cls, func_generator):
- """
- Bind the worker method to the WorkerGroup
- """
+ """Binds worker methods to the WorkerGroup based on registered attributes.
+ Args:
+ user_defined_cls (type): The class containing methods to bind
+ func_generator (Callable): Function that generates the bound method
+
+ Returns:
+ List[str]: List of method names that were successfully bound
+ """
method_names = []
for method_name in dir(user_defined_cls):
try:
diff --git a/verl/single_controller/ray/base.py b/verl/single_controller/ray/base.py
index 1941ea2f353..c0822e3cf27 100644
--- a/verl/single_controller/ray/base.py
+++ b/verl/single_controller/ray/base.py
@@ -145,6 +145,13 @@ def merge_resource_pool(rp1: RayResourcePool, rp2: RayResourcePool) -> RayResour
class RayClassWithInitArgs(ClassWithInitArgs):
+ """A wrapper class for Ray actors with initialization arguments.
+
+ This class extends ClassWithInitArgs to provide additional functionality for
+ configuring and creating Ray actors with specific resource requirements and
+ scheduling strategies.
+ """
+
def __init__(self, cls, *args, **kwargs) -> None:
# self._options = kwargs.pop('options', dict())
super().__init__(cls, *args, **kwargs)
@@ -152,12 +159,34 @@ def __init__(self, cls, *args, **kwargs) -> None:
self._additional_resource = {}
def set_additional_resource(self, additional_resource):
+ """Set additional resource requirements for the actor.
+
+ Args:
+ additional_resource: Dictionary specifying additional resource requirements
+ """
self._additional_resource = additional_resource
def update_options(self, options: Dict):
+ """Update the Ray actor creation options.
+
+ Args:
+ options: Dictionary of options to update
+ """
self._options.update(options)
def __call__(self, placement_group, placement_group_bundle_idx, use_gpu: bool = True, num_gpus=1, sharing_with=None) -> Any:
+ """Create and return a Ray actor with the configured options.
+
+ Args:
+ placement_group: Ray placement group for scheduling
+ placement_group_bundle_idx: Index of the bundle in the placement group
+ use_gpu: Whether to use GPU resources
+ num_gpus: Number of GPUs to allocate
+ sharing_with: Actor to share resources with
+
+ Returns:
+ A Ray actor handle with the configured options
+ """
if sharing_with is not None:
target_node_id = ray.get(sharing_with.get_node_id.remote())
cuda_visible_devices = ray.get(sharing_with.get_cuda_visible_devices.remote())
@@ -181,6 +210,13 @@ def __call__(self, placement_group, placement_group_bundle_idx, use_gpu: bool =
class RayWorkerGroup(WorkerGroup):
+ """A group of Ray workers that can be managed collectively.
+
+ This class extends WorkerGroup to provide Ray-specific functionality for
+ creating and managing groups of Ray actors with specific resource requirements
+ and scheduling strategies.
+ """
+
def __init__(
self,
resource_pool: RayResourcePool = None,
@@ -193,6 +229,18 @@ def __init__(
ray_wait_register_center_timeout: int = 300,
**kwargs,
) -> None:
+ """Initialize a RayWorkerGroup.
+
+ Args:
+ resource_pool: Resource pool for worker allocation
+ ray_cls_with_init: Class with initialization arguments for workers
+ bin_pack: Whether to use strict bin packing for resource allocation
+ name_prefix: Prefix for worker names
+ detached: Whether workers should be detached
+ worker_names: Names of existing workers to attach to
+ ray_wait_register_center_timeout: Timeout for waiting on register center
+ **kwargs: Additional keyword arguments
+ """
super().__init__(resource_pool=resource_pool, **kwargs)
self.ray_cls_with_init = ray_cls_with_init
self.name_prefix = get_random_string(length=6) if name_prefix is None else name_prefix
@@ -218,6 +266,14 @@ def __init__(
self.method_names = []
def _is_worker_alive(self, worker: ray.actor.ActorHandle):
+ """Check if a worker actor is still alive.
+
+ Args:
+ worker: Ray actor handle to check
+
+ Returns:
+ bool: True if the worker is alive, False otherwise
+ """
worker_state_dict = get_actor(worker._actor_id.hex())
return worker_state_dict.get("state", "undefined") == "ALIVE" if worker_state_dict is not None else False
@@ -231,6 +287,14 @@ def _init_with_detached_workers(self, worker_names, worker_handles):
self._world_size = len(worker_names)
def _init_with_resource_pool(self, resource_pool, ray_cls_with_init, bin_pack, detached):
+ """Initialize the worker group by creating new workers from a resource pool.
+
+ Args:
+ resource_pool: Resource pool for worker allocation
+ ray_cls_with_init: Class with initialization arguments for workers
+ bin_pack: Whether to use strict bin packing for resource allocation
+ detached: Whether workers should be detached
+ """
use_gpu = resource_pool.use_gpu
strategy = "PACK"
@@ -327,21 +391,32 @@ def from_detached(
worker_handles=None,
ray_cls_with_init=None,
):
+ """Create a worker group from existing detached workers.
+
+ Args:
+ name_prefix: Prefix for worker names
+ worker_names: Names of existing workers to attach to
+ ray_cls_with_init: Class with initialization arguments for workers
+
+ Returns:
+ A new RayWorkerGroup instance
+ """
worker_group = cls(resource_pool=None, ray_cls_with_init=ray_cls_with_init, name_prefix=name_prefix, worker_names=worker_names, worker_handles=worker_handles)
return worker_group
def spawn(self, prefix_set):
- """
- spawn to a dictionary of worker groups, each with a subset of method with prefix.
+ """Spawn to a dictionary of worker groups, each with a subset of method with prefix.
+
+ Args:
+ prefix_set: Set of prefixes to create worker groups for
+ Returns:
+ Dictionary of worker groups keyed by prefix
"""
if self.fused_worker_used:
return self.spawn_fused(prefix_set)
def _rebind_actor_methods(worker_group, actor_name):
- """
- bind the method with actor_prefix to its original name
- """
prefix: str = actor_name + "_"
for method_name in dir(worker_group):
if method_name.startswith(prefix):
@@ -364,6 +439,14 @@ def _rebind_actor_methods(worker_group, actor_name):
return new_worker_group_dict
def spawn_fused(self, prefix_set):
+ """Create a dictionary of worker groups for fused workers.
+
+ Args:
+ prefix_set: Set of prefixes to create worker groups for
+
+ Returns:
+ Dictionary of worker groups keyed by prefix
+ """
wg_dict = dict()
for key in prefix_set:
new_wg = deepcopy(self)
@@ -373,6 +456,11 @@ def spawn_fused(self, prefix_set):
return wg_dict
def fuse(self, prefix_set):
+ """Fuse multiple worker groups into the current worker group.
+
+ Args:
+ prefix_set: Set of prefixes to fuse into the worker group
+ """
if self.wg_dict is None:
self.wg_dict = self.spawn(prefix_set)
for role_name, role_wg in self.wg_dict.items():
@@ -380,6 +468,17 @@ def fuse(self, prefix_set):
self.method_names = self._bind_worker_method(self.ray_cls_with_init.cls, func_generator)
def _execute_remote_single_worker(self, worker, method_name: str, *args, **kwargs):
+ """Execute a method on a single worker remotely.
+
+ Args:
+ worker: The worker actor handle
+ method_name: Name of the method to execute
+ *args: Positional arguments for the method
+ **kwargs: Keyword arguments for the method
+
+ Returns:
+ Remote object reference to the method execution
+ """
if self.fused_worker_used and method_name not in self.method_names:
remote_call = getattr(worker, self.fused_worker_execute_fn_name)
return remote_call.remote(f"{self.sub_cls_name}_fwmn_{method_name}", *args, **kwargs)
@@ -388,21 +487,81 @@ def _execute_remote_single_worker(self, worker, method_name: str, *args, **kwarg
return remote_call.remote(*args, **kwargs)
def execute_rank_zero_sync(self, method_name: str, *args, **kwargs):
+ """Execute a method on rank zero worker synchronously.
+
+ Args:
+ method_name: Name of the method to execute
+ *args: Positional arguments for the method
+ **kwargs: Keyword arguments for the method
+
+ Returns:
+ Result of the method execution
+ """
return ray.get(self.execute_rank_zero_async(method_name, *args, **kwargs))
def execute_rank_zero_async(self, method_name: str, *args, **kwargs):
+ """Execute a method on rank zero worker asynchronously.
+
+ Args:
+ method_name: Name of the method to execute
+ *args: Positional arguments for the method
+ **kwargs: Keyword arguments for the method
+
+ Returns:
+ Remote object reference to the method execution
+ """
return self._execute_remote_single_worker(self._workers[0], method_name, *args, **kwargs)
def execute_rank_zero(self, method_name: str, *args, **kwargs):
+ """Alias for execute_rank_zero_async.
+
+ Args:
+ method_name: Name of the method to execute
+ *args: Positional arguments for the method
+ **kwargs: Keyword arguments for the method
+
+ Returns:
+ Remote object reference to the method execution
+ """
return self.execute_rank_zero_async(method_name, *args, **kwargs)
def execute_all(self, method_name: str, *args, **kwargs):
+ """Alias for execute_all_async.
+
+ Args:
+ method_name: Name of the method to execute
+ *args: Positional arguments for the method
+ **kwargs: Keyword arguments for the method
+
+ Returns:
+ List of remote object references to the method executions
+ """
return self.execute_all_async(method_name, *args, **kwargs)
def execute_all_sync(self, method_name: str, *args, **kwargs):
+ """Execute a method on all workers synchronously.
+
+ Args:
+ method_name: Name of the method to execute
+ *args: Positional arguments for the method
+ **kwargs: Keyword arguments for the method
+
+ Returns:
+ List of results from all workers
+ """
return ray.get(self.execute_all_async(method_name, *args, **kwargs))
def execute_all_async(self, method_name: str, *args, **kwargs):
+ """Execute a method on all workers asynchronously.
+
+ Args:
+ method_name: Name of the method to execute
+ *args: Positional arguments for the method
+ **kwargs: Keyword arguments for the method
+
+ Returns:
+ List of remote object references to the method executions
+ """
# Here, we assume that if all arguments in args and kwargs are lists,
# and their lengths match len(self._workers), we'll distribute each
# element in these lists to the corresponding worker
diff --git a/verl/single_controller/ray/megatron.py b/verl/single_controller/ray/megatron.py
index 8baf03e6f17..4f56ac1bfab 100644
--- a/verl/single_controller/ray/megatron.py
+++ b/verl/single_controller/ray/megatron.py
@@ -30,6 +30,14 @@ class NVMegatronRayWorkerGroup(RayWorkerGroup, MegatronWorkerGroup):
"""
def __init__(self, resource_pool: RayResourcePool, ray_cls_with_init: RayClassWithInitArgs, **kwargs):
+ """
+ Initialize the NVMegatronRayWorkerGroup.
+
+ Args:
+ resource_pool (RayResourcePool): The resource pool containing worker resources
+ ray_cls_with_init (RayClassWithInitArgs): The Ray class with initialization arguments
+ **kwargs: Additional keyword arguments to pass to the parent class
+ """
super().__init__(resource_pool=resource_pool, ray_cls_with_init=ray_cls_with_init, **kwargs)
self._megatron_rank_info: DistRankInfo = self.execute_all_sync(method_name="get_megatron_rank_info")
self._megatron_global_info: DistGlobalInfo = ray.get(self.execute_rank_zero_async(method_name="get_megatron_global_info"))