[PR 1/6] Collective in Ray #12637
Changes from all commits
@@ -0,0 +1,9 @@
```python
from .collective import nccl_available, mpi_available, is_group_initialized, \
    init_collective_group, destroy_collective_group, get_rank, \
    get_world_size, allreduce, barrier

__all__ = [
    "nccl_available", "mpi_available", "is_group_initialized",
    "init_collective_group", "destroy_collective_group", "get_rank",
    "get_world_size", "allreduce", "barrier"
]
```
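This first file reads like the package-level re-export: the entire user-facing surface comes from the `collective` module shown next. A small, hypothetical sketch of how downstream code would import it and guard on the availability probes (the alias `col` is not mandated by this PR):

```python
# Illustrative only: how downstream code consumes the re-exported surface.
import ray.util.collective as col

# Fail fast if the requested backend is unavailable on this installation.
if not col.nccl_available():
    raise RuntimeError("This sketch assumes NCCL (and cupy) is installed.")

# The callable surface re-exported above:
#   init_collective_group, destroy_collective_group, is_group_initialized,
#   get_rank, get_world_size, allreduce, barrier,
#   nccl_available, mpi_available
```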
@@ -0,0 +1,275 @@
```python
"""APIs exposed under the namespace ray.util.collective."""
import logging

import numpy as np
import ray
from ray.util.collective import types
from ray.util.collective.const import get_nccl_store_name

_MPI_AVAILABLE = False
_NCCL_AVAILABLE = True

# try:
#     from ray.util.collective.collective_group.mpi_collective_group \
#         import MPIGroup
# except ImportError:
#     _MPI_AVAILABLE = False
try:
    from ray.util.collective.collective_group import NCCLGroup
    from ray.util.collective.collective_group import nccl_util
except ImportError:
    _NCCL_AVAILABLE = False

logger = logging.getLogger(__name__)


def nccl_available():
    return _NCCL_AVAILABLE


def mpi_available():
    return _MPI_AVAILABLE


class GroupManager(object):
    """
    Use this class to manage the collective groups we created so far.

    Each process will have an instance of `GroupManager`. Each process
    could belong to multiple collective groups. The membership information
    and other metadata are stored in the global `_group_mgr` object.
    """

    def __init__(self):
        self._name_group_map = {}
        self._group_name_map = {}

    def create_collective_group(self, backend, world_size, rank, group_name):
        """
        The entry to create new collective groups and register in the manager.

        Put the registration and the group information into the manager
        metadata as well.
        """
        backend = types.Backend(backend)
        if backend == types.Backend.MPI:
            raise NotImplementedError()
        elif backend == types.Backend.NCCL:
            # create the ncclUniqueID
            if rank == 0:
                # availability has been checked before entering here.
                group_uid = nccl_util.get_nccl_unique_id()
                store_name = get_nccl_store_name(group_name)
                # Avoid a potential circular dependency in ray/actor.py
                from ray.util.collective.util import NCCLUniqueIDStore
                store = NCCLUniqueIDStore.options(
                    name=store_name, lifetime="detached").remote(store_name)
                ray.wait([store.set_id.remote(group_uid)])

            logger.debug("creating NCCL group: '{}'".format(group_name))
            g = NCCLGroup(world_size, rank, group_name)
            self._name_group_map[group_name] = g
            self._group_name_map[g] = group_name
        return self._name_group_map[group_name]

    def is_group_exist(self, group_name):
        return group_name in self._name_group_map

    def get_group_by_name(self, group_name):
        """Get the collective group handle by its name."""
        if not self.is_group_exist(group_name):
            logger.warning(
                "The group '{}' is not initialized.".format(group_name))
            return None
        return self._name_group_map[group_name]

    def destroy_collective_group(self, group_name):
        """Group destructor."""
        if not self.is_group_exist(group_name):
            logger.warning("The group '{}' does not exist.".format(group_name))
            return

        # release the collective group resource
        g = self._name_group_map[group_name]
        rank = g.rank
        backend = g.backend()

        # clean up the dicts
        del self._group_name_map[g]
        del self._name_group_map[group_name]
        if backend == types.Backend.NCCL:
            # release the named actor
            if rank == 0:
                store_name = get_nccl_store_name(group_name)
                store = ray.get_actor(store_name)
                ray.wait([store.__ray_terminate__.remote()])
                ray.kill(store)
        # Release the communicator resources
        g.destroy_group()
```
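The rank-0 path above parks the `ncclUniqueID` in a detached, named actor (`NCCLUniqueIDStore`, imported from `ray.util.collective.util`, which is not part of this diff) so the other ranks can rendezvous on it by name. A plausible minimal sketch of such a store, purely to illustrate the pattern — only `set_id` is confirmed by the diff; everything else here is an assumption:

```python
import ray


@ray.remote
class NCCLUniqueIDStoreSketch:
    """Hypothetical stand-in for ray.util.collective.util.NCCLUniqueIDStore."""

    def __init__(self, name):
        self.name = name
        self.nccl_id = None

    def set_id(self, uid):
        # Called once by rank 0 after nccl_util.get_nccl_unique_id().
        self.nccl_id = uid
        return self.nccl_id

    def get_id(self):
        # Assumed accessor: non-zero ranks would look the actor up by name
        # (ray.get_actor) and fetch the unique ID from it.
        return self.nccl_id
```

Because the store is detached and registered under a deterministic name (`get_nccl_store_name(group_name)`), rank 0 can create it and any other process can later fetch it with `ray.get_actor(store_name)` — which is exactly how `destroy_collective_group` locates it for teardown.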
```python
_group_mgr = GroupManager()


def is_group_initialized(group_name):
    """Check if the group is initialized in this process by the group name."""
    return _group_mgr.is_group_exist(group_name)


def init_collective_group(world_size: int,
                          rank: int,
                          backend=types.Backend.NCCL,
                          group_name: str = "default"):
    """
    Initialize a collective group inside an actor process.

    Args:
        world_size (int): the total number of processes in the group.
        rank (int): the rank of the current process.
        backend: the CCL backend to use, NCCL or MPI.
        group_name (str): the name of the collective group.

    Returns:
        None
    """
    _check_inside_actor()
    backend = types.Backend(backend)
    _check_backend_availability(backend)
    global _group_mgr
    # TODO(Hao): implement a group auto-counter.
    if not group_name:
        raise ValueError("group_name '{}' needs to be a string."
                         .format(group_name))

    if _group_mgr.is_group_exist(group_name):
        raise RuntimeError("Trying to initialize a group twice.")

    assert (world_size > 0)
    assert (rank >= 0)
    assert (rank < world_size)
    _group_mgr.create_collective_group(backend, world_size, rank, group_name)
```
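Taken together with the `GroupManager` above, initialization is symmetric: every participating actor calls `init_collective_group` with the same `world_size` and `group_name` but its own `rank`. A hedged driver-side sketch — the actor class, method name, and GPU count are illustrative, and it assumes a machine with two GPUs plus NCCL/cupy installed:

```python
import ray
import ray.util.collective as col


@ray.remote(num_gpus=1)
class Trainer:
    def init_group(self, world_size, rank):
        # Every participant passes the same world_size/group_name and its own
        # rank; rank 0 additionally publishes the NCCL unique ID via the
        # detached store created in GroupManager.create_collective_group.
        col.init_collective_group(world_size, rank, group_name="train")
        return True


ray.init()
trainers = [Trainer.remote() for _ in range(2)]
ray.get([t.init_group.remote(2, i) for i, t in enumerate(trainers)])
```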
```python
def destroy_collective_group(group_name: str = "default") -> None:
    """Destroy a collective group given its group name."""
    _check_inside_actor()
    global _group_mgr
    _group_mgr.destroy_collective_group(group_name)


def get_rank(group_name: str = "default") -> int:
    """
    Return the rank of this process in the given group.

    Args:
        group_name (str): the name of the group to query

    Returns:
        the rank of this process in the named group,
        -1 if the group does not exist or the process does
        not belong to the group.
    """
    _check_inside_actor()
    if not is_group_initialized(group_name):
        return -1
    g = _group_mgr.get_group_by_name(group_name)
    return g.rank


def get_world_size(group_name="default") -> int:
    """
    Return the size of the collective group with the given name.

    Args:
        group_name: the name of the group to query

    Returns:
        The world size of the collective group,
        -1 if the group does not exist or the process does
        not belong to the group.
    """
    _check_inside_actor()
    if not is_group_initialized(group_name):
        return -1
    g = _group_mgr.get_group_by_name(group_name)
    return g.world_size
```
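Note that the query functions return -1 rather than raising when the calling process has not joined the named group; only the communication APIs go through `_check_and_get_group` and raise. A tiny illustrative actor exercising that contract (the class and method names are hypothetical):

```python
import ray
import ray.util.collective as col


@ray.remote
class Probe:
    def query(self, group_name="train"):
        # If this actor never joined `group_name`, both calls return -1
        # instead of raising, unlike allreduce/barrier.
        return col.get_rank(group_name), col.get_world_size(group_name)
```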
```python
def allreduce(tensor, group_name: str, op=types.ReduceOp.SUM):
    """
    Collective allreduce the tensor across the group with name group_name.

    Args:
        tensor: the tensor to be all-reduced on this process.
        group_name (str): the collective group name to perform allreduce.
        op: The reduce operation.

    Returns:
        None
    """
    _check_single_tensor_input(tensor)
    g = _check_and_get_group(group_name)
    opts = types.AllReduceOptions
    opts.reduceOp = op
    g.allreduce(tensor, opts)
```

Review comment on lines +212 to +213 (the `opts` assignment above):

> **Contributor:** doesn't this set the global variable? can we instead create an instance?
>
> **Contributor (author):** This is an exposed user API: it does not write; it only reads from the global variable.
>
> **Contributor:** doesn't anyways, i think this is a nit :)
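Since `allreduce` returns `None`, the reduction is applied in place to the tensor the caller passes in. A hedged sketch of calling it from an actor that has already joined the group, extending the earlier `Trainer` sketch — it assumes the NCCL backend operating on cupy GPU arrays, and the names are illustrative:

```python
import cupy as cp
import ray
import ray.util.collective as col
from ray.util.collective.types import ReduceOp


@ray.remote(num_gpus=1)
class Trainer:
    def init_group(self, world_size, rank):
        col.init_collective_group(world_size, rank, group_name="train")

    def step(self):
        grad = cp.ones((4,), dtype=cp.float32)
        # SUM-allreduce in place across every rank in the "train" group.
        col.allreduce(grad, "train", op=ReduceOp.SUM)
        col.barrier("train")
        return cp.asnumpy(grad)  # each entry equals the group's world size
```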
```python
def barrier(group_name):
    """
    Barrier all processes in the collective group.

    Args:
        group_name (str): the name of the group to barrier.

    Returns:
        None
    """
    g = _check_and_get_group(group_name)
    g.barrier()


def _check_and_get_group(group_name):
    """Check the existence and return the group handle."""
    _check_inside_actor()
    if not is_group_initialized(group_name):
        raise RuntimeError("The collective group '{}' is not "
                           "initialized in the process.".format(group_name))
    g = _group_mgr.get_group_by_name(group_name)
    return g


def _check_backend_availability(backend: types.Backend):
    """Check whether the backend is available."""
    if backend == types.Backend.MPI:
        if not mpi_available():
            raise RuntimeError("MPI is not available.")
    elif backend == types.Backend.NCCL:
        # expect some slowdown at the first call
        # as I defer the import to invocation.
        if not nccl_available():
            raise RuntimeError("NCCL is not available.")


def _check_single_tensor_input(tensor):
    """Check if the tensor has a supported type."""
    if isinstance(tensor, np.ndarray):
        return
    if types.cupy_available():
        if isinstance(tensor, types.cp.ndarray):
            return
    if types.torch_available():
        if isinstance(tensor, types.th.Tensor):
            return
    raise RuntimeError("Unrecognized tensor type '{}'. Supported types are: "
                       "np.ndarray, torch.Tensor, cupy.ndarray.".format(
                           type(tensor)))


def _check_inside_actor():
    """Check if we are currently inside a Ray actor/task."""
    worker = ray.worker.global_worker
    if worker.mode == ray.WORKER_MODE:
        return
    else:
        raise RuntimeError("The collective APIs shall be only used inside "
                           "a Ray actor or task.")
```
@@ -0,0 +1,3 @@
```python
from .nccl_collective_group import NCCLGroup

__all__ = ["NCCLGroup"]
```
@@ -0,0 +1,52 @@
```python
"""Abstract class for collective groups."""
from abc import ABCMeta
from abc import abstractmethod

from ray.util.collective.types import AllReduceOptions, BarrierOptions


class BaseGroup(metaclass=ABCMeta):
    def __init__(self, world_size, rank, group_name):
        """
        Init the process group with basic information.

        Args:
            world_size (int): The total number of processes in the group.
            rank (int): The rank of the current process.
            group_name (str): The group name.
        """
        self._world_size = world_size
        self._rank = rank
        self._group_name = group_name

    @property
    def rank(self):
        """Return the rank of the current process."""
        return self._rank

    @property
    def world_size(self):
        """Return the number of processes in this group."""
        return self._world_size

    @property
    def group_name(self):
        """Return the group name of this group."""
        return self._group_name

    def destroy_group(self):
        """GC the communicators."""
        pass

    @classmethod
    def backend(cls):
        """The backend of this collective group."""
        raise NotImplementedError()

    @abstractmethod
    def allreduce(self, tensor, allreduce_options=AllReduceOptions()):
        raise NotImplementedError()

    @abstractmethod
    def barrier(self, barrier_options=BarrierOptions()):
        raise NotImplementedError()
```
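`BaseGroup` is the contract that concrete backends (the `NCCLGroup` in this PR, and presumably a future `MPIGroup`) fill in: backend identification plus the two collective primitives. A deliberately trivial, hypothetical subclass just to show what the abstract interface demands, written as if it sat in the same module as `BaseGroup` so its imports are already in scope; it performs no real communication:

```python
# Hypothetical subclass, assumed to live alongside BaseGroup above, so
# BaseGroup / AllReduceOptions / BarrierOptions are already in scope.
class NoopGroup(BaseGroup):
    """Illustrative single-process group satisfying the BaseGroup contract."""

    @classmethod
    def backend(cls):
        # A real subclass returns its types.Backend member (e.g. NCCL);
        # this toy sketch just identifies itself with a string.
        return "noop"

    def allreduce(self, tensor, allreduce_options=AllReduceOptions()):
        # With a single process an allreduce is the identity: nothing to do.
        pass

    def barrier(self, barrier_options=BarrierOptions()):
        # Nothing to synchronize with in a single-process group.
        pass


g = NoopGroup(world_size=1, rank=0, group_name="noop")
assert g.rank == 0 and g.world_size == 1 and g.group_name == "noop"
```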
@@ -0,0 +1,5 @@
```python
"""Implementation of the MPI collective group."""
try:
    import mpi4py  # noqa: F401
except ImportError:
    raise
```
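This placeholder only verifies that `mpi4py` is importable and re-raises otherwise, which is exactly the failure that the commented-out probe at the top of `collective.py` would catch once the MPI backend lands. Presumably that probe gets uncommented into something like the sketch below; `MPIGroup` does not exist yet in this PR, so this is an assumption about the intended shape:

```python
# Hypothetical future form of the availability probe in collective.py,
# mirroring the existing NCCL one; MPIGroup is not implemented in this PR.
_MPI_AVAILABLE = True
try:
    from ray.util.collective.collective_group.mpi_collective_group \
        import MPIGroup  # noqa: F401
except ImportError:
    _MPI_AVAILABLE = False
```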