Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deepspeed/autotuning/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
thread-N: start each experiment in its own thread
"""

import deepspeed.comm as dist
from deepspeed import comm as dist

from datetime import datetime

Expand Down
2 changes: 1 addition & 1 deletion deepspeed/comm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import torch
from .utils import *
import deepspeed.utils as utils
from deepspeed import utils

supported_torch_version = False

Expand Down
4 changes: 2 additions & 2 deletions deepspeed/comm/comm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
deepspeed.comm API
-- must be kept fully compatible (same signatures) with the torch.dist API to ensure backward/cross-framework compatibility.
-- e.g. if client code used
import deepspeed.comm as dist
from deepspeed import comm as dist

instead of
import torch.distributed as dist
Expand Down Expand Up @@ -49,7 +49,7 @@ class ReduceOp(Enum):
from deepspeed.comm.backend import Backend
from deepspeed.comm.torch import TorchBackend

import deepspeed.utils as utils
from deepspeed import utils
from datetime import timedelta

# Current deepspeed.comm backend (cdb) global object for simple access by client code
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/comm/torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import torch

import deepspeed.utils as utils
from deepspeed import utils
from ..constants import TORCH_DISTRIBUTED_DEFAULT_PORT, default_pg_timeout
from datetime import timedelta

Expand Down
4 changes: 2 additions & 2 deletions deepspeed/inference/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import torch
import os

import deepspeed.comm as dist
import deepspeed.utils.groups as groups
from deepspeed import comm as dist
from deepspeed.utils import groups

from torch.nn.modules import Module
from packaging import version as pkg_version
Expand Down
4 changes: 2 additions & 2 deletions deepspeed/moe/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

import torch.nn.init as init
import torch
import deepspeed.comm as dist
from deepspeed import comm as dist

from deepspeed.utils import logger, log_dist

import deepspeed.utils.groups as groups
from deepspeed.utils import groups
from .sharded_moe import MOELayer, TopKGate
from .experts import Experts
import copy
Expand Down
4 changes: 2 additions & 2 deletions deepspeed/moe/sharded_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from time import perf_counter
import torch
from torch import Tensor
import deepspeed.comm as dist
from deepspeed import comm as dist
from torch.nn import Module, ModuleList
import torch.nn.functional as F

Expand Down Expand Up @@ -80,7 +80,7 @@ def gumbel_rsample(shape: Tuple, device: torch.device) -> Tensor:
return gumbel(shape)


import deepspeed.comm as dist
from deepspeed import comm as dist

# einsum dimensions: (g)roup, (s)equence, (e)xpert, (m)odel, (c)apacity
# See https://arxiv.org/pdf/2006.16668.pdf for details.
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/moe/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List, Tuple, Dict
import torch
import deepspeed.utils.groups as groups
from deepspeed.utils import groups
from .layer import MoE


Expand Down
2 changes: 1 addition & 1 deletion deepspeed/ops/transformer/inference/moe_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import torch.nn as nn
from .transformer_inference import DeepSpeedSelfAttention, DeepSpeedInferenceConfig
from ....moe.sharded_moe import TopKGate
import deepspeed.comm as dist
from deepspeed import comm as dist

import torch.nn.functional as F

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import time
from ... import op_builder
import torch.nn as nn
import deepspeed.comm as dist
from deepspeed import comm as dist
# Cuda modules will be imported if needed
inference_cuda_module = None

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import copy
import torch
import contextlib
import deepspeed.comm as dist
from deepspeed import comm as dist

import mmap
from torch import _C
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/bf16_optimizer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import torch
import deepspeed.comm as dist
from deepspeed import comm as dist
from deepspeed.runtime.constants import PIPE_REPLICATED
from deepspeed.ops.op_builder import UtilsBuilder
from deepspeed.runtime import ZeROOptimizer
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/comm/coalesced_collectives.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import torch
from torch import Tensor
import deepspeed.comm as dist
from deepspeed import comm as dist
# NOTE: Use torch.distributed's ProcessGroup class until we have our own.
from torch.distributed import ProcessGroup
import torch.nn.functional
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/comm/nccl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
'''

import torch
import deepspeed.comm as dist
from deepspeed import comm as dist
import time
import cupy
import numpy as np
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from .activation_checkpointing.config import DeepSpeedActivationCheckpointingConfig
from ..monitor.config import DeepSpeedMonitorConfig

import deepspeed.comm as dist
from deepspeed import comm as dist

from ..git_version_info import version as __version__
from ..utils import logger
Expand Down
6 changes: 3 additions & 3 deletions deepspeed/runtime/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@
from deepspeed.checkpoint.constants import OPTIMIZER_STATE_DICT
from deepspeed.runtime.sparse_tensor import SparseTensor

import deepspeed.runtime.lr_schedules as lr_schedules
import deepspeed.utils.groups as groups
from deepspeed.runtime import lr_schedules
from deepspeed.utils import groups
from deepspeed.runtime.utils import get_grad_norm
from deepspeed.utils import logger, log_dist, instrument_w_nvtx
from deepspeed.comm.comm import init_distributed
Expand Down Expand Up @@ -219,7 +219,7 @@ def __init__(
self.use_ds_comm = False # False --> Use torch.dist, True --> Use ds.comm backend.

global dist
import deepspeed.comm as dist
from deepspeed import comm as dist
self._is_gradient_accumulation_boundary = None

# for debug purposes - can then debug print: debug_get_module_name(module)
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/fp16/fused_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from deepspeed.runtime.utils import get_global_norm, get_grad_norm, CheckOverflow, get_weight_norm
from deepspeed.runtime.fp16.loss_scaler import INITIAL_LOSS_SCALE, SCALE_WINDOW, MIN_LOSS_SCALE
from deepspeed.utils import groups, logger, log_dist
import deepspeed.comm as dist
from deepspeed import comm as dist
from deepspeed.checkpoint.constants import OPTIMIZER_STATE_DICT, CLIP_GRAD


Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/fp16/onebit/adam.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import importlib
import numpy as np
import time
import deepspeed.comm as dist
from deepspeed import comm as dist

from deepspeed.utils.logging import logger

Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/fp16/onebit/lamb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import types
import torch
import numpy as np
import deepspeed.comm as dist
from deepspeed import comm as dist
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors


Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/fp16/onebit/zoadam.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import importlib
import numpy as np
import time
import deepspeed.comm as dist
from deepspeed import comm as dist

from deepspeed.utils.logging import logger

Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/fp16/unfused_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from deepspeed.runtime.fp16.loss_scaler import INITIAL_LOSS_SCALE, SCALE_WINDOW, MIN_LOSS_SCALE
from deepspeed.utils import logger
from deepspeed.checkpoint.constants import OPTIMIZER_STATE_DICT
import deepspeed.comm as dist
from deepspeed import comm as dist


class FP16_UnfusedOptimizer(DeepSpeedOptimizer):
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/pipe/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import torch
import torch.nn as nn
import torch.optim as optim
import deepspeed.comm as dist
from deepspeed import comm as dist

from deepspeed.utils.logging import logger
from deepspeed.utils.timer import SynchronizedWallClockTimer, ThroughputTimer
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/pipe/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import torch
import torch.nn as nn
import deepspeed.comm as dist
from deepspeed import comm as dist

from deepspeed.utils import logger
from .. import utils as ds_utils
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/pipe/p2p.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import typing

import torch
import deepspeed.comm as dist
from deepspeed import comm as dist

# To query whether we have send/recv support
from packaging.version import Version
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/pipe/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from deepspeed.utils import logger

import deepspeed.comm as dist
from deepspeed import comm as dist
import sys

from collections import namedtuple
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/swap_tensor/async_swapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""
import torch

import deepspeed.comm as dist
from deepspeed import comm as dist
from deepspeed.utils.logging import logger
from deepspeed.runtime.swap_tensor.utils import swap_out_tensors, SwapBuffer

Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/swap_tensor/optimizer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import os
import torch

import deepspeed.comm as dist
from deepspeed import comm as dist
from deepspeed.utils.logging import logger
from deepspeed.runtime.zero.offload_constants import *
from deepspeed.runtime.swap_tensor.constants import *
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from deepspeed.utils.logging import logger
from deepspeed.ops.aio import AsyncIOBuilder
import deepspeed.comm as dist
from deepspeed import comm as dist

from deepspeed.runtime.swap_tensor.constants import *
from deepspeed.runtime.swap_tensor.utils import swap_in_tensors, swap_out_tensors, print_object, \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import shutil
from enum import Enum
import torch
import deepspeed.comm as dist
from deepspeed import comm as dist

from deepspeed.utils.logging import logger
from deepspeed.ops.aio import AsyncIOBuilder
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from deepspeed.utils.logging import logger
from deepspeed.ops.aio import AsyncIOBuilder
import deepspeed.comm as dist
from deepspeed import comm as dist

from deepspeed.runtime.zero.offload_constants import *
from deepspeed.runtime.swap_tensor.constants import *
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/swap_tensor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import torch
from deepspeed.utils.logging import logger

import deepspeed.comm as dist
from deepspeed import comm as dist

from deepspeed.runtime.swap_tensor.constants import AIO_BLOCK_SIZE, AIO_QUEUE_DEPTH, \
AIO_THREAD_COUNT, AIO_SINGLE_SUBMIT, AIO_OVERLAP_EVENTS
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import torch
from torch._six import inf
import deepspeed.comm as dist
from deepspeed import comm as dist

from deepspeed.utils import groups, logger
from deepspeed.runtime.constants import PIPE_REPLICATED
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/zero/contiguous_memory_allocator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import torch

import deepspeed.comm as dist
from deepspeed import comm as dist


def print_rank_0(message):
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/zero/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from torch.nn import init
from torch.nn.modules.module import Module
from deepspeed.runtime.utils import noop_decorator
import deepspeed.comm as dist
from deepspeed import comm as dist

tensor_map = {}

Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/zero/partition_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import torch
from torch import Tensor
import deepspeed.comm as dist
from deepspeed import comm as dist
from torch.nn import Module
from torch.nn import Parameter

Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/zero/partitioned_param_coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from torch.cuda import Event, Stream
from torch.nn import Module, Parameter

import deepspeed.comm as dist
from deepspeed import comm as dist
from deepspeed.utils.logging import logger
from deepspeed.runtime.zero.partition_parameters import *
from deepspeed.runtime.zero.offload_constants import *
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/zero/stage3.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import torch
from torch.cuda import Event, Stream
from torch.nn import Module, Parameter
import deepspeed.comm as dist
from deepspeed import comm as dist
import math
from torch._six import inf
from torch.nn import Module
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/zero/stage_1_and_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
'''

import torch
import deepspeed.comm as dist
from deepspeed import comm as dist
from torch._six import inf
from packaging import version as pkg_version

Expand Down
2 changes: 1 addition & 1 deletion deepspeed/runtime/zero/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import List

import torch
import deepspeed.comm as dist
from deepspeed import comm as dist
from deepspeed.utils import logger
from deepspeed.ops.adam import DeepSpeedCPUAdam
from deepspeed.ops.adam import FusedAdam
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/utils/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"""

import torch
import deepspeed.comm as dist
from deepspeed import comm as dist

from deepspeed.utils import logger, log_dist
from deepspeed.utils.exceptions import DeprecatedException
Expand Down
2 changes: 1 addition & 1 deletion deepspeed/utils/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import sys
import os

import deepspeed.comm as dist
from deepspeed import comm as dist

log_levels = {
"debug": logging.DEBUG,
Expand Down
Loading