diff --git a/deepspeed/__init__.py b/deepspeed/__init__.py
index de5d1efcd1aa..a6c906f1119a 100755
--- a/deepspeed/__init__.py
+++ b/deepspeed/__init__.py
@@ -4,16 +4,18 @@
 import sys
 import types
 
-from deepspeed.runtime.engine import DeepSpeedEngine
-from deepspeed.runtime.engine import ADAM_OPTIMIZER, LAMB_OPTIMIZER
-from deepspeed.runtime.lr_schedules import add_tuning_arguments
-from deepspeed.runtime.config import DeepSpeedConfig
-from deepspeed.runtime.activation_checkpointing import checkpointing
-from deepspeed.ops.transformer import DeepSpeedTransformerLayer, DeepSpeedTransformerConfig
-from deepspeed.utils import logger
+from . import ops
+
+from .runtime.engine import DeepSpeedEngine
+from .runtime.engine import ADAM_OPTIMIZER, LAMB_OPTIMIZER
+from .runtime.lr_schedules import add_tuning_arguments
+from .runtime.config import DeepSpeedConfig
+from .runtime.activation_checkpointing import checkpointing
+from .ops.transformer import DeepSpeedTransformerLayer, DeepSpeedTransformerConfig
+from .utils import logger
 
 try:
-    from deepspeed.git_version_info import version, git_hash, git_branch
+    from .git_version_info import version, git_hash, git_branch
 except ImportError:
     version = "0.0.0+unknown"
     git_hash = None
diff --git a/deepspeed/ops/__init__.py b/deepspeed/ops/__init__.py
index e69de29bb2d1..6c4187415aae 100644
--- a/deepspeed/ops/__init__.py
+++ b/deepspeed/ops/__init__.py
@@ -0,0 +1,7 @@
+from ..git_version_info import installed_ops as __installed_ops__
+from . import lamb
+from . import transformer
+if __installed_ops__['sparse-attn']:
+    from . import sparse_attention
+if __installed_ops__['cpu-adam']:
+    from . import adam
diff --git a/deepspeed/ops/adam/__init__.py b/deepspeed/ops/adam/__init__.py
index 13883dc7b590..1d8844409374 100755
--- a/deepspeed/ops/adam/__init__.py
+++ b/deepspeed/ops/adam/__init__.py
@@ -1 +1 @@
-from deepspeed.ops.adam.cpu_adam import DeepSpeedCPUAdam
+from .cpu_adam import DeepSpeedCPUAdam
diff --git a/deepspeed/ops/adam/cpu_adam.py b/deepspeed/ops/adam/cpu_adam.py
index 5387090e3815..888be4dc9ad4 100755
--- a/deepspeed/ops/adam/cpu_adam.py
+++ b/deepspeed/ops/adam/cpu_adam.py
@@ -1,7 +1,8 @@
 import math
 import torch
-import deepspeed.ops.adam.cpu_adam_op as ds_opt_adam
+import importlib
+ds_opt_adam = None
 
 
 class DeepSpeedCPUAdam(torch.optim.Optimizer):
 
@@ -24,6 +25,9 @@ def __init__(self,
         super(DeepSpeedCPUAdam, self).__init__(model_params, default_args)
         self.opt_id = DeepSpeedCPUAdam.optimizer_id
         DeepSpeedCPUAdam.optimizer_id = DeepSpeedCPUAdam.optimizer_id + 1
+
+        global ds_opt_adam
+        ds_opt_adam = importlib.import_module('deepspeed.ops.adam.cpu_adam_op')
         ds_opt_adam.create_adam(self.opt_id, lr, bettas[0], bettas[1], eps, weight_decay)
 
     def __setstate__(self, state):
diff --git a/setup.py b/setup.py
index 02f2f7fd005a..ae5010a65b11 100755
--- a/setup.py
+++ b/setup.py
@@ -27,6 +27,12 @@ def fetch_requirements(path):
 dev_requires = fetch_requirements('requirements/requirements-dev.txt')
 sparse_attn_requires = fetch_requirements('requirements/requirements-sparse-attn.txt')
 
+# Constants for each op
+LAMB = "lamb"
+TRANSFORMER = "transformer"
+SPARSE_ATTN = "sparse-attn"
+ADAM = "cpu-adam"
+
 # Build environment variables for custom builds
 DS_BUILD_LAMB_MASK = 1
 DS_BUILD_TRANSFORMER_MASK = 10
@@ -50,15 +56,15 @@ def fetch_requirements(path):
 BUILD_MASK = (DS_BUILD_LAMB | DS_BUILD_TRANSFORMER | DS_BUILD_SPARSE_ATTN
               | DS_BUILD_ADAM)
 
-install_ops = []
+install_ops = dict.fromkeys([LAMB, TRANSFORMER, SPARSE_ATTN, ADAM], False)
 if BUILD_MASK & DS_BUILD_LAMB:
-    install_ops.append('lamb')
+    install_ops[LAMB] = True
 if BUILD_MASK & DS_BUILD_ADAM:
-    install_ops.append('adam')
+    install_ops[ADAM] = True
 if BUILD_MASK & DS_BUILD_TRANSFORMER:
-    install_ops.append('transformer')
+    install_ops[TRANSFORMER] = True
 if BUILD_MASK & DS_BUILD_SPARSE_ATTN:
-    install_ops.append('sparse-attn')
+    install_ops[SPARSE_ATTN] = True
 if len(install_ops) == 0:
     print("Building without any cuda/cpp extensions")
 print(f'BUILD_MASK={BUILD_MASK}, install_ops={install_ops}')
@@ -196,14 +202,21 @@ def fetch_requirements(path):
 
 
 def command_exists(cmd):
-    result = subprocess.Popen(f'type {cmd}', stdout=subprocess.PIPE, shell=True)
-    return result.wait() == 0
+    if '|' in cmd:
+        cmds = cmd.split("|")
+    else:
+        cmds = [cmd]
+    valid = False
+    for cmd in cmds:
+        result = subprocess.Popen(f'type {cmd}', stdout=subprocess.PIPE, shell=True)
+        valid = valid or result.wait() == 0
+    return valid
 
 
 ## Sparse transformer ##
 if BUILD_MASK & DS_BUILD_SPARSE_ATTN:
     # Check to see if llvm and cmake are installed since they are dependencies
-    required_commands = ['llc-9', 'cmake']
+    required_commands = ['llvm-config|llvm-config-9', 'cmake']
 
     command_status = list(map(command_exists, required_commands))
     if not all(command_status):
@@ -213,6 +226,8 @@ def command_exists(cmd):
         )
         warnings.warn(
             'Skipping sparse attention installation due to missing required packages')
+        # remove from installed ops list
+        install_ops[SPARSE_ATTN] = False
     elif TORCH_MAJOR == 1 and TORCH_MINOR >= 5:
         ext_modules.append(
             CppExtension(name='deepspeed.ops.sparse_attention.cpp_utils',
@@ -223,6 +238,8 @@ def command_exists(cmd):
         install_requires += sparse_attn_requires
     else:
         warnings.warn('Unable to meet requirements to install sparse attention')
+        # remove from installed ops list
+        install_ops[SPARSE_ATTN] = False
 
 # Add development requirements
 install_requires += dev_requires
@@ -243,6 +260,7 @@ def command_exists(cmd):
     fd.write(f"version='{VERSION}+{git_hash}'\n")
     fd.write(f"git_hash='{git_hash}'\n")
     fd.write(f"git_branch='{git_branch}'\n")
+    fd.write(f"installed_ops={install_ops}\n")
 
 print(f'install_requires={install_requires}')
 
diff --git a/tests/unit/adam_test.py b/tests/unit/adam_test.py
index 8e6caa382c12..0fedfeb18e42 100755
--- a/tests/unit/adam_test.py
+++ b/tests/unit/adam_test.py
@@ -1,5 +1,5 @@
 import torch
-from deepspeed import DeepSpeedCPUAdam
+from deepspeed.ops.adam import DeepSpeedCPUAdam
 import time
 
 device = 'cpu'
diff --git a/tests/unit/test_adam_acuracy.py b/tests/unit/test_adam_acuracy.py
index 410543f516d1..b002231622f4 100755
--- a/tests/unit/test_adam_acuracy.py
+++ b/tests/unit/test_adam_acuracy.py
@@ -6,10 +6,10 @@
 import pytest
 import copy
-from deepspeed import DeepSpeedCPUAdam
+from deepspeed.ops.adam import DeepSpeedCPUAdam
 
 
 def check_equal(first, second, atol=1e-2, verbose=False):
-    if verbos:
+    if verbose:
         print(first)
         print(second)
     x = first.detach().numpy()