diff --git a/deepspeed/git_version_info.py b/deepspeed/git_version_info.py index d17948ae41a7..f04982c74f0d 100644 --- a/deepspeed/git_version_info.py +++ b/deepspeed/git_version_info.py @@ -2,8 +2,12 @@ # This is populated by setup.py from .git_version_info_installed import * except ModuleNotFoundError: - # Will be missing from checkouts that haven't been installed (e.g., readthedocs) - version = open('version.txt', 'r').read().strip() + import os + if os.path.isfile('version.txt'): + # Will be missing from checkouts that haven't been installed (e.g., readthedocs) + version = open('version.txt', 'r').read().strip() + else: + version = "0.0.0" git_hash = '[none]' git_branch = '[none]' diff --git a/deepspeed/ops/sparse_attention/softmax.py b/deepspeed/ops/sparse_attention/softmax.py index cd18fbcae71f..a0805ada4bc0 100644 --- a/deepspeed/ops/sparse_attention/softmax.py +++ b/deepspeed/ops/sparse_attention/softmax.py @@ -224,8 +224,8 @@ class Softmax: For more details about sparsity config, please see `Generative Modeling with Sparse Transformers`: https://arxiv.org/abs/1904.10509 """ - - sparse_softmax = _sparse_softmax.apply + def sparse_softmax(*args, **kwargs): + return _sparse_softmax.apply(*args, **kwargs) def make_lut(self, device): """Generates the sparsity layout used in block-sparse softmax diff --git a/docs/code-docs/source/conf.py b/docs/code-docs/source/conf.py index 167f6427d7b4..eb9a412d8a4a 100644 --- a/docs/code-docs/source/conf.py +++ b/docs/code-docs/source/conf.py @@ -79,4 +79,4 @@ autoclass_content = 'both' -autodoc_mock_imports = ["torch", "apex", "mpi4py", "tensorboardX", "numpy"] +autodoc_mock_imports = ["torch", "apex", "mpi4py", "tensorboardX", "numpy", "cupy"] diff --git a/requirements/requirements-readthedocs.txt b/requirements/requirements-readthedocs.txt index c032a8c9fdad..78620c472c9d 100644 --- a/requirements/requirements-readthedocs.txt +++ b/requirements/requirements-readthedocs.txt @@ -1,2 +1 @@ tqdm -psutil