diff --git a/deepspeed/git_version_info.py b/deepspeed/git_version_info.py
index d17948ae41a7..f04982c74f0d 100644
--- a/deepspeed/git_version_info.py
+++ b/deepspeed/git_version_info.py
@@ -2,8 +2,12 @@
     #  This is populated by setup.py
     from .git_version_info_installed import *
 except ModuleNotFoundError:
-    # Will be missing from checkouts that haven't been installed (e.g., readthedocs)
-    version = open('version.txt', 'r').read().strip()
+    # git_version_info_installed is absent from checkouts that haven't been installed (e.g., readthedocs)
+    import os
+    version = "0.0.0"  # placeholder used when version.txt is not in the current working directory
+    if os.path.isfile('version.txt'):
+        with open('version.txt', 'r') as fd:  # context manager closes the handle after reading
+            version = fd.read().strip()
     git_hash = '[none]'
     git_branch = '[none]'
 
diff --git a/deepspeed/ops/sparse_attention/softmax.py b/deepspeed/ops/sparse_attention/softmax.py
index cd18fbcae71f..a0805ada4bc0 100644
--- a/deepspeed/ops/sparse_attention/softmax.py
+++ b/deepspeed/ops/sparse_attention/softmax.py
@@ -224,8 +224,10 @@ class Softmax:
 
     For more details about sparsity config, please see `Generative Modeling with Sparse Transformers`: https://arxiv.org/abs/1904.10509
     """
-
-    sparse_softmax = _sparse_softmax.apply
+    @staticmethod
+    def sparse_softmax(*args, **kwargs):
+        """Forward all arguments unchanged to `_sparse_softmax.apply`, which is looked up at call time."""
+        return _sparse_softmax.apply(*args, **kwargs)
 
     def make_lut(self, device):
         """Generates the sparsity layout used in block-sparse softmax
diff --git a/docs/code-docs/source/conf.py b/docs/code-docs/source/conf.py
index 167f6427d7b4..eb9a412d8a4a 100644
--- a/docs/code-docs/source/conf.py
+++ b/docs/code-docs/source/conf.py
@@ -79,4 +79,4 @@
 
 autoclass_content = 'both'
 
-autodoc_mock_imports = ["torch", "apex", "mpi4py", "tensorboardX", "numpy"]
+autodoc_mock_imports = ["torch", "apex", "mpi4py", "tensorboardX", "numpy", "cupy"]  # modules Sphinx autodoc mocks instead of importing
diff --git a/requirements/requirements-readthedocs.txt b/requirements/requirements-readthedocs.txt
index c032a8c9fdad..78620c472c9d 100644
--- a/requirements/requirements-readthedocs.txt
+++ b/requirements/requirements-readthedocs.txt
@@ -1,2 +1 @@
 tqdm
-psutil