0.14.1 +deepspeed (not in prebuilt docker)
matatonic committed Jun 27, 2024
1 parent ae6a384 commit c957ad8
Showing 5 changed files with 15 additions and 3 deletions.
7 changes: 6 additions & 1 deletion Dockerfile
@@ -5,6 +5,11 @@ RUN apt-get update && apt-get install --no-install-recommends -y curl ffmpeg
RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then apt-get install --no-install-recommends -y build-essential ; fi
RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ; fi
ENV PATH="/root/.cargo/bin:${PATH}"
# for deepspeed support - doesn't seem worth it, image +7.5GB, over the 10GB ghcr.io limit, and no noticeable gain in speed or VRAM usage?
#RUN curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb
#RUN dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb
#RUN apt-get update && apt-get install --no-install-recommends -y build-essential cuda-toolkit
#ENV CUDA_HOME=/usr/local/cuda
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

WORKDIR /app
@@ -17,7 +22,7 @@ COPY requirements*.txt /app/
RUN if [ "${USE_ROCM}" = "1" ]; then mv /app/requirements-rocm.txt /app/requirements.txt; fi
RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt

-COPY speech.py openedai.py say.py *.sh *.default.yaml README.md LICENSE /app/
+COPY *.py *.sh *.default.yaml README.md LICENSE /app/

ARG PRELOAD_MODEL
ENV PRELOAD_MODEL=${PRELOAD_MODEL}
4 changes: 4 additions & 0 deletions README.md
@@ -29,6 +29,10 @@ If you find a better voice match for `tts-1` or `tts-1-hd`, please let me know s

## Recent Changes

Version 0.14.1, 2024-06-26

* Make deepspeed support possible (`--use-deepspeed`), but it is not enabled in the pre-built docker images (it makes them too large). Requires the cuda-toolkit to be installed; see the Dockerfile comment for details

Version 0.14.0, 2024-06-26

* Added `response_format`: `wav` and `pcm` support
3 changes: 2 additions & 1 deletion requirements-rocm.txt
@@ -6,7 +6,8 @@ piper-tts==1.2.0
# xtts
TTS==0.22.0
# https://github.com/huggingface/transformers/issues/31040
transformers<4.41.0
deepspeed<0.14.0
# XXX, 3.8+ has some issue for now
spacy==3.7.4

1 change: 1 addition & 0 deletions requirements.txt
@@ -7,6 +7,7 @@ piper-tts==1.2.0
TTS==0.22.0
# https://github.com/huggingface/transformers/issues/31040
transformers<4.41.0
deepspeed<0.14.0
# XXX, 3.8+ has some issue for now
spacy==3.7.4

3 changes: 2 additions & 1 deletion speech.py
@@ -64,7 +64,7 @@ def __init__(self, model_name, device, model_path=None, unload_timer=None):
config = XttsConfig()
config.load_json(config_path)
self.xtts = Xtts.init_from_config(config)
-self.xtts.load_checkpoint(config, checkpoint_dir=model_path, use_deepspeed=False) # XXX there are no prebuilt deepspeed wheels??
+self.xtts.load_checkpoint(config, checkpoint_dir=model_path, use_deepspeed=args.use_deepspeed) # XXX there are no prebuilt deepspeed wheels??
self.xtts = self.xtts.to(device=device)
self.xtts.eval()

@@ -314,6 +314,7 @@ def auto_torch_device():
parser.add_argument('--xtts_device', action='store', default=auto_torch_device(), help="Set the device for the xtts model. The special value of 'none' will use piper for all models.")
parser.add_argument('--preload', action='store', default=None, help="Preload a model (Ex. 'xtts' or 'xtts_v2.0.2'). By default it's loaded on first use.")
parser.add_argument('--unload-timer', action='store', default=None, type=int, help="Idle unload timer for the XTTS model in seconds")
parser.add_argument('--use-deepspeed', action='store_true', default=False, help="Use deepspeed for faster generation and lower VRAM usage in xtts")
parser.add_argument('-P', '--port', action='store', default=8000, type=int, help="Server tcp port")
parser.add_argument('-H', '--host', action='store', default='0.0.0.0', help="Host to listen on, Ex. 0.0.0.0")
parser.add_argument('-L', '--log-level', default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Set the log level")
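The new `--use-deepspeed` option is a standard argparse boolean switch (`action='store_true'`): absent by default, set to `True` only when the flag appears on the command line. A minimal runnable sketch — the parser here is reduced to just this one option for illustration, which is an assumption, not the full parser from speech.py:

```python
import argparse

# Sketch of the new flag only (assumption: other options omitted).
parser = argparse.ArgumentParser()
parser.add_argument('--use-deepspeed', action='store_true', default=False,
                    help="Use deepspeed for faster generation and lower VRAM usage in xtts")

# argparse maps '--use-deepspeed' to the attribute 'use_deepspeed'.
args = parser.parse_args([])
print(args.use_deepspeed)   # False (flag not given)

args = parser.parse_args(['--use-deepspeed'])
print(args.use_deepspeed)   # True (flag given)
```

This is why the speech.py change can pass `args.use_deepspeed` straight through to `load_checkpoint`: the value is always a plain bool, defaulting to the old behavior (`False`) when the flag is omitted.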
