split into sh scripts

jmgirard · jmgirard · commit bc7385d95a65 · 2024-11-17T16:18:44.000-06:00
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,6 @@
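+# .dockerignore: paths excluded from the build context. NOTE(review): `.dockerfile` does not match `novad.Dockerfile`/`vad.Dockerfile`; confirm intent.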
+.dockerfile
+.git
+.gitignore
+LICENSE
+README.md
diff --git a/LICENSE b/LICENSE
@@ -1,21 +1,21 @@
-MIT License
-
-Copyright (c) 2024 Jeffrey Girard
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+MIT License
+
+Copyright (c) 2024 Jeffrey Girard
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -1,21 +1,21 @@
-# wsl-cuda-whisper
-The audio.whisper R package allows users to easily use OpenAI's Whisper model (e.g., for automated transcription of audio files) from R. Significant speedups can be achieved on machines with CUDA-enabled graphics cards, but setting this up can be complicated. This docker image allows a user on Windows to easily install all the dependencies needed to run audio.whisper with CUDA support via Windows Subsystem for Linux (WSL2). It is built on top of the rocker/tidyverse image, which means it comes with RStudio Server installed.
-
-Versions:
-- `jmgirard/wsl-cuda-whisper:vad` is a larger image that contains voice activity detection (VAD) via {audio.vadwebrtc} and {audio.vadsilero}. It also uses CUDA 11.8 as required by these packages.
-- `jmgirard/wsl-cuda-whisper:novad` is a more streamlined image that does not contain VAD and uses the newest CUDA 12.6 version.
-
-Usage:
-1. Verify that your machine's graphics card supports CUDA: https://developer.nvidia.com/cuda-gpus
-2. On Windows, install the latest game-ready driver from NVIDIA: https://www.nvidia.com/Download/index.aspx#
-3. On Windows, install the latest version of Docker Desktop: https://www.docker.com/products/docker-desktop/
-4. Open Docker Desktop and click the Terminal button on the bottom of the screen
-5. In the Terminal, type `docker pull jmgirard/wsl-cuda-whisper` (hit Enter and wait, it may take a while)
-6. In the Terminal, type `docker run --gpus all --rm -it -e PASSWORD=pass -p 8787:8787 jmgirard/wsl-cuda-whisper`
-7. If you want access to the Windows filesystem, you can add `-v "C:\Users\jmgirard:/data"` and then access `/data` in R
-8. Once the Terminal has a line beginning with "TTY detected.", the container is ready
-9. In Docker Desktop, click the Containers tab on the left and click the "8787:8787" link
-10. Your browser should show a login page, enter "rstudio" as the username and "pass" for the password
-11. You should now be shown the RStudio page, so enter `library(audio.whisper)` 
-12. Now you can download and load whisper models via, e.g., `model <- whisper("tiny", use_gpu = TRUE)`
-13. You can now use the `model` object and the `predict()` function with great speed
+# wsl-cuda-whisper
+The audio.whisper R package allows users to easily use OpenAI's Whisper model (e.g., for automated transcription of audio files) from R. Significant speedups can be achieved on machines with CUDA-enabled graphics cards, but setting this up can be complicated. This Docker image lets a Windows user easily install all the dependencies needed to run audio.whisper with CUDA support via Windows Subsystem for Linux (WSL2). It is built on top of the rocker/tidyverse image, which means it comes with RStudio Server installed.
+
+Versions:
+- `jmgirard/wsl-cuda-whisper:vad` is a larger image that contains voice activity detection (VAD) via {audio.vadwebrtc} and {audio.vadsilero}. It also uses CUDA 11.8 as required by these packages.
+- `jmgirard/wsl-cuda-whisper:novad` is a more streamlined image that does not contain VAD and uses the newest CUDA 12.6 version.
+
+Usage:
+1. Verify that your machine's graphics card supports CUDA: https://developer.nvidia.com/cuda-gpus
+2. On Windows, install the latest game-ready driver from NVIDIA: https://www.nvidia.com/Download/index.aspx#
+3. On Windows, install the latest version of Docker Desktop: https://www.docker.com/products/docker-desktop/
+4. Open Docker Desktop and click the Terminal button on the bottom of the screen
+5. In the Terminal, type `docker pull jmgirard/wsl-cuda-whisper` (hit Enter and wait, it may take a while)
+6. In the Terminal, type `docker run --gpus all --rm -it -e PASSWORD=pass -p 8787:8787 jmgirard/wsl-cuda-whisper`
+7. If you want access to the Windows filesystem, you can add `-v "C:\Users\jmgirard:/data"` and then access `/data` in R
+8. Once the Terminal has a line beginning with "TTY detected.", the container is ready
+9. In Docker Desktop, click the Containers tab on the left and click the "8787:8787" link
+10. Your browser should show a login page; enter "rstudio" as the username and "pass" as the password
+11. You should now see the RStudio page; enter `library(audio.whisper)` in the R console
+12. Now you can download and load whisper models via, e.g., `model <- whisper("tiny", use_gpu = TRUE)`
+13. You can now use the `model` object and the `predict()` function with great speed
diff --git a/install_cuda_11.8.sh b/install_cuda_11.8.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# install_cuda_11.8.sh
+
+set -e
+
+# A function to install apt packages only if they are not installed
+function apt_install() {
+    if ! dpkg -s "$@" >/dev/null 2>&1; then
+        if [ "$(find /var/lib/apt/lists/* | wc -l)" = "0" ]; then
+            apt-get update
+        fi
+        apt-get install -y --no-install-recommends "$@"
+    fi
+}
+
+# Install apt dependencies
+apt_install \
+    build-essential \
+    ca-certificates \
+    curl \
+    ffmpeg \
+    gnupg \
+    software-properties-common \
+    wget
+
+# Set up the NVIDIA CUDA 11.8 local repository for WSL Ubuntu
+CUDA_REPO_DEB=cuda-repo-wsl-ubuntu-11-8-local_11.8.0-1_amd64.deb
+wget -nv https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-wsl-ubuntu.pin
+mv cuda-wsl-ubuntu.pin /etc/apt/preferences.d/cuda-repository-pin-600
+wget -nv "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/${CUDA_REPO_DEB}"
+dpkg -i "${CUDA_REPO_DEB}"
+cp /var/cuda-repo-wsl-ubuntu-11-8-local/cuda-*-keyring.gpg /usr/share/keyrings/
+
+# Install the toolkit, then purge the local repo package so its payload is not kept in the layer
+apt-get update
+apt-get install -y cuda-toolkit-11-8
+apt-get purge -y cuda-repo-wsl-ubuntu-11-8-local
+rm -rf /var/lib/apt/lists/* /var/cuda-repo-wsl-ubuntu-11-8-local "${CUDA_REPO_DEB}" /tmp/*
diff --git a/install_cuda_latest.sh b/install_cuda_latest.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# install_cuda_latest.sh
+
+set -e
+
+# A function to install apt packages only if they are not installed
+function apt_install() {
+    if ! dpkg -s "$@" >/dev/null 2>&1; then
+        if [ "$(find /var/lib/apt/lists/* | wc -l)" = "0" ]; then
+            apt-get update
+        fi
+        apt-get install -y --no-install-recommends "$@"
+    fi
+}
+
+# Install apt dependencies
+apt_install \
+    build-essential \
+    ca-certificates \
+    curl \
+    ffmpeg \
+    gnupg \
+    software-properties-common \
+    wget
+
+# Set up the NVIDIA CUDA repository for WSL Ubuntu from its keyring package
+KEYRING_DEB=cuda-keyring_1.1-1_all.deb
+wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/${KEYRING_DEB}"
+dpkg -i "${KEYRING_DEB}"
+
+# Refresh the package index, then install the CUDA toolkit
+apt-get update
+apt_install cuda-toolkit
+# Remove the apt lists, the downloaded keyring package, and temporary files
+rm -rf /var/lib/apt/lists/* "${KEYRING_DEB}" /tmp/*
diff --git a/install_vad.sh b/install_vad.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# install_vad.sh
+
+set -e
+
+## Build ARGs
+NCPUS=$(nproc || echo 1)
+
+# Install R packages
+install2.r --error --skipinstalled -n "$NCPUS" \
+    abseil \
+    audio \
+    remotes \
+    torch
+
+# Install torch (requires CUDA 11.8)
+R --no-save --no-restore -e "torch::install_torch()"
+
+# Install VAD packages
+R --no-save --no-restore -e "remotes::install_github('bnosac/audio.vadwebrtc')"
+R --no-save --no-restore -e "remotes::install_github('bnosac/audio.vadsilero')"
+
+# Clean up
+rm -rf /tmp/*
diff --git a/novad.Dockerfile b/novad.Dockerfile
@@ -0,0 +1,16 @@
+ARG R_VERSION=4.4.1
+FROM rocker/tidyverse:${R_VERSION}
+
+# Run the installer through bash so the build does not rely on the script's exec bit
+COPY install_cuda_latest.sh /rocker_scripts/install_cuda_latest.sh
+RUN bash /rocker_scripts/install_cuda_latest.sh
+
+# Prepend the CUDA directories, keeping any value the base image already set
+ENV PATH=/usr/local/cuda/bin${PATH:+:${PATH}} \
+    LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} \
+    CUDA_PATH=/usr/local/cuda \
+    WHISPER_CUBLAS=1
+
+RUN install2.r --error --skipinstalled remotes
+
+CMD ["bash", "-c", "R --no-save --no-restore -e 'remotes::install_github(\"bnosac/audio.whisper\")' && exec /init"]
diff --git a/novad/Dockerfile b/novad/Dockerfile
diff --git a/vad.Dockerfile b/vad.Dockerfile
@@ -0,0 +1,15 @@
+ARG R_VERSION=4.4.1
+FROM rocker/tidyverse:${R_VERSION}
+
+# Run the installers through bash so the build does not rely on the scripts' exec bits
+COPY install_cuda_11.8.sh /rocker_scripts/install_cuda_11.8.sh
+COPY install_vad.sh /rocker_scripts/install_vad.sh
+RUN bash /rocker_scripts/install_cuda_11.8.sh && bash /rocker_scripts/install_vad.sh
+
+# Prepend the CUDA directories, keeping any value the base image already set
+ENV PATH=/usr/local/cuda/bin${PATH:+:${PATH}} \
+    LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} \
+    CUDA_PATH=/usr/local/cuda \
+    WHISPER_CUBLAS=1
+
+CMD ["bash", "-c", "R --no-save --no-restore -e 'remotes::install_github(\"bnosac/audio.whisper\")' && exec /init"]
diff --git a/vad/Dockerfile b/vad/Dockerfile