oobabooga · oobabooga · Nov 30, 2023 · Nov 29, 2023 · Nov 29, 2023 · Nov 29, 2023
diff --git a/docker/.dockerignore → .dockerignore b/docker/.dockerignore → .dockerignore
diff --git a/.gitignore b/.gitignore
@@ -26,7 +26,6 @@
 .DS_Store
 .eslintrc.js
 .idea
-.env
 .venv
 venv
 .envrc
@@ -42,3 +41,9 @@ package.json
 package-lock.json
 Thumbs.db
 wandb
+
+# ignore user docker config and top level links to docker files
+/docker-compose.yaml
+/docker-compose.yml
+/Dockerfile
+.env
diff --git a/README.md b/README.md
@@ -163,14 +163,18 @@ The requirments*.txt above contain various precompiled wheels. If you wish to co
 ### Alternative: Docker
 
 ```
-ln -s docker/{Dockerfile,docker-compose.yml,.dockerignore} .
+ln -s docker/{nvidia/Dockerfile,docker-compose.yml} .
 cp docker/.env.example .env
-# Edit .env and set TORCH_CUDA_ARCH_LIST based on your GPU model
+# Edit .env and set: 
+#   TORCH_CUDA_ARCH_LIST based on your GPU model
+#   APP_RUNTIME_GID      your host user's group id (run `id -g` in a terminal)
+#   BUILD_EXTENSIONS     optionally add comma separated list of extensions to build
 docker compose up --build
 ```
 
 * You need to have Docker Compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions.
 * For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker).
+* Note: this Docker setup currently breaks GPTQ-for-Llama.
 
 ### Updating the requirements
 

diff --git a/docker/.env.example b/docker/.env.example
@@ -2,19 +2,21 @@
 # however for me to work i had to specify the exact version for my card ( 2060 ) it was 7.5
 # https://developer.nvidia.com/cuda-gpus you can find the version for your card here
 TORCH_CUDA_ARCH_LIST=7.5
-
 # your command-line flags go here:
 CLI_ARGS=--listen
-
 # the port the webui binds to on the host
 HOST_PORT=7860
 # the port the webui binds to inside the container
 CONTAINER_PORT=7860
-
 # the port the api binds to on the host
 HOST_API_PORT=5000
 # the port the api binds to inside the container
 CONTAINER_API_PORT=5000
-
-# the version used to install text-generation-webui from
-WEBUI_VERSION=HEAD
+# Comma-separated list of extensions to build
+BUILD_EXTENSIONS=""
+# Set APP_RUNTIME_GID to an appropriate host system group to enable access to mounted volumes 
+# You can find your current host user group id with the command `id -g`
+APP_RUNTIME_GID=6972
+# override default app build permissions (handy for deploying to cloud)
+#APP_GID=6972
+#APP_UID=6972
diff --git a/docker/Dockerfile b/docker/Dockerfile
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
@@ -5,28 +5,31 @@ services:
       context: .
       args:
         # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
-        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
-        WEBUI_VERSION: ${WEBUI_VERSION:-HEAD}
+        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
+        BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
+        APP_GID: ${APP_GID:-6972}  # ':-' falls back to the default when the variable is unset or empty
+        APP_UID: ${APP_UID:-6972}  # same ':-' form as APP_GID so an empty APP_UID never reaches the build
     env_file: .env
+    user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
     ports:
       - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
       - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
     stdin_open: true
     tty: true
     volumes:
-      - ./characters:/app/characters
-      - ./extensions:/app/extensions
-      - ./loras:/app/loras
-      - ./models:/app/models
-      - ./presets:/app/presets
-      - ./prompts:/app/prompts
-      - ./softprompts:/app/softprompts
-      - ./training:/app/training
+      - ./characters:/home/app/text-generation-webui/characters
+      - ./extensions:/home/app/text-generation-webui/extensions
+      - ./loras:/home/app/text-generation-webui/loras
+      - ./models:/home/app/text-generation-webui/models
+      - ./presets:/home/app/text-generation-webui/presets
+      - ./prompts:/home/app/text-generation-webui/prompts
+      - ./softprompts:/home/app/text-generation-webui/softprompts
+      - ./training:/home/app/text-generation-webui/training
       - ./cloudflared:/etc/cloudflared
     deploy:
       resources:
         reservations:
           devices:
             - driver: nvidia
-              device_ids: ['0']
+              count: all
               capabilities: [gpu]
diff --git a/docker/nvidia/Dockerfile b/docker/nvidia/Dockerfile
@@ -0,0 +1,56 @@
+# BUILDER: builds wheels for requirements.txt plus any BUILD_EXTENSIONS requirements files
+FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 AS builder
+WORKDIR /builder
+ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
+ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
+ARG APP_UID="${APP_UID:-6972}"
+ARG APP_GID="${APP_GID:-6972}"
+# create / update build env (apt-get instead of apt, whose CLI is not stable for scripted use)
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
+    apt-get update && \
+    apt-get install --no-install-recommends -y git vim build-essential python3-dev pip && \
+    rm -rf /var/lib/apt/lists/*
+RUN --mount=type=cache,target=/root/.cache/pip,rw \
+    pip3 install --global --upgrade pip wheel setuptools && \
+    # make shared builder & runtime app user
+    addgroup --gid $APP_GID app_grp && \
+    useradd -m -u $APP_UID --gid app_grp app
+USER app:app_grp
+# build wheels for runtime (NOTE(review): pip runs as `app` from here on, so the /root/.cache/pip mount below may go unused - confirm)
+WORKDIR /home/app/build
+COPY --chown=app:app_grp requirements.txt /home/app/build
+COPY --chown=app:app_grp extensions /home/app/build/extensions
+RUN --mount=type=cache,target=/root/.cache/pip,rw \
+    # build all requirements files as wheel dists; the sed turns each comma separated extension name into its own "-r <path>/requirements.txt" argument
+    pip3 wheel -w wheels -r requirements.txt `echo "$BUILD_EXTENSIONS" | sed -r 's/([^,]+)\s*,?\s*/ -r \/home\/app\/build\/extensions\/\1\/requirements.txt/g'`
+# drop the setuptools .whl to avoid install issues; -f keeps the build going when no such wheel was produced
+RUN rm -f wheels/setuptools*.whl
+
+# RUNTIME: CUDA runtime base image that installs the wheels produced by the builder stage
+FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04
+ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6}"
+ARG APP_UID="${APP_UID:-6972}"
+ARG APP_GID="${APP_GID:-6972}"
+ENV CLI_ARGS=""
+# create / update runtime env (apt-get instead of apt, whose CLI is not stable for scripted use)
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
+    apt-get update && \
+    apt-get install --no-install-recommends -y git python3 pip && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip3 install --global --no-cache --upgrade pip wheel setuptools && \
+    # make shared builder & runtime app user
+    addgroup --gid $APP_GID app_grp && \
+    useradd -m -u $APP_UID --gid app_grp app
+USER app:app_grp
+# install locally built wheels for app
+WORKDIR /home/app/wheels
+COPY --from=builder /home/app/build/wheels /home/app/wheels
+COPY --chown=app:app_grp . /home/app/text-generation-webui
+RUN umask 0002 && \
+    chmod g+rwX /home/app/text-generation-webui && \
+    pip3 install --global --no-build-isolation --no-cache --no-index ./*.whl && \
+    rm -r /home/app/wheels
+WORKDIR /home/app/text-generation-webui
+EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
+# set umask to ensure group read / write at runtime; exec makes python3 replace the shell so it receives container signals directly
+CMD umask 0002 && export HOME=/home/app && exec python3 server.py ${CLI_ARGS}