Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions Colab-TextGen-GPU.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,9 @@
" torver = torch.__version__\n",
" print(f\"TORCH: {torver}\")\n",
" is_cuda118 = '+cu118' in torver # 2.1.0+cu118\n",
" is_cuda117 = '+cu117' in torver # 2.0.1+cu117\n",
"\n",
" textgen_requirements = open('requirements.txt').read().splitlines()\n",
" if is_cuda117:\n",
" textgen_requirements = [req.replace('+cu121', '+cu117').replace('+cu122', '+cu117').replace('torch2.1', 'torch2.0') for req in textgen_requirements]\n",
" elif is_cuda118:\n",
" if is_cuda118:\n",
" textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]\n",
" with open('temp_requirements.txt', 'w') as file:\n",
" file.write('\\n'.join(textgen_requirements))\n",
Expand Down Expand Up @@ -130,4 +127,4 @@
"outputs": []
}
]
}
}
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,23 @@ The `requirements*.txt` above contain various wheels precompiled through GitHub
### Alternative: Docker

```
ln -s docker/{nvidia/Dockerfile,docker-compose.yml,.dockerignore} .
For NVIDIA GPU:
ln -s docker/{nvidia/Dockerfile,nvidia/docker-compose.yml,.dockerignore} .
For AMD GPU:
ln -s docker/{amd/Dockerfile,amd/docker-compose.yml,.dockerignore} .
For Intel GPU:
ln -s docker/{intel/Dockerfile,intel/docker-compose.yml,.dockerignore} .
For CPU only:
ln -s docker/{cpu/Dockerfile,cpu/docker-compose.yml,.dockerignore} .
cp docker/.env.example .env
# Create the logs and cache directories:
mkdir -p logs cache
# Edit .env and set:
# TORCH_CUDA_ARCH_LIST based on your GPU model
# APP_RUNTIME_GID your host user's group id (run `id -g` in a terminal)
# BUILD_EXTENSIONS optionally add comma separated list of extensions to build
# Edit CMD_FLAGS.txt and add the command-line flags you want the server to start with (e.g. --listen --cpu)
#
docker compose up --build
```

Expand Down
3 changes: 3 additions & 0 deletions docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@ APP_RUNTIME_GID=6972
# override default app build permissions (handy for deploying to cloud)
#APP_GID=6972
#APP_UID=6972
# Cache directories (paths inside the container) used for Hugging Face / Transformers downloads
TRANSFORMERS_CACHE=/home/app/text-generation-webui/cache/
HF_HOME=/home/app/text-generation-webui/cache/
21 changes: 21 additions & 0 deletions docker/amd/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Single-stage image for AMD GPUs: clones text-generation-webui and runs its
# start_linux.sh installer at build time with GPU_CHOICE=B.
FROM ubuntu:22.04

# Build arguments. Each falls back to the default shown when no value is
# supplied (e.g. from the compose file's build.args). ARG values are visible
# as environment variables to the RUN steps below.
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"

# Install OS-level build/runtime prerequisites.
# apt-get is used instead of apt because apt's CLI is not stable for scripts.
# The package index is removed in the same layer so it does not bloat the image.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
    apt-get update && \
    apt-get install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
    rm -rf /var/lib/apt/lists/*

# Fetch the application sources into /home/app/text-generation-webui.
WORKDIR /home/app/
RUN git clone https://github.com/oobabooga/text-generation-webui.git
WORKDIR /home/app/text-generation-webui

# Run the installer non-interactively; LAUNCH_AFTER_INSTALL=FALSE is passed so
# the build step only installs (presumably it would otherwise start the server).
RUN GPU_CHOICE=B USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose

# Server flags come from the build context's CMD_FLAGS.txt.
COPY CMD_FLAGS.txt /home/app/text-generation-webui/

# Documentation only; the compose file publishes the actual ports.
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}

# set umask to ensure group read / write at runtime.
# `exec` replaces the wrapper shell with start_linux.sh so that signals sent by
# `docker stop` reach the script instead of stopping at /bin/sh.
CMD umask 0002 && export HOME=/home/app/text-generation-webui && exec ./start_linux.sh
57 changes: 57 additions & 0 deletions docker/amd/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Compose definition for the AMD GPU image.
version: "3.3"
services:
  text-generation-webui:
    build:
      context: .
      args:
        # Requirements file to use:
        # | GPU | CPU | requirements file to use |
        # |--------|---------|---------|
        # | NVIDIA | has AVX2 | `requirements.txt` |
        # | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
        # | AMD | has AVX2 | `requirements_amd.txt` |
        # | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
        # | CPU only | has AVX2 | `requirements_cpu_only.txt` |
        # | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
        # | Apple | Intel | `requirements_apple_intel.txt` |
        # | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
        # Default: `requirements.txt`
        # BUILD_REQUIREMENTS: requirements.txt

        # Extension requirements to build:
        # BUILD_EXTENSIONS:

        # CUDA compute capabilities (https://developer.nvidia.com/cuda-gpus).
        # NOTE(review): this is an NVIDIA setting; it is forwarded as a build
        # arg here as well — confirm whether the AMD build actually needs it.
        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
        BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
        APP_GID: ${APP_GID:-6972}
        # `:-` (not `-`) so that an empty APP_UID= in .env also falls back to
        # the default, matching every other variable in this file.
        APP_UID: ${APP_UID:-6972}
    env_file: .env
    # Run as an unprivileged uid:gid taken from .env.
    user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
    ports:
      - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
      - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
    stdin_open: true
    # GPU access: extra group, host IPC, and the GPU device nodes.
    group_add:
      - video
    tty: true
    ipc: host
    devices:
      - /dev/kfd
      - /dev/dri
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    # Host directories bind-mounted over the application's data directories.
    volumes:
      - ./cache:/home/app/text-generation-webui/cache
      - ./characters:/home/app/text-generation-webui/characters
      - ./extensions:/home/app/text-generation-webui/extensions
      - ./loras:/home/app/text-generation-webui/loras
      - ./logs:/home/app/text-generation-webui/logs
      - ./models:/home/app/text-generation-webui/models
      - ./presets:/home/app/text-generation-webui/presets
      - ./prompts:/home/app/text-generation-webui/prompts
      - ./softprompts:/home/app/text-generation-webui/softprompts
      - ./training:/home/app/text-generation-webui/training
      - ./cloudflared:/etc/cloudflared
25 changes: 25 additions & 0 deletions docker/cpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Single-stage CPU-only image: clones text-generation-webui and runs its
# start_linux.sh installer at build time with GPU_CHOICE=N.
FROM ubuntu:22.04

# Build arguments. Each falls back to the default shown when no value is
# supplied (e.g. from the compose file's build.args). ARG values are visible
# as environment variables to the RUN steps below.
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"
# Installer defaults. GPU_CHOICE defaults to N to agree with the value the
# install step below passes; note that the inline assignments on that RUN line
# take precedence over these ARGs.
ARG GPU_CHOICE=N
ARG USE_CUDA118=FALSE
ARG LAUNCH_AFTER_INSTALL=FALSE
ARG INSTALL_EXTENSIONS=TRUE

# Install OS-level build/runtime prerequisites.
# apt-get is used instead of apt because apt's CLI is not stable for scripts.
# The package index is removed in the same layer so it does not bloat the image.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
    apt-get update && \
    apt-get install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
    rm -rf /var/lib/apt/lists/*

# Fetch the application sources into /home/app/text-generation-webui.
WORKDIR /home/app/
RUN git clone https://github.com/oobabooga/text-generation-webui.git
WORKDIR /home/app/text-generation-webui

# Run the installer non-interactively; LAUNCH_AFTER_INSTALL=FALSE is passed so
# the build step only installs (presumably it would otherwise start the server).
RUN GPU_CHOICE=N USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose

# Server flags come from the build context's CMD_FLAGS.txt.
COPY CMD_FLAGS.txt /home/app/text-generation-webui/

# Documentation only; the compose file publishes the actual ports.
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}

# set umask to ensure group read / write at runtime.
# `exec` replaces the wrapper shell with start_linux.sh so that signals sent by
# `docker stop` reach the script instead of stopping at /bin/sh.
CMD umask 0002 && export HOME=/home/app/text-generation-webui && exec ./start_linux.sh
47 changes: 47 additions & 0 deletions docker/cpu/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Compose definition for the CPU-only image.
version: "3.3"
services:
  text-generation-webui:
    build:
      context: .
      args:
        # Requirements file to use:
        # | GPU | CPU | requirements file to use |
        # |--------|---------|---------|
        # | NVIDIA | has AVX2 | `requirements.txt` |
        # | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
        # | AMD | has AVX2 | `requirements_amd.txt` |
        # | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
        # | CPU only | has AVX2 | `requirements_cpu_only.txt` |
        # | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
        # | Apple | Intel | `requirements_apple_intel.txt` |
        # | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
        # Default: `requirements.txt`
        # BUILD_REQUIREMENTS: requirements.txt

        # Extension requirements to build:
        # BUILD_EXTENSIONS:

        # CUDA compute capabilities (https://developer.nvidia.com/cuda-gpus).
        # NOTE(review): this is an NVIDIA setting; it is forwarded as a build
        # arg here as well — confirm whether the CPU build actually needs it.
        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
        BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
        APP_GID: ${APP_GID:-6972}
        # `:-` (not `-`) so that an empty APP_UID= in .env also falls back to
        # the default, matching every other variable in this file.
        APP_UID: ${APP_UID:-6972}
    env_file: .env
    # Run as an unprivileged uid:gid taken from .env.
    user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
    ports:
      - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
      - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
    stdin_open: true
    tty: true
    # Host directories bind-mounted over the application's data directories.
    volumes:
      - ./cache:/home/app/text-generation-webui/cache
      - ./characters:/home/app/text-generation-webui/characters
      - ./extensions:/home/app/text-generation-webui/extensions
      - ./loras:/home/app/text-generation-webui/loras
      - ./logs:/home/app/text-generation-webui/logs
      - ./models:/home/app/text-generation-webui/models
      - ./presets:/home/app/text-generation-webui/presets
      - ./prompts:/home/app/text-generation-webui/prompts
      - ./softprompts:/home/app/text-generation-webui/softprompts
      - ./training:/home/app/text-generation-webui/training
      - ./cloudflared:/etc/cloudflared
21 changes: 21 additions & 0 deletions docker/intel/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Single-stage image for Intel GPUs: clones text-generation-webui and runs its
# start_linux.sh installer at build time with GPU_CHOICE=D.
FROM ubuntu:22.04

# Build arguments. Each falls back to the default shown when no value is
# supplied (e.g. from the compose file's build.args). ARG values are visible
# as environment variables to the RUN steps below.
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"

# Install OS-level build/runtime prerequisites.
# apt-get is used instead of apt because apt's CLI is not stable for scripts.
# The package index is removed in the same layer so it does not bloat the image.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
    apt-get update && \
    apt-get install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
    rm -rf /var/lib/apt/lists/*

# Fetch the application sources into /home/app/text-generation-webui.
WORKDIR /home/app/
RUN git clone https://github.com/oobabooga/text-generation-webui.git
WORKDIR /home/app/text-generation-webui

# Run the installer non-interactively; LAUNCH_AFTER_INSTALL=FALSE is passed so
# the build step only installs (presumably it would otherwise start the server).
RUN GPU_CHOICE=D USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose

# Server flags come from the build context's CMD_FLAGS.txt.
COPY CMD_FLAGS.txt /home/app/text-generation-webui/

# Documentation only; the compose file publishes the actual ports.
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}

# set umask to ensure group read / write at runtime.
# `exec` replaces the wrapper shell with start_linux.sh so that signals sent by
# `docker stop` reach the script instead of stopping at /bin/sh.
CMD umask 0002 && export HOME=/home/app/text-generation-webui && exec ./start_linux.sh
55 changes: 55 additions & 0 deletions docker/intel/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Compose definition for the Intel GPU image.
version: "3.3"
services:
  text-generation-webui:
    build:
      context: .
      args:
        # Requirements file to use:
        # | GPU | CPU | requirements file to use |
        # |--------|---------|---------|
        # | NVIDIA | has AVX2 | `requirements.txt` |
        # | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
        # | AMD | has AVX2 | `requirements_amd.txt` |
        # | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
        # | CPU only | has AVX2 | `requirements_cpu_only.txt` |
        # | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
        # | Apple | Intel | `requirements_apple_intel.txt` |
        # | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
        # Default: `requirements.txt`
        # BUILD_REQUIREMENTS: requirements.txt

        # Extension requirements to build:
        # BUILD_EXTENSIONS:

        # CUDA compute capabilities (https://developer.nvidia.com/cuda-gpus).
        # NOTE(review): this is an NVIDIA setting; it is forwarded as a build
        # arg here as well — confirm whether the Intel build actually needs it.
        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
        BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
        APP_GID: ${APP_GID:-6972}
        # `:-` (not `-`) so that an empty APP_UID= in .env also falls back to
        # the default, matching every other variable in this file.
        APP_UID: ${APP_UID:-6972}
    env_file: .env
    # Run as an unprivileged uid:gid taken from .env.
    user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
    ports:
      - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
      - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
    stdin_open: true
    # GPU access: extra group, host IPC, and the GPU device nodes.
    group_add:
      - video
    tty: true
    ipc: host
    devices:
      # NOTE(review): /dev/kfd is normally the AMD compute device node; confirm
      # it exists on Intel hosts, because a missing device path prevents the
      # container from starting.
      - /dev/kfd
      - /dev/dri
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    # Host directories bind-mounted over the application's data directories.
    # cache and logs are mounted like in the other compose variants so that
    # downloads and logs persist on the host.
    volumes:
      - ./cache:/home/app/text-generation-webui/cache
      - ./characters:/home/app/text-generation-webui/characters
      - ./extensions:/home/app/text-generation-webui/extensions
      - ./loras:/home/app/text-generation-webui/loras
      - ./logs:/home/app/text-generation-webui/logs
      - ./models:/home/app/text-generation-webui/models
      - ./presets:/home/app/text-generation-webui/presets
      - ./prompts:/home/app/text-generation-webui/prompts
      - ./softprompts:/home/app/text-generation-webui/softprompts
      - ./training:/home/app/text-generation-webui/training
      - ./cloudflared:/etc/cloudflared
54 changes: 9 additions & 45 deletions docker/nvidia/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,57 +1,21 @@
# BUILDER
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 as builder
FROM ubuntu:22.04
WORKDIR /builder
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
ARG BUILD_REQUIREMENTS="${BUILD_REQUIREMENTS:-requirements.txt}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"
# create / update build env
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
apt update && \
apt install --no-install-recommends -y git vim build-essential python3-dev pip && \
rm -rf /var/lib/apt/lists/*
RUN --mount=type=cache,target=/root/.cache/pip,rw \
pip3 install --global --upgrade pip wheel setuptools && \
# make shared builder & runtime app user
addgroup --gid $APP_GID app_grp && \
useradd -m -u $APP_UID --gid app_grp app
USER app:app_grp
# build wheels for runtime
WORKDIR /home/app/build
COPY --chown=app:app_grp "$BUILD_REQUIREMENTS" /home/app/build/requirements.txt
COPY --chown=app:app_grp extensions /home/app/build/extensions
RUN --mount=type=cache,target=/root/.cache/pip,rw \
# build all requirements files as wheel dists
pip3 wheel -w wheels -r requirements.txt `echo "$BUILD_EXTENSIONS" | sed -r 's/([^,]+)\s*,?\s*/ -r \/home\/app\/build\/extensions\/\1\/requirements.txt/g'`
# drop wheel and setuptools .whl to avoid install issues
RUN rm wheels/setuptools*.whl

# RUNTIME
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"
ENV CLI_ARGS=""
# create / update runtime env
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
apt update && \
apt install --no-install-recommends -y git python3 pip && \
rm -rf /var/lib/apt/lists/* && \
pip3 install --global --no-cache --upgrade pip wheel setuptools && \
# make shared builder & runtime app user
addgroup --gid $APP_GID app_grp && \
useradd -m -u $APP_UID --gid app_grp app
USER app:app_grp
# install locally built wheels for app
WORKDIR /home/app/wheels
COPY --from=builder /home/app/build/wheels /home/app/wheels
COPY --chown=app:app_grp . /home/app/text-generation-webui
RUN umask 0002 && \
chmod g+rwX /home/app/text-generation-webui && \
pip3 install --global --no-build-isolation --no-cache --no-index ./*.whl && \
rm -r /home/app/wheels
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
rm -rf /var/lib/apt/lists/*
WORKDIR /home/app/
RUN git clone https://github.com/oobabooga/text-generation-webui.git
WORKDIR /home/app/text-generation-webui
RUN GPU_CHOICE=A USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
COPY CMD_FLAGS.txt /home/app/text-generation-webui/
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
WORKDIR /home/app/text-generation-webui
# set umask to ensure group read / write at runtime
CMD umask 0002 && export HOME=/home/app && python3 server.py ${CLI_ARGS}
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@ services:
stdin_open: true
tty: true
volumes:
- ./cache:/home/app/text-generation-webui/cache
- ./characters:/home/app/text-generation-webui/characters
- ./extensions:/home/app/text-generation-webui/extensions
- ./loras:/home/app/text-generation-webui/loras
- ./logs:/home/app/text-generation-webui/logs
- ./models:/home/app/text-generation-webui/models
- ./presets:/home/app/text-generation-webui/presets
- ./prompts:/home/app/text-generation-webui/prompts
Expand Down
12 changes: 10 additions & 2 deletions download-model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,18 @@ def __init__(self, max_retries=5):
if max_retries:
self.session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
self.session.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))

if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:
self.session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
if os.getenv('HF_TOKEN') is not None:
self.session.headers = {'authorization': f'Bearer {os.getenv("HF_TOKEN")}'}

try:
from huggingface_hub import get_token
token = get_token()
except ImportError:
token = os.getenv("HF_TOKEN")

if token is not None:
self.session.headers = {'authorization': f'Bearer {token}'}

def sanitize_model_and_branch_names(self, model, branch):
if model[-1] == '/':
Expand Down
Loading