Skip to content

Commit

Permalink
feat(intel): add diffusers/transformers support (#1746)
Browse files Browse the repository at this point in the history
* feat(intel): add diffusers support

* try to consume upstream container image

* Debug

* Manually install deps

* Map transformers/hf cache dir to modelpath if not specified

* fix(compel): update initialization, pass by all gRPC options

* fix: add dependencies, implement transformers for xpu

* base it from the oneapi image

* Add pillow

* set threads if specified when launching the API

* Skip conda install if intel

* defaults to non-intel

* ci: add to pipelines

* prepare compel only if enabled

* Skip conda install if intel

* fix cleanup

* Disable compel by default

* Install torch 2.1.0 with Intel

* Skip conda on some setups

* Detect python

* Quiet output

* Do not override system python with conda

* Prefer python3

* Fixups

* exllama2: do not install without conda (overrides pytorch version)

* exllama/exllama2: do not install if not using cuda

* Add missing dataset dependency

* Small fixups, symlink to python, add requirements

* Add neural_speed to the deps

* correctly handle model offloading

* fix: device_map == xpu

* go back at calling python, fixed at dockerfile level

* Exllama2 restricted to only nvidia gpus

* Tokenizer to xpu
  • Loading branch information
mudler authored Mar 7, 2024
1 parent ad6fd7a commit 5d10184
Show file tree
Hide file tree
Showing 23 changed files with 250 additions and 81 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/image-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ jobs:
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
core-image-build:
uses: ./.github/workflows/image_build.yml
with:
Expand Down Expand Up @@ -105,4 +113,4 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
base-image: "ubuntu:22.04"
16 changes: 16 additions & 0 deletions .github/workflows/image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,22 @@ jobs:
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f32-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
# Core images
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
Expand Down
34 changes: 20 additions & 14 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ ARG BASE_IMAGE=ubuntu:22.04
# extras or core
FROM ${BASE_IMAGE} as requirements-core

USER root

ARG GO_VERSION=1.21.7
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=11
Expand All @@ -21,7 +23,7 @@ RUN apt-get update && \
apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean

# Install Go
RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -v -C /usr/local -xz
RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
ENV PATH $PATH:/usr/local/go/bin

COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
Expand Down Expand Up @@ -79,6 +81,10 @@ RUN pip install --upgrade pip
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN apt-get install -y espeak-ng espeak && apt-get clean

RUN if [ ! -e /usr/bin/python ]; then \
ln -s /usr/bin/python3 /usr/bin/python \
; fi

###################################
###################################

Expand Down Expand Up @@ -166,43 +172,43 @@ COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/

## Duplicated from Makefile to avoid having a big layer that's hard to push
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/autogptq \
make -C backend/python/autogptq \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/bark \
make -C backend/python/bark \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers \
make -C backend/python/diffusers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \
make -C backend/python/vllm \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/mamba \
make -C backend/python/mamba \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \
make -C backend/python/sentencetransformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \
make -C backend/python/transformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \
make -C backend/python/vall-e-x \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \
make -C backend/python/exllama \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama2 \
make -C backend/python/exllama2 \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/petals \
make -C backend/python/petals \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/transformers-musicgen \
make -C backend/python/transformers-musicgen \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/coqui \
make -C backend/python/coqui \
; fi

# Make sure the models directory exists
Expand Down
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -557,3 +557,10 @@ docker-image-intel:
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .

docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
7 changes: 7 additions & 0 deletions backend/python/common-env/transformers/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ ifeq ($(BUILD_TYPE), hipblas)
CONDA_ENV_PATH = "transformers-rocm.yml"
endif

# Intel GPU are supposed to have dependencies installed in the main python
# environment, so we skip conda installation for SYCL builds.
# https://github.com/intel/intel-extension-for-pytorch/issues/538
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export SKIP_CONDA=1
endif

.PHONY: transformers
transformers:
@echo "Installing $(CONDA_ENV_PATH)..."
Expand Down
34 changes: 24 additions & 10 deletions backend/python/common-env/transformers/install.sh
Original file line number Diff line number Diff line change
@@ -1,24 +1,38 @@
#!/bin/bash
set -ex

SKIP_CONDA=${SKIP_CONDA:-0}

# Check if environment exist
conda_env_exists(){
! conda list --name "${@}" >/dev/null 2>/dev/null
}

if conda_env_exists "transformers" ; then
echo "Creating virtual environment..."
conda env create --name transformers --file $1
echo "Virtual environment created."
else
echo "Virtual environment already exists."
if [ $SKIP_CONDA -eq 1 ]; then
echo "Skipping conda environment installation"
else
export PATH=$PATH:/opt/conda/bin
if conda_env_exists "transformers" ; then
echo "Creating virtual environment..."
conda env create --name transformers --file $1
echo "Virtual environment created."
else
echo "Virtual environment already exists."
fi
fi

if [ "$PIP_CACHE_PURGE" = true ] ; then
export PATH=$PATH:/opt/conda/bin
if [ -d "/opt/intel" ]; then
# Intel GPU: If the directory exists, we assume we are using the intel image
# (no conda env)
# https://github.com/intel/intel-extension-for-pytorch/issues/538
pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed
fi

# Activate conda environment
source activate transformers
if [ "$PIP_CACHE_PURGE" = true ] ; then
if [ $SKIP_CONDA -eq 0 ]; then
# Activate conda environment
source activate transformers
fi

pip cache purge
fi
7 changes: 7 additions & 0 deletions backend/python/diffusers/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ ifeq ($(BUILD_TYPE), hipblas)
export CONDA_ENV_PATH = "diffusers-rocm.yml"
endif

# Intel GPU are supposed to have dependencies installed in the main python
# environment, so we skip conda installation for SYCL builds.
# https://github.com/intel/intel-extension-for-pytorch/issues/538
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export SKIP_CONDA=1
endif

.PHONY: diffusers
diffusers:
@echo "Installing $(CONDA_ENV_PATH)..."
Expand Down
25 changes: 19 additions & 6 deletions backend/python/diffusers/backend_diffusers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,26 @@
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image,export_to_video
from compel import Compel
from compel import Compel, ReturnedEmbeddingsType

from transformers import CLIPTextModel
from safetensors.torch import load_file


_ONE_DAY_IN_SECONDS = 60 * 60 * 24
COMPEL=os.environ.get("COMPEL", "1") == "1"
COMPEL=os.environ.get("COMPEL", "0") == "1"
XPU=os.environ.get("XPU", "0") == "1"
CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
FPS=os.environ.get("FPS", "7")
DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
FRAMES=os.environ.get("FRAMES", "64")

if XPU:
import intel_extension_for_pytorch as ipex
print(ipex.xpu.get_device_name(0))

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

Expand Down Expand Up @@ -231,8 +236,13 @@ def LoadModel(self, request, context):
if request.SchedulerType != "":
self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)

if not self.img2vid:
self.compel = Compel(tokenizer=self.pipe.tokenizer, text_encoder=self.pipe.text_encoder)
if COMPEL:
self.compel = Compel(
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ],
text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True]
)


if request.ControlNet:
Expand All @@ -247,6 +257,8 @@ def LoadModel(self, request, context):
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
# Assume directory from request.ModelFile.
# Only if request.LoraAdapter it's not an absolute path
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
Expand Down Expand Up @@ -386,8 +398,9 @@ def GenerateImage(self, request, context):

image = {}
if COMPEL:
conditioning = self.compel.build_conditioning_tensor(prompt)
kwargs["prompt_embeds"]= conditioning
conditioning, pooled = self.compel.build_conditioning_tensor(prompt)
kwargs["prompt_embeds"] = conditioning
kwargs["pooled_prompt_embeds"] = pooled
# pass the kwargs dictionary to the self.pipe method
image = self.pipe(
guidance_scale=self.cfg_scale,
Expand Down
46 changes: 36 additions & 10 deletions backend/python/diffusers/install.sh
Original file line number Diff line number Diff line change
@@ -1,24 +1,50 @@
#!/bin/bash
set -ex

SKIP_CONDA=${SKIP_CONDA:-0}

# Check if environment exist
conda_env_exists(){
! conda list --name "${@}" >/dev/null 2>/dev/null
}

if conda_env_exists "diffusers" ; then
echo "Creating virtual environment..."
conda env create --name diffusers --file $1
echo "Virtual environment created."
else
echo "Virtual environment already exists."
if [ $SKIP_CONDA -eq 1 ]; then
echo "Skipping conda environment installation"
else
export PATH=$PATH:/opt/conda/bin
if conda_env_exists "diffusers" ; then
echo "Creating virtual environment..."
conda env create --name diffusers --file $1
echo "Virtual environment created."
else
echo "Virtual environment already exists."
fi
fi

if [ "$PIP_CACHE_PURGE" = true ] ; then
export PATH=$PATH:/opt/conda/bin
if [ -d "/opt/intel" ]; then
# Intel GPU: If the directory exists, we assume we are using the Intel image
# https://github.com/intel/intel-extension-for-pytorch/issues/538
pip install torch==2.1.0a0 \
torchvision==0.16.0a0 \
torchaudio==2.1.0a0 \
intel-extension-for-pytorch==2.1.10+xpu \
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

pip install google-api-python-client \
grpcio \
grpcio-tools \
diffusers==0.24.0 \
transformers>=4.25.1 \
accelerate \
compel==2.0.2 \
Pillow
fi

# Activate conda environment
source activate diffusers
if [ "$PIP_CACHE_PURGE" = true ] ; then
if [ $SKIP_CONDA -ne 1 ]; then
# Activate conda environment
source activate diffusers
fi

pip cache purge
fi
13 changes: 9 additions & 4 deletions backend/python/diffusers/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,15 @@
##
## A bash script wrapper that runs the diffusers server with conda

export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate diffusers
if [ -d "/opt/intel" ]; then
# Assumes we are using the Intel oneAPI container image
# https://github.com/intel/intel-extension-for-pytorch/issues/538
export XPU=1
else
export PATH=$PATH:/opt/conda/bin
# Activate conda environment
source activate diffusers
fi

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
Expand Down
5 changes: 5 additions & 0 deletions backend/python/exllama/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ set -ex

export PATH=$PATH:/opt/conda/bin

if [ "$BUILD_TYPE" != "cublas" ]; then
echo "[exllama] Attention!!! Nvidia GPU is required - skipping installation"
exit 0
fi

# Check if environment exist
conda_env_exists(){
! conda list --name "${@}" >/dev/null 2>/dev/null
Expand Down
8 changes: 6 additions & 2 deletions backend/python/exllama2/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@
set -e
##
## A bash script installs the required dependencies of VALL-E-X and prepares the environment
export PATH=$PATH:/opt/conda/bin
export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f

# Activate conda environment
if [ "$BUILD_TYPE" != "cublas" ]; then
echo "[exllamav2] Attention!!! Nvidia GPU is required - skipping installation"
exit 0
fi

export PATH=$PATH:/opt/conda/bin
source activate transformers

echo $CONDA_PREFIX
Expand Down
Loading

0 comments on commit 5d10184

Please sign in to comment.