Skip to content
This repository was archived by the owner on Sep 4, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
1668192
chore: add fork OWNERS
z103cb Apr 30, 2024
cc99216
add ubi Dockerfile
dtrifiro May 21, 2024
d15b373
Dockerfile.ubi: remove references to grpc/protos
dtrifiro May 21, 2024
bc7dccc
Dockerfile.ubi: use vllm-tgis-adapter
dtrifiro May 28, 2024
adc357d
gha: add sync workflow
dtrifiro Jun 3, 2024
e9a9553
Dockerfile.ubi: use distributed-executor-backend=mp as default
dtrifiro Jun 10, 2024
43c7876
Dockerfile.ubi: remove vllm-nccl workaround
dtrifiro Jun 13, 2024
2648a1f
Dockerfile.ubi: add missing requirements-*.txt bind mounts
dtrifiro Jun 18, 2024
510aa47
add triton CustomCacheManger
tdoublep May 29, 2024
910a985
gha: sync-with-upstream workflow create PRs as draft
dtrifiro Jun 19, 2024
3a99c2d
add smoke/unit tests scripts
dtrifiro Jun 19, 2024
f29efce
extras: exit unit tests on err
dtrifiro Jun 20, 2024
6efc7b0
Dockerfile.ubi: misc improvements
dtrifiro May 28, 2024
3bb9e9f
update OWNERS
dtrifiro Jun 21, 2024
88a0456
Dockerfile.ubi: use tensorizer (#64)
prashantgupta24 Jun 25, 2024
e15634d
Dockerfile.ubi: pin vllm-tgis-adapter to 0.1.2
dtrifiro Jun 26, 2024
b2fd1af
gha: fix fetch step in upstream sync workflow
dtrifiro Jul 2, 2024
fd4204b
gha: always update sync workflow PR body/title
dtrifiro Jul 2, 2024
8551e8f
Dockerfile.ubi: bump vllm-tgis-adapter to 0.1.3
dtrifiro Jul 3, 2024
5fe6a00
Dockerfile.ubi: get rid of --distributed-executor-backend=mp
dtrifiro Jul 10, 2024
f9ae74b
Dockerfile.ubi: add flashinfer
dtrifiro Jul 9, 2024
280bc9f
pin adapter to 2.0.0
prashantgupta24 Jul 12, 2024
b92b6d6
deps: bump flashinfer to 0.0.9
dtrifiro Jul 15, 2024
afd1436
Update OWNERS with IBM folks
heyselbi Jun 27, 2024
1a74d61
Dockerfile.ubi: bind mount .git dir to allow inclusion of git commit …
dtrifiro Jul 17, 2024
d05d51f
gha: remove reminder_comment
dtrifiro Jul 17, 2024
97cd508
Dockerfile: bump vllm-tgis-adapter to 0.2.1
dtrifiro Jul 18, 2024
08a7f70
fix: update setup.py to differentiate between fork and upstream
nathan-weinberg Jul 18, 2024
242ea7e
Dockerfile.ubi: properly mount .git dir
dtrifiro Jul 19, 2024
76aa5cf
Revert "[CI/Build] fix: update setup.py to differentiate between fork…
dtrifiro Jul 19, 2024
61207a7
Dockerfile.ubi: bump vllm-tgis-adapter to 0.2.2
dtrifiro Jul 19, 2024
3c182aa
gha: remove unused upstream workflows
dtrifiro Jul 23, 2024
d379e0a
deps: bump vllm-tgis-adapter to 0.2.3
dtrifiro Jul 24, 2024
7a21f52
Dockerfile.ubi: get rid of custom cache manager
dtrifiro Jul 24, 2024
5cb3a9c
Dockerfile.ubi: use uv to install dependencies
dtrifiro May 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 0 additions & 21 deletions .github/workflows/add_label_automerge.yml

This file was deleted.

23 changes: 0 additions & 23 deletions .github/workflows/add_label_ready_comment.yml

This file was deleted.

21 changes: 0 additions & 21 deletions .github/workflows/reminder_comment.yml

This file was deleted.

84 changes: 84 additions & 0 deletions .github/workflows/sync-with-upstream.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Scheduled workflow that opens (or refreshes) a draft pull request merging the
# upstream repository's branch into this repository's branch of the same name.
name: "Sync with upstream"

on:
  schedule:
    # every day at 04:20 UTC
    - cron: "20 4 * * *"

  workflow_dispatch:


env:
  # repo to fetch changes from
  UPSTREAM_REPO: vllm-project/vllm
  # branch to sync
  BRANCH: main

jobs:
  upstream-sync:
    name: Sync with upstream
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write
      contents: write

    steps:
      # full history is needed so the diff/describe against upstream works
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Fetch upstream repo
        run: |
          git remote add upstream "https://github.com/${UPSTREAM_REPO}"
          git fetch upstream

      # Exposes the diffstat against upstream as the multi-line step output
      # `diff`; it is empty when there is nothing to sync.
      - name: Check diff
        id: diff
        shell: bash
        run: |
          echo 'diff<<EOF' >> "$GITHUB_OUTPUT"
          # `tee -a` writes to the log and appends to the output file before it
          # exits, so the closing delimiter below is always written last.
          git diff --stat "upstream/${BRANCH}" | tee -a "$GITHUB_OUTPUT"
          echo 'EOF' >> "$GITHUB_OUTPUT"

      - name: Create PR
        if: ${{ steps.diff.outputs.diff != '' }}
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          set -xeu

          git_hash="$(git rev-parse "upstream/${BRANCH}")"
          echo "git_hash=$git_hash" >> "$GITHUB_OUTPUT"
          git_describe="$(git describe --tags "upstream/${BRANCH}")"
          echo "git_describe=$git_describe" >> "$GITHUB_OUTPUT"

          # echo 'commits<<EOF' >> $GITHUB_OUTPUT
          # git log --oneline ..upstream/${BRANCH} >> $GITHUB_OUTPUT
          # echo 'EOF' >> $GITHUB_OUTPUT

          upstream_url="https://github.com/${UPSTREAM_REPO}"
          upstream_branch="$upstream_url/tree/${BRANCH}"

          title="Sync with upstream@${git_describe}"
          body="Merge [${UPSTREAM_REPO}]($upstream_url):[${BRANCH}]($upstream_branch)@[${git_describe}](${upstream_url}/commit/$git_hash) into $BRANCH"

          gh repo set-default "$GITHUB_REPOSITORY"
          # look for an already-open sync PR so it is updated instead of duplicated
          pr_number=$(gh pr list -S "Sync with upstream@" --json number --jq '.[0].number')

          if [[ -z $pr_number ]]; then
            echo "Creating PR"
            # head is given as <owner>:<branch>, hence the '/' -> ':' rewrite
            gh pr create \
              --head "$(echo "$UPSTREAM_REPO" | sed 's|/|:|g'):${BRANCH}" \
              --base "${BRANCH}" \
              --label code-sync \
              --title "$title" \
              --body "$body" \
              --draft \
              --no-maintainer-edit
            exit 0
          fi

          echo "Updating PR #${pr_number}"
          gh pr edit \
            "$pr_number" \
            --body "$body" \
            --title "$title"
202 changes: 202 additions & 0 deletions Dockerfile.ubi
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
## Global Args #################################################################
# Declared ahead of the first FROM; a stage has to re-declare an ARG (without a
# value) before it can read the default set here.

# re-declared by the build stage, which exports it as TORCH_CUDA_ARCH_LIST
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"

# tag of the ubi9/ubi-minimal image used by the base stage
ARG BASE_UBI_IMAGE_TAG=9.4
# selects the python<version>-* packages installed via microdnf
ARG PYTHON_VERSION=3.11

## Base Layer ##################################################################
# Minimal UBI image with pip/wheel for the selected Python and a few CLI tools;
# every other stage in this file derives from it.
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base
# re-declare the global ARG so it is visible inside this stage
ARG PYTHON_VERSION

RUN microdnf install -y \
        python${PYTHON_VERSION}-pip \
        python${PYTHON_VERSION}-wheel \
    && microdnf clean all

WORKDIR /workspace

ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# Some utils for dev purposes - tar required for kubectl cp
RUN microdnf install -y \
        findutils \
        git \
        procps \
        tar \
        vim \
        which \
    && microdnf clean all


## Python Installer ############################################################
# Creates the virtualenv at /opt/vllm and puts it first on PATH, so the `pip`
# and `uv` invoked by later instructions are the virtualenv's own.
FROM base AS python-install

ARG PYTHON_VERSION

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN microdnf install -y \
        python${PYTHON_VERSION}-devel \
        python${PYTHON_VERSION}-pip \
        python${PYTHON_VERSION}-wheel \
    && python${PYTHON_VERSION} -m venv $VIRTUAL_ENV \
    && pip install --no-cache-dir -U uv pip wheel \
    && microdnf clean all


## CUDA Base ###################################################################
# Adds the NVIDIA CUDA repository and the CUDA 12.4 compiler/devel packages.
FROM python-install AS cuda-base

# -f: fail the build on an HTTP error instead of saving the error page as the repo file
RUN curl -fLo /etc/yum.repos.d/cuda-rhel9.repo \
        https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo

RUN microdnf install -y \
        cuda-libraries-devel-12-4 \
        cuda-nvcc-12-4 \
        cuda-nvtx-12-4 \
    && microdnf clean all

# CUDA_HOME must be set in its own ENV instruction: substitution inside a single
# ENV uses the values from *before* that instruction, so defining and
# referencing CUDA_HOME together would expand ${CUDA_HOME} to an empty string.
ENV CUDA_HOME="/usr/local/cuda"
ENV PATH="${CUDA_HOME}/bin:${PATH}" \
    LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"

## Python cuda base #################################################################
FROM cuda-base AS python-cuda-base

# put the virtualenv's bin directory at the front of PATH for this stage
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Install the CUDA requirement set into the virtualenv. The requirements files
# are bind-mounted for this instruction only; requirements-common.txt is mounted
# alongside (presumably because requirements-cuda.txt references it - confirm).
RUN --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements-cuda.txt

## Development #################################################################
FROM python-cuda-base AS dev

# Install the CUDA and dev requirement sets. The common/lint/test files are
# mounted as well so that they are present next to the two files passed to -r
# (presumably they are included from those files - confirm).
RUN --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
    --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
    --mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \
    --mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    uv pip install \
        -r requirements-cuda.txt \
        -r requirements-dev.txt

## Builder #####################################################################
# Compiles the project and leaves the resulting wheel in /workspace/dist.
FROM dev AS build

# install build dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
    uv pip install -r requirements-build.txt

# install compiler cache to speed up compilation leveraging local or remote caching
# git is required for the cutlass kernels
RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm \
    && rpm -ql epel-release \
    && microdnf install -y git ccache \
    && microdnf clean all

# copy input files
COPY csrc csrc
COPY setup.py setup.py
COPY cmake cmake
COPY CMakeLists.txt CMakeLists.txt
COPY requirements-common.txt requirements-common.txt
COPY requirements-cuda.txt requirements-cuda.txt
COPY pyproject.toml pyproject.toml

ARG TORCH_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=$nvcc_threads
# make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1

# Make sure the cuda environment is in the PATH
ENV PATH=/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

# Copy the entire directory before building wheel
COPY vllm vllm

ENV CCACHE_DIR=/root/.cache/ccache
# .git is bind-mounted only for this instruction (it is not copied into a layer).
# The -march flag is spelled out for CXXFLAGS as well: the shell expands
# "$CFLAGS" before `env` performs its assignments, so referencing $CFLAGS here
# would expand to an empty string and the C++ flags would miss -march=haswell.
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,src=.git,target=/workspace/.git \
    env CFLAGS="-march=haswell" \
        CXXFLAGS="-march=haswell $CXXFLAGS" \
        CMAKE_BUILD_TYPE=Release \
        python3 setup.py bdist_wheel --dist-dir=dist

#################### libsodium Build IMAGE ####################
# Downloads, configures, builds and self-tests libsodium in /usr/src/libsodium;
# the release stage mounts that tree and runs `make install` from it.
FROM base AS libsodium-builder

RUN microdnf install -y gcc gzip \
    && microdnf clean all

WORKDIR /usr/src/libsodium

ARG LIBSODIUM_VERSION=1.0.20
# -f: abort on an HTTP error rather than saving the error page as the tarball.
# The archive is unpacked and its contents moved up into the WORKDIR.
RUN curl -fLO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM_VERSION}-RELEASE/libsodium-${LIBSODIUM_VERSION}.tar.gz \
    && tar -xzvf libsodium*.tar.gz \
    && rm -f libsodium*.tar.gz \
    && mv libsodium*/* ./

RUN ./configure --prefix="/usr/" && make && make check

## Release #####################################################################
# Runtime image: installs the wheel produced by the build stage into the
# virtualenv and starts the OpenAI-compatible API server as a non-root user.
FROM python-install AS vllm-openai

WORKDIR /workspace

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=$VIRTUAL_ENV/bin/:$PATH

# Triton needs a CC compiler
RUN microdnf install -y gcc \
    && microdnf clean all

# install vllm wheel first, so that torch etc will be installed
# (the wheel is bind-mounted from the build stage rather than copied into a layer)
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
    uv pip install --verbose $(echo dist/*.whl)'[tensorizer]'

# Install libsodium for Tensorizer encryption
# (runs `make install` from the tree built in the libsodium-builder stage)
RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
    cd /usr/src/libsodium \
    && make install

# prebuilt flashinfer wheel; the file name pins cu121 / torch2.3 / cp311
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/pip \
    uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.9/flashinfer-0.0.9+cu121torch2.3-cp311-cp311-linux_x86_64.whl

ENV HOME=/home/vllm \
    PORT=8000 \
    HF_HUB_OFFLINE=1 \
    VLLM_USAGE_SOURCE=production-docker-image \
    VLLM_WORKER_MULTIPROC_METHOD=fork

# setup non-root user for OpenShift
# uid 2000 is placed in gid 0 and the listed directories are made group-writable
RUN umask 002 \
    && useradd --uid 2000 --gid 0 vllm \
    && chmod g+rwx $HOME /usr/src /workspace

COPY LICENSE /licenses/vllm.md

USER 2000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]


## gRPC adapter ################################################################
# Release image plus the pinned vllm-tgis-adapter package; the entrypoint is
# switched to the adapter module.
FROM vllm-openai AS vllm-grpc-adapter

# the parent stage ends as uid 2000; root is needed to install into the virtualenv
USER root

RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    uv pip install vllm-tgis-adapter==0.2.3

ENV GRPC_PORT=8033
# drop back to the non-root user before the entrypoint
USER 2000
ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter"]
28 changes: 28 additions & 0 deletions OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Both lists are kept in case-insensitive alphabetical order.
approvers:
- dtrifiro
- fialhocoelho
- heyselbi
- joerunde
- maxdebayser
- njhill
- prashantgupta24
- RH-steve-grubb
- rpancham
- terrytangyuan
- vaibhavjainwiz
- Xaenalt
- z103cb
reviewers:
- dtrifiro
- fialhocoelho
- heyselbi
- joerunde
- maxdebayser
- njhill
- prashantgupta24
- RH-steve-grubb
- rpancham
- terrytangyuan
- vaibhavjainwiz
- Xaenalt
- z103cb
Loading