Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
5dbefd6
[SW-224648] Redirect test logs to file (#1017)
bmyrcha Apr 8, 2025
ff61f89
[SW-224648] Fix test logs redirection (#1027)
bmyrcha Apr 9, 2025
b92af9c
[SW-225233] Adjust method of getting synapse_build (#1045)
bmyrcha Apr 9, 2025
5a9ddfd
Implement Pipeline Parallelism support for HPU. (#1000) (#1040)
jmaksymc Apr 10, 2025
ed47e1e
[1.21 cherry-pick] Fix async callback ordering (#1023) (#1028)
madamczyk-intel Apr 10, 2025
9a06a89
[1.21 cherry-pick] Make lazy mode autodetection more robust (#1038)
madamczyk-intel Apr 10, 2025
035db32
APC - Remove prompt attn with context and use existing implementation…
adobrzyn Apr 11, 2025
b576015
Cherry pick exponential bucketing integration from #642 (#1067)
kzawora-intel Apr 12, 2025
2edff28
[deepseek r1] HPU support for deepseek (#1030)
xuechendi Apr 15, 2025
4445dca
Modify RobertaEmbedding forward as custom op method (#1049)
yeonsily Apr 16, 2025
b3c3a2f
Fix embedding model accuracy issue when merged prefill is enabled (#1…
libinta Apr 16, 2025
43b3e15
[SW-226128]Disable mark scales as const & add `dist.barrier` only for…
yiliu30 Apr 16, 2025
5d30a8f
[1.21.0 cherry-pick] Synchronize vLLM flags to support cross-node inf…
afierka-intel Apr 16, 2025
c46e620
[SW-225980] Allow to skip pytest for non-code related changes (#1093)
bmyrcha Apr 17, 2025
b2955df
[1.21.0 cherry-pick] Set VLLM_T_COMPILE_FULLGRAPH=False in CI multi-m…
afierka-intel Apr 17, 2025
377d0f9
[1.21.0 cherry-pick] Enable APC pre-merge tests to compile test suite…
afierka-intel Apr 17, 2025
5df67e7
[SW-224431] Fix fp8 measurement for mixtral (#1119)
kwisniewski98 Apr 18, 2025
b63079a
[WIP] 1.21 docs update (#1080)
PatrykWo Apr 18, 2025
1ee6b61
Update hpu_worker.py (#943)
michalkuligowski Apr 18, 2025
beaeec5
Update requirements-hpu.txt (#1123)
afierka-intel Apr 22, 2025
d285a39
[1.21 cherry-pick] Restore fsdpa calibration (#1087)
madamczyk-intel Apr 23, 2025
91a143a
Update CODEOWNERS (#1139)
michalkuligowski Apr 23, 2025
da859c0
Michalkuligowski patch update workflows (#1019)
michalkuligowski Apr 23, 2025
bd508fa
Add in Dockerfile.hpu.ubi (#1118)
AnetaKaczynska Apr 28, 2025
765b0c8
Fix the llama3.2-11b/90b accuracy drop issue. (#1175)
libinta Apr 30, 2025
d0754d6
[SW-226779]Fix attribute not found issue (#1160)
xuechendi May 5, 2025
7461f4a
Update README_GAUDI.md 1.21.0 (#1196)
anastasiauvarovaintel May 6, 2025
e7b5689
Update links and tags for 1.21.0 release (#1204)
bartekkuncer May 7, 2025
b208380
Removed OS specification from requirements list (#1221)
PatrykWo May 7, 2025
0275ce4
Final update of models 1.21. (#1231)
PatrykWo May 8, 2025
aa9e006
Add Qwen2.5-Omni thinker
wenbinc-Bin Apr 7, 2025
719a4ef
Optimize qwen2.5vl phase2
yingjie-han Apr 30, 2025
703fd42
Porting to Qwen2.5-Omni
yingjie-han May 8, 2025
71b0079
Qwen2.5VL/Omni: Pad W and H
wenbinc-Bin May 14, 2025
bbfdce4
Fix iteration bug introduced by transformers
wenbinc-Bin May 19, 2025
6884a94
Fix bug that multi-modal model fails on eager mode
wenbinc-Bin May 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# See https://help.github.com/articles/about-codeowners/
# for more info about CODEOWNERS file

* @kzawora-intel @madamczykhabana @michalkuligowski @mgawarkiewicz @vivekgoe @afierka-intel
* @kzawora-intel @madamczyk-intel @michalkuligowski @mgawarkiewicz-intel @vivekgoe @afierka-intel
7 changes: 5 additions & 2 deletions .github/actionlint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@ paths:
.github/workflows/trigger_jenkins.yml:
ignore:
- shellcheck reported issue in this script: SC2116:.+
- shellcheck reported issue in this script: SC2086:.+
- shellcheck reported issue in this script: SC2001:.+
- shellcheck reported issue in this script: SC2086:.+
- shellcheck reported issue in this script: SC2001:.+
.github/workflows/skip_gaudi_tests.yml:
ignore:
- shellcheck reported issue in this script: SC2086:.+
21 changes: 0 additions & 21 deletions .github/workflows/add_label_automerge.yml

This file was deleted.

10 changes: 2 additions & 8 deletions .github/workflows/cpu-test.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
name: cpu-test

on:
# Trigger the workflow on push or pull request,
# but only for the habana_main branch
push:
branches:
- habana_main
pull_request:
branches:
- habana_main

push:
branches: [main]

jobs:
cputest:
Expand Down
79 changes: 79 additions & 0 deletions .github/workflows/skip_gaudi_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Workflow: lets repository maintainers skip the Gaudi hardware test suite on a
# PR by commenting "/skip-gaudi-tests". Authorization is checked against
# .github/CODEOWNERS, then a successful commit status is posted on the PR head.
name: Skip Gaudi Tests
on:
  issue_comment:
    types: [created]

permissions:
  pull-requests: write
  statuses: write
  actions: read
jobs:
  read_codeowners:
    name: Check Commenter
    runs-on: generic-runner
    # Only react to the magic comment, and only on comments made on PRs
    # (issue_comment also fires for plain issues).
    if: ${{ contains(github.event.comment.body, '/skip-gaudi-tests') && github.event.issue.pull_request }}
    outputs:
      pr_sha: ${{ steps.extract_pr.outputs.pr_sha }}
    steps:
      - name: 'Checkout Repository'
        uses: actions/checkout@v4
        with:
          ref: habana_main
          fetch-depth: 0
          token: ${{ secrets.GH_PAT }}
      - name: Parse Comment
        env:
          # SECURITY: the commenter login is attacker-controlled input. Passing
          # it via env (instead of interpolating ${{ }} directly into `run:`)
          # prevents shell/script injection into this step.
          COMMENTER: ${{ github.event.comment.user.login }}
        run: |
          MAINTAINERS=$(grep -Eh '^[^#]' .github/CODEOWNERS | tr -d '@*' | tr '\n' ' ')
          echo "Maintainers are: ${MAINTAINERS}"
          echo "Commenter Is: ${COMMENTER}"
          # -w forces whole-word matching so e.g. user "foo" is NOT authorized
          # merely because maintainer "foobar" exists; `--` guards against a
          # login that begins with a dash being parsed as a grep option.
          if ! echo "$MAINTAINERS" | grep -qw -- "$COMMENTER"; then
            echo "❌ User $COMMENTER is not authorized to trigger tests."
            exit 1
          fi
      - name: Extract PR Sha
        id: extract_pr
        # issue_comment events carry no PR head SHA, so resolve it via the API.
        run: |
          pr_sha=$(curl -sH "Authorization: token ${{ secrets.GH_PAT }}" "https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.issue.number }}" | jq -r '.head.sha')
          echo "pr_sha=$pr_sha" >> "$GITHUB_OUTPUT"
  Summarize:
    name: Summarize Test Results
    runs-on: generic-runner
    needs: [read_codeowners]
    # Run whenever the authorization job actually ran (pass or fail), so a
    # status is always reported; skip only if the gate itself never started.
    if: always() && !contains(fromJSON('["skipped","cancelled"]'), needs.read_codeowners.result)
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          token: ${{ secrets.GH_PAT }}
      - name: Create Commit Status(Success)
        uses: actions/github-script@v7
        if: success()
        env:
          GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }}
        with:
          script: |
            await github.rest.repos.createCommitStatus({
              owner: context.repo.owner,
              repo: context.repo.repo,
              sha: process.env.GIT_SHA,
              state: 'success',
              description: 'Tests have been skipped!',
              context: 'Summarize Test Results'
            });
      - name: Create Commit Status(Failure)
        uses: actions/github-script@v7
        if: failure()
        env:
          GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }}
        with:
          script: |
            await github.rest.repos.createCommitStatus({
              owner: context.repo.owner,
              repo: context.repo.repo,
              sha: process.env.GIT_SHA,
              state: 'failure',
              description: 'Test Failure! Check Jobs To See Why',
              context: 'Summarize Test Results'
            });
20 changes: 16 additions & 4 deletions .github/workflows/trigger_jenkins.yml
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,19 @@ jobs:
RELEASED_SYNAPSE_VERSION: ${{ vars.RELEASED_SYNAPSE_VERSION }}
BASE_BRANCH: ${{ needs.read_codeowners.outputs.pr_branch }}
run: |
version_regex='^v([0-9]+)\.([0-9]+)\.([0-9]+)$'
LOG_REDIRECTION="\&>"
version_regex='^v([0-9]+)\.([0-9]+)\.([0-9]+)_next$'
if [[ $TARGET_BRANCH =~ $version_regex ]]; then
synapse_version=${TARGET_BRANCH#v}
synapse_version=${synapse_version%_*}
synapse_build_endpoint="https://dms.habana-labs.com/api/v1.1/guide/info/${synapse_version}/latest?type=docker-pt"
else
synapse_version=${RELEASED_SYNAPSE_VERSION#v}
LOG_REDIRECTION="2>\&1 \| tee"
synapse_build_endpoint="https://dms.habana-labs.com/api/v1.1/branch/info/v${synapse_version}"
fi
echo "Using SynapseAI version ${synapse_version}"
synapse_build=$(curl "https://dms.habana-labs.com/api/v1.1/branch/info/v$synapse_version" | jq -r ".release_id")
echo "Using SynapseAI version ${synapse_version}"
synapse_build=$(curl "${synapse_build_endpoint}" | jq -r ".release_id")
pt_version=${{ vars.PT_VERSION }}
BUILD_TAG="Github-vLLM-Fork-${{ github.event.number }}-${{github.run_number}}"
safe_cmd=${TEST_COMMAND//&/\\&}
Expand All @@ -239,17 +244,24 @@ jobs:
sed -i "s/##PYTORCH_VERSION##/${pt_version}/g" pod.yml
sed -i "s|##GIT_BRANCH##|$BASE_BRANCH|g" pod.yml
sed -i "s|##CMD##|$safe_cmd|g" pod.yml
sed -i "s|##LOG_REDIRECTION##|$LOG_REDIRECTION|g" pod.yml
echo "Pod Template Created"
- name: Run Test
run: |
random_string=$(tr -dc 'a-z0-9' </dev/urandom | head -c 10)
pod_name="vllm-fork-${{github.event.issue.number}}-${random_string}"
set +e
hlctl create containers \
--file=pod.yml \
--flavor=${{ matrix.tests.flavor}} \
--name="vllm-fork-${{github.event.issue.number}}-${random_string}" \
--name="${pod_name}" \
--namespace="framework" \
--retry \
--shm=10240
test_status=$?
set -e
echo "Logs are available at https://logs-browser.k8s-infra.habana-labs.com/files/${pod_name}-tfjob"
exit $test_status
- name: Create Commit Status(Failure)
uses: actions/github-script@v7
if: failure()
Expand Down
16 changes: 12 additions & 4 deletions .jenkins/test_config_t_compile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ stages:
- name: v1_gsm8k_small_g2_tp2
flavor: g2.s
command: export PT_HPU_LAZY_MODE=0 && export VLLM_T_COMPILE_FULLGRAPH=True && export VLLM_USE_V1=1 && export VLLM_CONTIGUOUS_PA=false && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2
- name: test_gsm8k_small_models_apc
steps:
- name: gsm8k_small_g3_tp1_apc
flavor: g3
command: export PT_HPU_LAZY_MODE=0 && export VLLM_CONTIGUOUS_PA=false && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a
- name: gsm8k_small_g2_tp1_apc
flavor: g2
command: export PT_HPU_LAZY_MODE=0 && export VLLM_CONTIGUOUS_PA=false && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a
- name: test_gsm8k_large_models
steps:
- name: v0_gsm8k_large_g3_tp2
Expand Down Expand Up @@ -124,24 +132,24 @@ stages:
flavor: g3
command: >
cd .jenkins/vision &&
VLLM_T_COMPILE_FULLGRAPH=True PT_HPU_LAZY_MODE=0
VLLM_T_COMPILE_FULLGRAPH=False PT_HPU_LAZY_MODE=0
bash run-tests.sh -c configs/models-small.txt -t 1
- name: multimodal_small_g3_tp2
flavor: g3.s
command: >
cd .jenkins/vision &&
VLLM_T_COMPILE_FULLGRAPH=True PT_HPU_LAZY_MODE=0
VLLM_T_COMPILE_FULLGRAPH=False PT_HPU_LAZY_MODE=0
bash run-tests.sh -c configs/models-small.txt -t 2
- name: multimodal_small_g3_tp1_mss
flavor: g3
command: >
cd .jenkins/vision && VLLM_T_COMPILE_FULLGRAPH=True PT_HPU_LAZY_MODE=0
cd .jenkins/vision && VLLM_T_COMPILE_FULLGRAPH=False PT_HPU_LAZY_MODE=0
bash run-tests.sh -c configs/models-mss.txt -t 1
- name: multimodal_small_g3_tp2_mss
flavor: g3.s
command: >
cd .jenkins/vision &&
VLLM_T_COMPILE_FULLGRAPH=True PT_HPU_LAZY_MODE=0
VLLM_T_COMPILE_FULLGRAPH=False PT_HPU_LAZY_MODE=0
bash run-tests.sh -c configs/models-mss.txt -t 2
- name: tests_int4_quantization
steps:
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.hpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
FROM vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest

COPY ./ /workspace/vllm

Expand Down
101 changes: 101 additions & 0 deletions Dockerfile.hpu.ubi
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# syntax=docker/dockerfile:1
# Multi-stage build of the vLLM OpenAI-compatible server for Intel Gaudi (HPU)
# on a RHEL/UBI Habana base image.
ARG BASE_IMAGE=vault.habana.ai/gaudi-docker/1.21.0/rhel9.4/habanalabs/pytorch-installer-2.6.0:latest
# NOTE: stage names use uppercase AS for consistency with the stages below
# (BuildKit FromAsCasing check).
FROM ${BASE_IMAGE} AS habana-base

USER root

ENV VLLM_TARGET_DEVICE="hpu"

# NOTE(review): blanket `dnf update` (hadolint DL3005) is kept deliberately —
# the base image is tagged :latest, so this pulls current security fixes;
# prefer pinning BASE_IMAGE by digest if reproducibility matters.
RUN dnf -y update --best --allowerasing --skip-broken && dnf clean all

WORKDIR /workspace

## Python Installer #################################################################
FROM habana-base AS python-install

ARG PYTHON_VERSION=3.11

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# --system-site-packages keeps the Habana-provided torch stack visible inside
# the venv instead of reinstalling it.
RUN dnf install -y --setopt=install_weak_deps=0 --nodocs \
    python${PYTHON_VERSION}-wheel && \
    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV --system-site-packages && pip install --no-cache -U pip wheel && dnf clean all

## Python Habana base #################################################################
FROM python-install AS python-habana-base

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# install Habana Software and common dependencies; bind-mounts keep the
# requirements files out of the image, the pip cache mount speeds rebuilds.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
    --mount=type=bind,source=requirements-hpu.txt,target=requirements-hpu.txt \
    pip install \
    -r requirements-hpu.txt

## Builder #####################################################################
FROM python-habana-base AS build

# copy only the inputs the wheel build needs, so source edits elsewhere
# don't invalidate the dependency layers above
COPY csrc csrc
COPY setup.py setup.py
COPY cmake cmake
COPY CMakeLists.txt CMakeLists.txt
COPY requirements-common.txt requirements-common.txt
COPY requirements-hpu.txt requirements-hpu.txt
COPY pyproject.toml pyproject.toml

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# # make sure punica kernels are built (for LoRA)
# HPU currently doesn't support LoRA
# ENV VLLM_INSTALL_PUNICA_KERNELS=1

# Copy the entire package directory before building the wheel
COPY vllm vllm

ENV CCACHE_DIR=/root/.cache/ccache
# .git is bind-mounted (not copied) so setuptools-scm can derive the version
# without baking the repository history into any layer.
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,src=.git,target=/workspace/.git \
    env CFLAGS="-march=haswell" \
    CXXFLAGS="$CFLAGS $CXXFLAGS" \
    CMAKE_BUILD_TYPE=Release \
    python3 setup.py bdist_wheel --dist-dir=dist

## Release #####################################################################
# Final stage starts from python-install (not build), so compilers and build
# caches never reach the shipped image.
FROM python-install AS vllm-openai

WORKDIR /workspace

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=$VIRTUAL_ENV/bin/:$PATH

# Triton needs a CC compiler
RUN dnf install -y --setopt=install_weak_deps=0 --nodocs gcc \
    && dnf clean all

# install vllm wheel first, so that torch etc will be installed
# ($(echo ...) expands the single built wheel glob before the [tensorizer]
# extra suffix is appended)
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
    --mount=type=cache,target=/root/.cache/pip \
    pip install $(echo dist/*.whl)'[tensorizer]' --verbose

ENV HF_HUB_OFFLINE=1 \
    PORT=8000 \
    HOME=/home/vllm \
    VLLM_USAGE_SOURCE=production-docker-image

# setup non-root user for OpenShift
# In OpenShift the user ID is randomly assigned, for compatibility we also
# set up a non-root user here (gid 0 + group-writable dirs is the OpenShift
# arbitrary-UID convention).
RUN umask 002 \
    && useradd --uid 2000 --gid 0 vllm \
    && chmod g+rwx $HOME /usr/src /workspace

COPY LICENSE /licenses/vllm.md

USER 2000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
Loading