Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
5dbefd6
[SW-224648] Redirect test logs to file (#1017)
bmyrcha Apr 8, 2025
ff61f89
[SW-224648] Fix test logs redirection (#1027)
bmyrcha Apr 9, 2025
b92af9c
[SW-225233] Adjust method of getting synapse_build (#1045)
bmyrcha Apr 9, 2025
5a9ddfd
Implement Pipeline Parallelism support for HPU. (#1000) (#1040)
jmaksymc Apr 10, 2025
ed47e1e
[1.21 cherry-pick] Fix async callback ordering (#1023) (#1028)
madamczyk-intel Apr 10, 2025
9a06a89
[1.21 cherry-pick] Make lazy mode autodetection more robust (#1038)
madamczyk-intel Apr 10, 2025
035db32
APC - Remove prompt attn with context and use existing implementation…
adobrzyn Apr 11, 2025
b576015
Cherry pick exponential bucketing integration from #642 (#1067)
kzawora-intel Apr 12, 2025
2edff28
[deepseek r1] HPU support for deepseek (#1030)
xuechendi Apr 15, 2025
4445dca
Modify RobertaEmbedding forward as custom op method (#1049)
yeonsily Apr 16, 2025
b3c3a2f
Fix embedding model accuracy issue when merged prefill is enabled (#1…
libinta Apr 16, 2025
43b3e15
[SW-226128]Disable mark scales as const & add `dist.barrier` only for…
yiliu30 Apr 16, 2025
5d30a8f
[1.21.0 cherry-pick] Synchronize vLLM flags to support cross-node inf…
afierka-intel Apr 16, 2025
c46e620
[SW-225980] Allow to skip pytest for non-code related changes (#1093)
bmyrcha Apr 17, 2025
b2955df
[1.21.0 cherry-pick] Set VLLM_T_COMPILE_FULLGRAPH=False in CI multi-m…
afierka-intel Apr 17, 2025
377d0f9
[1.21.0 cherry-pick] Enable APC pre-merge tests to compile test suite…
afierka-intel Apr 17, 2025
5df67e7
[SW-224431] Fix fp8 measurement for mixtral (#1119)
kwisniewski98 Apr 18, 2025
b63079a
[WIP] 1.21 docs update (#1080)
PatrykWo Apr 18, 2025
1ee6b61
Update hpu_worker.py (#943)
michalkuligowski Apr 18, 2025
beaeec5
Update requirements-hpu.txt (#1123)
afierka-intel Apr 22, 2025
d285a39
[1.21 cherry-pick] Restore fsdpa calibration (#1087)
madamczyk-intel Apr 23, 2025
91a143a
Update CODEOWNERS (#1139)
michalkuligowski Apr 23, 2025
da859c0
Michalkuligowski patch update workflows (#1019)
michalkuligowski Apr 23, 2025
bd508fa
Add in Dockerfile.hpu.ubi (#1118)
AnetaKaczynska Apr 28, 2025
765b0c8
Fix the llama3.2-11b/90b accuracy drop issue. (#1175)
libinta Apr 30, 2025
d0754d6
[SW-226779]Fix attribute not found issue (#1160)
xuechendi May 5, 2025
7461f4a
Update README_GAUDI.md 1.21.0 (#1196)
anastasiauvarovaintel May 6, 2025
e7b5689
Update links and tags for 1.21.0 release (#1204)
bartekkuncer May 7, 2025
b208380
Removed OS specification from requirements list (#1221)
PatrykWo May 7, 2025
0275ce4
Final update of models 1.21. (#1231)
PatrykWo May 8, 2025
aa9e006
Add Qwen2.5-Omni thinker
wenbinc-Bin Apr 7, 2025
719a4ef
Optimize qwen2.5vl phase2
yingjie-han Apr 30, 2025
703fd42
Porting to Qwen2.5-Omni
yingjie-han May 8, 2025
71b0079
Qwen2.5VL/Omni: Pad W and H
wenbinc-Bin May 14, 2025
bbfdce4
Fix iteration bug introduced by transformers
wenbinc-Bin May 19, 2025
6884a94
Fix bug that multi-modal model fails on eager mode
wenbinc-Bin May 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# See https://help.github.com/articles/about-codeowners/
# for more info about CODEOWNERS file

* @kzawora-intel @madamczykhabana @michalkuligowski @mgawarkiewicz @vivekgoe @afierka-intel
* @kzawora-intel @madamczyk-intel @michalkuligowski @mgawarkiewicz-intel @vivekgoe @afierka-intel
7 changes: 5 additions & 2 deletions .github/actionlint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@ paths:
.github/workflows/trigger_jenkins.yml:
ignore:
- shellcheck reported issue in this script: SC2116:.+
- shellcheck reported issue in this script: SC2086:.+
- shellcheck reported issue in this script: SC2001:.+
- shellcheck reported issue in this script: SC2086:.+
- shellcheck reported issue in this script: SC2001:.+
.github/workflows/skip_gaudi_tests.yml:
ignore:
- shellcheck reported issue in this script: SC2086:.+
21 changes: 0 additions & 21 deletions .github/workflows/add_label_automerge.yml

This file was deleted.

10 changes: 2 additions & 8 deletions .github/workflows/cpu-test.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
name: cpu-test

on:
# Trigger the workflow on push or pull request,
# but only for the habana_main branch
push:
branches:
- habana_main
pull_request:
branches:
- habana_main

push:
branches: [main]

jobs:
cputest:
Expand Down
79 changes: 79 additions & 0 deletions .github/workflows/skip_gaudi_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Workflow: lets repository maintainers skip the Gaudi hardware test suite on a
# PR by commenting "/skip-gaudi-tests". Authorization is checked against
# .github/CODEOWNERS, then a successful commit status is posted on the PR head.
name: Skip Gaudi Tests
on:
  issue_comment:
    types: [created]

permissions:
  pull-requests: write
  statuses: write
  actions: read
jobs:
  read_codeowners:
    name: Check Commenter
    runs-on: generic-runner
    # Only react to the magic comment, and only on comments made on PRs
    # (issue_comment also fires for plain issues).
    if: ${{ contains(github.event.comment.body, '/skip-gaudi-tests') && github.event.issue.pull_request }}
    outputs:
      pr_sha: ${{ steps.extract_pr.outputs.pr_sha }}
    steps:
      - name: 'Checkout Repository'
        uses: actions/checkout@v4
        with:
          ref: habana_main
          fetch-depth: 0
          token: ${{ secrets.GH_PAT }}
      - name: Parse Comment
        env:
          # SECURITY: the commenter login is attacker-controlled input. Passing
          # it via env (instead of interpolating ${{ }} directly into `run:`)
          # prevents shell/script injection into this step.
          COMMENTER: ${{ github.event.comment.user.login }}
        run: |
          MAINTAINERS=$(grep -Eh '^[^#]' .github/CODEOWNERS | tr -d '@*' | tr '\n' ' ')
          echo "Maintainers are: ${MAINTAINERS}"
          echo "Commenter Is: ${COMMENTER}"
          # -w forces whole-word matching so e.g. user "foo" is NOT authorized
          # merely because maintainer "foobar" exists; `--` guards against a
          # login that begins with a dash being parsed as a grep option.
          if ! echo "$MAINTAINERS" | grep -qw -- "$COMMENTER"; then
            echo "❌ User $COMMENTER is not authorized to trigger tests."
            exit 1
          fi
      - name: Extract PR Sha
        id: extract_pr
        # issue_comment events carry no PR head SHA, so resolve it via the API.
        run: |
          pr_sha=$(curl -sH "Authorization: token ${{ secrets.GH_PAT }}" "https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.issue.number }}" | jq -r '.head.sha')
          echo "pr_sha=$pr_sha" >> "$GITHUB_OUTPUT"
  Summarize:
    name: Summarize Test Results
    runs-on: generic-runner
    needs: [read_codeowners]
    # Run whenever the authorization job actually ran (pass or fail), so a
    # status is always reported; skip only if the gate itself never started.
    if: always() && !contains(fromJSON('["skipped","cancelled"]'), needs.read_codeowners.result)
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          token: ${{ secrets.GH_PAT }}
      - name: Create Commit Status(Success)
        uses: actions/github-script@v7
        if: success()
        env:
          GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }}
        with:
          script: |
            await github.rest.repos.createCommitStatus({
              owner: context.repo.owner,
              repo: context.repo.repo,
              sha: process.env.GIT_SHA,
              state: 'success',
              description: 'Tests have been skipped!',
              context: 'Summarize Test Results'
            });
      - name: Create Commit Status(Failure)
        uses: actions/github-script@v7
        if: failure()
        env:
          GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }}
        with:
          script: |
            await github.rest.repos.createCommitStatus({
              owner: context.repo.owner,
              repo: context.repo.repo,
              sha: process.env.GIT_SHA,
              state: 'failure',
              description: 'Test Failure! Check Jobs To See Why',
              context: 'Summarize Test Results'
            });
20 changes: 16 additions & 4 deletions .github/workflows/trigger_jenkins.yml
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,19 @@ jobs:
RELEASED_SYNAPSE_VERSION: ${{ vars.RELEASED_SYNAPSE_VERSION }}
BASE_BRANCH: ${{ needs.read_codeowners.outputs.pr_branch }}
run: |
version_regex='^v([0-9]+)\.([0-9]+)\.([0-9]+)$'
LOG_REDIRECTION="\&>"
version_regex='^v([0-9]+)\.([0-9]+)\.([0-9]+)_next$'
if [[ $TARGET_BRANCH =~ $version_regex ]]; then
synapse_version=${TARGET_BRANCH#v}
synapse_version=${synapse_version%_*}
synapse_build_endpoint="https://dms.habana-labs.com/api/v1.1/guide/info/${synapse_version}/latest?type=docker-pt"
else
synapse_version=${RELEASED_SYNAPSE_VERSION#v}
LOG_REDIRECTION="2>\&1 \| tee"
synapse_build_endpoint="https://dms.habana-labs.com/api/v1.1/branch/info/v${synapse_version}"
fi
echo "Using SynapseAI version ${synapse_version}"
synapse_build=$(curl "https://dms.habana-labs.com/api/v1.1/branch/info/v$synapse_version" | jq -r ".release_id")
echo "Using SynapseAI version ${synapse_version}"
synapse_build=$(curl "${synapse_build_endpoint}" | jq -r ".release_id")
pt_version=${{ vars.PT_VERSION }}
BUILD_TAG="Github-vLLM-Fork-${{ github.event.number }}-${{github.run_number}}"
safe_cmd=${TEST_COMMAND//&/\\&}
Expand All @@ -239,17 +244,24 @@ jobs:
sed -i "s/##PYTORCH_VERSION##/${pt_version}/g" pod.yml
sed -i "s|##GIT_BRANCH##|$BASE_BRANCH|g" pod.yml
sed -i "s|##CMD##|$safe_cmd|g" pod.yml
sed -i "s|##LOG_REDIRECTION##|$LOG_REDIRECTION|g" pod.yml
echo "Pod Template Created"
- name: Run Test
run: |
random_string=$(tr -dc 'a-z0-9' </dev/urandom | head -c 10)
pod_name="vllm-fork-${{github.event.issue.number}}-${random_string}"
set +e
hlctl create containers \
--file=pod.yml \
--flavor=${{ matrix.tests.flavor}} \
--name="vllm-fork-${{github.event.issue.number}}-${random_string}" \
--name="${pod_name}" \
--namespace="framework" \
--retry \
--shm=10240
test_status=$?
set -e
echo "Logs are available at https://logs-browser.k8s-infra.habana-labs.com/files/${pod_name}-tfjob"
exit $test_status
- name: Create Commit Status(Failure)
uses: actions/github-script@v7
if: failure()
Expand Down
16 changes: 12 additions & 4 deletions .jenkins/test_config_t_compile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ stages:
- name: v1_gsm8k_small_g2_tp2
flavor: g2.s
command: export PT_HPU_LAZY_MODE=0 && export VLLM_T_COMPILE_FULLGRAPH=True && export VLLM_USE_V1=1 && export VLLM_CONTIGUOUS_PA=false && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2
- name: test_gsm8k_small_models_apc
steps:
- name: gsm8k_small_g3_tp1_apc
flavor: g3
command: export PT_HPU_LAZY_MODE=0 && export VLLM_CONTIGUOUS_PA=false && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a
- name: gsm8k_small_g2_tp1_apc
flavor: g2
command: export PT_HPU_LAZY_MODE=0 && export VLLM_CONTIGUOUS_PA=false && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a
- name: test_gsm8k_large_models
steps:
- name: v0_gsm8k_large_g3_tp2
Expand Down Expand Up @@ -124,24 +132,24 @@ stages:
flavor: g3
command: >
cd .jenkins/vision &&
VLLM_T_COMPILE_FULLGRAPH=True PT_HPU_LAZY_MODE=0
VLLM_T_COMPILE_FULLGRAPH=False PT_HPU_LAZY_MODE=0
bash run-tests.sh -c configs/models-small.txt -t 1
- name: multimodal_small_g3_tp2
flavor: g3.s
command: >
cd .jenkins/vision &&
VLLM_T_COMPILE_FULLGRAPH=True PT_HPU_LAZY_MODE=0
VLLM_T_COMPILE_FULLGRAPH=False PT_HPU_LAZY_MODE=0
bash run-tests.sh -c configs/models-small.txt -t 2
- name: multimodal_small_g3_tp1_mss
flavor: g3
command: >
cd .jenkins/vision && VLLM_T_COMPILE_FULLGRAPH=True PT_HPU_LAZY_MODE=0
cd .jenkins/vision && VLLM_T_COMPILE_FULLGRAPH=False PT_HPU_LAZY_MODE=0
bash run-tests.sh -c configs/models-mss.txt -t 1
- name: multimodal_small_g3_tp2_mss
flavor: g3.s
command: >
cd .jenkins/vision &&
VLLM_T_COMPILE_FULLGRAPH=True PT_HPU_LAZY_MODE=0
VLLM_T_COMPILE_FULLGRAPH=False PT_HPU_LAZY_MODE=0
bash run-tests.sh -c configs/models-mss.txt -t 2
- name: tests_int4_quantization
steps:
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.hpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
FROM vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest

COPY ./ /workspace/vllm

Expand Down
101 changes: 101 additions & 0 deletions Dockerfile.hpu.ubi
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# syntax=docker/dockerfile:1
# Multi-stage build of the vLLM OpenAI-compatible server for Intel Gaudi (HPU)
# on a RHEL/UBI Habana base image.
ARG BASE_IMAGE=vault.habana.ai/gaudi-docker/1.21.0/rhel9.4/habanalabs/pytorch-installer-2.6.0:latest
# NOTE: stage names use uppercase AS for consistency with the stages below
# (BuildKit FromAsCasing check).
FROM ${BASE_IMAGE} AS habana-base

USER root

ENV VLLM_TARGET_DEVICE="hpu"

# NOTE(review): blanket `dnf update` (hadolint DL3005) is kept deliberately —
# the base image is tagged :latest, so this pulls current security fixes;
# prefer pinning BASE_IMAGE by digest if reproducibility matters.
RUN dnf -y update --best --allowerasing --skip-broken && dnf clean all

WORKDIR /workspace

## Python Installer #################################################################
FROM habana-base AS python-install

ARG PYTHON_VERSION=3.11

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# --system-site-packages keeps the Habana-provided torch stack visible inside
# the venv instead of reinstalling it.
RUN dnf install -y --setopt=install_weak_deps=0 --nodocs \
    python${PYTHON_VERSION}-wheel && \
    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV --system-site-packages && pip install --no-cache -U pip wheel && dnf clean all

## Python Habana base #################################################################
FROM python-install AS python-habana-base

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# install Habana Software and common dependencies; bind-mounts keep the
# requirements files out of the image, the pip cache mount speeds rebuilds.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
    --mount=type=bind,source=requirements-hpu.txt,target=requirements-hpu.txt \
    pip install \
    -r requirements-hpu.txt

## Builder #####################################################################
FROM python-habana-base AS build

# copy only the inputs the wheel build needs, so source edits elsewhere
# don't invalidate the dependency layers above
COPY csrc csrc
COPY setup.py setup.py
COPY cmake cmake
COPY CMakeLists.txt CMakeLists.txt
COPY requirements-common.txt requirements-common.txt
COPY requirements-hpu.txt requirements-hpu.txt
COPY pyproject.toml pyproject.toml

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# # make sure punica kernels are built (for LoRA)
# HPU currently doesn't support LoRA
# ENV VLLM_INSTALL_PUNICA_KERNELS=1

# Copy the entire package directory before building the wheel
COPY vllm vllm

ENV CCACHE_DIR=/root/.cache/ccache
# .git is bind-mounted (not copied) so setuptools-scm can derive the version
# without baking the repository history into any layer.
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,src=.git,target=/workspace/.git \
    env CFLAGS="-march=haswell" \
    CXXFLAGS="$CFLAGS $CXXFLAGS" \
    CMAKE_BUILD_TYPE=Release \
    python3 setup.py bdist_wheel --dist-dir=dist

## Release #####################################################################
# Final stage starts from python-install (not build), so compilers and build
# caches never reach the shipped image.
FROM python-install AS vllm-openai

WORKDIR /workspace

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=$VIRTUAL_ENV/bin/:$PATH

# Triton needs a CC compiler
RUN dnf install -y --setopt=install_weak_deps=0 --nodocs gcc \
    && dnf clean all

# install vllm wheel first, so that torch etc will be installed
# ($(echo ...) expands the single built wheel glob before the [tensorizer]
# extra suffix is appended)
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
    --mount=type=cache,target=/root/.cache/pip \
    pip install $(echo dist/*.whl)'[tensorizer]' --verbose

ENV HF_HUB_OFFLINE=1 \
    PORT=8000 \
    HOME=/home/vllm \
    VLLM_USAGE_SOURCE=production-docker-image

# setup non-root user for OpenShift
# In OpenShift the user ID is randomly assigned, for compatibility we also
# set up a non-root user here (gid 0 + group-writable dirs is the OpenShift
# arbitrary-UID convention).
RUN umask 002 \
    && useradd --uid 2000 --gid 0 vllm \
    && chmod g+rwx $HOME /usr/src /workspace

COPY LICENSE /licenses/vllm.md

USER 2000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
Loading