diff --git a/.github/workflows/add_label_automerge.yml b/.github/workflows/add_label_automerge.yml
deleted file mode 100644
index cd53b764c720..000000000000
--- a/.github/workflows/add_label_automerge.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-name: Add label on auto-merge enabled
-on:
-    pull_request_target:
-        types:
-            - auto_merge_enabled
-jobs:
-    add-label-on-auto-merge:
-        runs-on: ubuntu-latest
-        steps:
-            -   name: Add label
-                uses: actions/github-script@v5
-                with:
-                    script: |
-                        github.rest.issues.addLabels({
-                            owner: context.repo.owner,
-                            repo: context.repo.repo,
-                            issue_number: context.issue.number,
-                            labels: ['ready']
-                        })
-                env:
-                    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/add_label_ready_comment.yml b/.github/workflows/add_label_ready_comment.yml
deleted file mode 100644
index 729c1452af03..000000000000
--- a/.github/workflows/add_label_ready_comment.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: Add Ready Label on Ready Comment
-
-on:
-  issue_comment:
-    types: [created]
-
-jobs:
-  add-ready-label:
-    runs-on: ubuntu-latest
-    if: github.event.issue.pull_request && contains(github.event.comment.body, '/ready')
-    steps:
-        -   name: Add label
-            uses: actions/github-script@v5
-            with:
-                script: |
-                    github.rest.issues.addLabels({
-                        owner: context.repo.owner,
-                        repo: context.repo.repo,
-                        issue_number: context.issue.number,
-                        labels: ['ready']
-                    })
-            env:
-                GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/reminder_comment.yml b/.github/workflows/reminder_comment.yml
deleted file mode 100644
index 390c88bb6530..000000000000
--- a/.github/workflows/reminder_comment.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-name: PR Reminder Comment Bot
-on:
-  pull_request_target:
-    types: [opened]
-
-jobs:
-  pr_reminder:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Remind to run full CI on PR
-        uses: actions/github-script@v6
-        with:
-          script: |
-            github.rest.issues.createComment({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: context.issue.number,
-              body: '👋 Hi! Thank you for contributing to the vLLM project.\n Just a reminder: PRs would not trigger full CI run by default. Instead, it would only run `fastcheck` CI which consists a small and essential subset of CI tests to quickly catch errors. You can run other CI tests on top of default ones by unblocking the steps in your `fast-check` build on Buildkite UI. \n\nOnce the PR is approved and ready to go, please make sure to run full CI as it is required to merge (or just use auto-merge).\n\n To run full CI, you can do one of these:\n- Comment `/ready` on the PR\n- Add `ready` label to the PR\n- Enable auto-merge.\n\n🚀'
-            })
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/sync-with-upstream.yml b/.github/workflows/sync-with-upstream.yml
new file mode 100644
index 000000000000..53751552f4d2
--- /dev/null
+++ b/.github/workflows/sync-with-upstream.yml
@@ -0,0 +1,84 @@
+name: Sync with upstream
+
+on:
+  # Nightly at 04:20 (GitHub evaluates cron schedules in UTC); can also be started manually.
+  schedule:
+    - cron: "20 4 * * *"
+
+  workflow_dispatch:
+
+env:
+  # Repository the changes are fetched from.
+  UPSTREAM_REPO: vllm-project/vllm
+  # Branch to sync.
+  BRANCH: main
+
+jobs:
+  upstream-sync:
+    name: Sync with upstream
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+
+    steps:
+      # fetch-depth: 0 checks out the complete history instead of a shallow clone.
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Fetch upstream repo
+        run: |
+          git remote add upstream "https://github.com/${UPSTREAM_REPO}"
+          git fetch upstream
+
+      - name: Check diff
+        id: diff
+        shell: bash
+        # Publish the diffstat as multi-line output `diff`; tee appends synchronously so the EOF marker is always written last.
+        run: |
+          echo 'diff<<EOF' >> "$GITHUB_OUTPUT"
+          git diff --stat "upstream/${BRANCH}" | tee -a "$GITHUB_OUTPUT"
+          echo 'EOF' >> "$GITHUB_OUTPUT"
+      # Open a draft PR from upstream into the configured branch, or refresh the title/body of an existing sync PR.
+      - name: Create PR
+        if: ${{ steps.diff.outputs.diff != '' }}
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          set -xeu
+
+          git_hash="$(git rev-parse "upstream/${BRANCH}")"
+          echo "git_hash=$git_hash" >> "$GITHUB_OUTPUT"
+          git_describe="$(git describe --tags "upstream/${BRANCH}")"
+          echo "git_describe=$git_describe" >> "$GITHUB_OUTPUT"
+
+          # echo 'commits<<EOF' >> $GITHUB_OUTPUT
+          # git log --oneline ..upstream/${BRANCH} >> $GITHUB_OUTPUT
+          # echo 'EOF' >> $GITHUB_OUTPUT
+
+          upstream_url="https://github.com/${UPSTREAM_REPO}"
+          upstream_branch="$upstream_url/tree/${BRANCH}"
+
+          title="Sync with upstream@${git_describe}"
+          body="Merge [${UPSTREAM_REPO}]($upstream_url):[${BRANCH}]($upstream_branch)@[${git_describe}](${upstream_url}/commit/$git_hash) into $BRANCH"
+
+          gh repo set-default "$GITHUB_REPOSITORY"
+          pr_number=$(gh pr list -S "Sync with upstream@" --json number --jq '.[0].number')
+
+          if [[ -z "$pr_number" ]]; then
+            echo "Creating PR"
+            gh pr create \
+              --head "${UPSTREAM_REPO//\//:}:${BRANCH}" \
+              --base "${BRANCH}" \
+              --label code-sync \
+              --title "$title" \
+              --body "$body" \
+              --draft \
+              --no-maintainer-edit
+            exit 0
+          fi
+
+          echo "Updating PR #${pr_number}"
+          gh pr edit "$pr_number" \
+            --body "$body" \
+            --title "$title"
diff --git a/Dockerfile.ubi b/Dockerfile.ubi
new file mode 100644
index 000000000000..2d37e232a465
--- /dev/null
+++ b/Dockerfile.ubi
@@ -0,0 +1,202 @@
+## Global Args #################################################################
+ARG BASE_UBI_IMAGE_TAG=9.4
+ARG PYTHON_VERSION=3.11
+# ARGs declared before the first FROM must be re-declared (without a value) in each stage that reads them
+ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+
+## Base Layer ##################################################################
+FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base
+ARG PYTHON_VERSION
+
+RUN microdnf install -y \
+        python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
+    microdnf clean all
+
+WORKDIR /workspace
+
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+
+# Developer conveniences - tar is required for kubectl cp
+RUN microdnf install -y \
+        which procps findutils tar vim git && \
+    microdnf clean all
+
+
+## Python Installer ############################################################
+FROM base AS python-install
+ARG PYTHON_VERSION
+# Create the virtualenv at $VIRTUAL_ENV and put its bin/ first on PATH
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+RUN microdnf install -y \
+        python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
+    python${PYTHON_VERSION} -m venv "$VIRTUAL_ENV" && \
+    pip install --no-cache -U uv pip wheel && microdnf clean all
+
+
+## CUDA Base ###################################################################
+FROM python-install AS cuda-base
+
+RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
+        https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
+
+RUN microdnf install -y \
+        cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \
+    microdnf clean all
+# CUDA_HOME needs its own ENV: a variable set inside an ENV instruction is not visible to substitutions in that same instruction
+ENV CUDA_HOME="/usr/local/cuda"
+ENV PATH="${CUDA_HOME}/bin:${PATH}" \
+    LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
+
+## Python cuda base #################################################################
+FROM cuda-base AS python-cuda-base
+
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# Install the CUDA requirement set into the virtualenv; requirements-common.txt is bind-mounted as well (presumably included from requirements-cuda.txt - confirm)
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
+    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
+    uv pip install \
+        -r requirements-cuda.txt
+
+## Development #################################################################
+FROM python-cuda-base AS dev
+
+# Install the CUDA and dev requirement sets; the common/lint/test files are only bind-mounted (presumably pulled in through -r includes - confirm)
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
+    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
+    --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
+    --mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \
+    --mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \
+    uv pip install \
+        -r requirements-cuda.txt \
+        -r requirements-dev.txt
+
+## Builder #####################################################################
+FROM dev AS build
+
+# Install the Python build requirements into the virtualenv
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
+    uv pip install -r requirements-build.txt
+
+# Enable EPEL, then install ccache (compiler cache: speeds up compilation via local or remote caching)
+# and git (required for the cutlass kernels)
+RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y git ccache && microdnf clean all
+# NOTE(review): git is also installed in the base stage this image derives from
+
+# Copy only the inputs needed to compile the extensions
+COPY csrc csrc
+COPY setup.py setup.py
+COPY cmake cmake
+COPY CMakeLists.txt CMakeLists.txt
+COPY requirements-common.txt requirements-common.txt
+COPY requirements-cuda.txt requirements-cuda.txt
+COPY pyproject.toml pyproject.toml
+
+ARG TORCH_CUDA_ARCH_LIST
+ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
+
+# max jobs used by Ninja to build extensions (override with --build-arg max_jobs=N)
+ARG max_jobs=2
+ENV MAX_JOBS=${max_jobs}
+# number of threads used by nvcc (override with --build-arg nvcc_threads=N)
+ARG nvcc_threads=8
+ENV NVCC_THREADS=$nvcc_threads
+# make sure punica kernels are built (for LoRA)
+ENV VLLM_INSTALL_PUNICA_KERNELS=1
+
+# Make sure the CUDA binaries and libraries are on PATH / LD_LIBRARY_PATH
+ENV PATH=/usr/local/cuda/bin:$PATH
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+
+# Copy the entire directory before building wheel
+COPY vllm vllm
+# Export CFLAGS before deriving CXXFLAGS from it: inside a single `env A=.. B="$A"` the shell expands $A before A is set
+ENV CCACHE_DIR=/root/.cache/ccache
+RUN --mount=type=cache,target=/root/.cache/ccache \
+    --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,src=.git,target=/workspace/.git \
+    export CFLAGS="-march=haswell" && \
+    export CXXFLAGS="$CFLAGS ${CXXFLAGS:-}" && \
+    CMAKE_BUILD_TYPE=Release \
+        python3 setup.py bdist_wheel --dist-dir=dist
+
+#################### libsodium Build IMAGE ####################
+FROM base AS libsodium-builder
+
+RUN microdnf install -y gcc gzip && \
+    microdnf clean all
+
+WORKDIR /usr/src/libsodium
+
+ARG LIBSODIUM_VERSION=1.0.20
+RUN curl -LO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM_VERSION}-RELEASE/libsodium-${LIBSODIUM_VERSION}.tar.gz && \
+    tar -xzvf libsodium*.tar.gz && \
+    rm -f libsodium*.tar.gz && \
+    mv libsodium*/* ./
+# Configure, compile and self-test only; nothing is installed in this stage
+RUN ./configure --prefix="/usr/" && make && make check
+
+## Release #####################################################################
+FROM python-install AS vllm-openai
+
+WORKDIR /workspace
+
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH=$VIRTUAL_ENV/bin/:$PATH
+
+# Triton needs a C compiler (gcc) in the runtime image
+RUN microdnf install -y gcc \
+    && microdnf clean all
+
+# Install the vllm wheel produced by the `build` stage (with the tensorizer extra) first, so that torch etc. are installed
+RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
+    --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install $(echo dist/*.whl)'[tensorizer]' --verbose
+
+# Install libsodium (for Tensorizer encryption) from the tree compiled in the libsodium-builder stage
+RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
+    cd /usr/src/libsodium \
+    && make install
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.9/flashinfer-0.0.9+cu121torch2.3-cp311-cp311-linux_x86_64.whl
+
+ENV HF_HUB_OFFLINE=1 \
+    PORT=8000 \
+    HOME=/home/vllm \
+    VLLM_USAGE_SOURCE=production-docker-image \
+    VLLM_WORKER_MULTIPROC_METHOD=fork
+
+# Non-root user (UID 2000, GID 0) for OpenShift; g+rwx gives group 0 access to $HOME, /usr/src and /workspace
+RUN umask 002 \
+    && useradd --uid 2000 --gid 0 vllm \
+    && chmod g+rwx $HOME /usr/src /workspace
+
+COPY LICENSE /licenses/vllm.md
+
+USER 2000
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+
+
+FROM vllm-openai AS vllm-grpc-adapter
+# Become root only for the package install below; the image drops back to UID 2000 afterwards
+USER root
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install vllm-tgis-adapter==0.2.3
+
+ENV GRPC_PORT=8033
+USER 2000
+ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter"]
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 000000000000..09b25dab41c0
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,28 @@
+approvers:
+  - dtrifiro
+  - fialhocoelho
+  - heyselbi
+  - joerunde
+  - maxdebayser
+  - njhill
+  - prashantgupta24
+  - RH-steve-grubb
+  - rpancham
+  - terrytangyuan
+  - vaibhavjainwiz
+  - Xaenalt
+  - z103cb
+reviewers:
+  - dtrifiro
+  - fialhocoelho
+  - heyselbi
+  - joerunde
+  - maxdebayser
+  - njhill
+  - prashantgupta24
+  - RH-steve-grubb
+  - rpancham
+  - terrytangyuan
+  - vaibhavjainwiz
+  - Xaenalt
+  - z103cb
diff --git a/extras/smoke-test.sh b/extras/smoke-test.sh
new file mode 100644
index 000000000000..f03edea4f619
--- /dev/null
+++ b/extras/smoke-test.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+set -uxo pipefail
+
+# we will need to download test models off HF hub
+unset HF_HUB_OFFLINE
+
+export HTTP_PORT=8080
+export GRPC_PORT=8033
+
+
+function wait_for(){
+    # Usage: wait_for NAME COMMAND [ARGS...]
+    # Re-runs COMMAND every 30 seconds until it succeeds; exits the script with status 1 after 10 failed attempts.
+    local name=$1
+    shift
+
+    # Traps are global in bash, so the ERR trap is left alone here; a failing `until` condition never fires it anyway.
+    local max_retries=10
+    until "$@" ; do
+        echo "Waiting for $name to be up (retries_left=$max_retries)..."
+        sleep 30
+        max_retries=$((max_retries-1))
+        if [[ $max_retries -le 0 ]]; then
+            echo "Timed out waiting for $name server" >&2
+            exit 1
+        fi
+    done
+}
+
+# stop the server on any errors
+trap 'kill -9 $server_pid && exit 1' ERR
+
+# spin up the OpenAPI server in the background
+python -m vllm.entrypoints.openai.api_server --port $HTTP_PORT &
+server_pid=$!
+server_url="http://localhost:$HTTP_PORT"
+
+wait_for "http server" curl --verbose --connect-timeout 1 --fail-with-body --no-progress-meter "${server_url}/health"
+
+curl -v --no-progress-meter --fail-with-body \
+  "${server_url}/v1/models" | python -m json.tool || \
+
+curl -v --no-progress-meter --fail-with-body \
+  --header "Content-Type: application/json" \
+  --data '{
+    "prompt": "A red fedora symbolizes ",
+    "model": "facebook/opt-125m"
+}' \
+  "${server_url}/v1/completions" | python -m json.tool
+
+echo "OpenAI API success" && kill -9 $server_pid
+
+
+# start the gRPC adapter in the background and remember its pid
+python -m vllm_tgis_adapter --grpc-port "$GRPC_PORT" &
+server_pid=$!
+server_url="localhost:$GRPC_PORT"
+
+# download the grpcurl release tarball and unpack it into /tmp
+curl --no-progress-meter --location --output /tmp/grpcurl.tar.gz \
+  https://github.com/fullstorydev/grpcurl/releases/download/v1.9.1/grpcurl_1.9.1_linux_x86_64.tar.gz
+tar --extract --file=/tmp/grpcurl.tar.gz --directory=/tmp
+
+wait_for "grpc_server" grpc_healthcheck # healthcheck is part of vllm_tgis_adapter
+
+/tmp/grpcurl -v \
+    -plaintext \
+    -use-reflection \
+    -d '{ "requests": [{"text": "A red fedora symbolizes "}]}' \
+    "${server_url}" \
+    fmaas.GenerationService/Generate
+
+echo "GRPC API success" && kill -9 "$server_pid"
diff --git a/extras/unit-tests.sh b/extras/unit-tests.sh
new file mode 100644
index 000000000000..08b2388b646e
--- /dev/null
+++ b/extras/unit-tests.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# partially copied from from .buildkite/test-pipeline.yml
+set -e
+
+cd tests || exit 1
+
+# we will need to download test models off HF hub
+unset HF_HUB_OFFLINE
+
+# basic correctness
+pytest -v -s test_regression.py
+pytest -v -s async_engine
+VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py
+VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py
+VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
+VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
+VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
+
+# core
+pytest -v -s core
+
+# note: distributed tests are disabled
+
+# engine tests
+pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
+# entrypoint
+pytest -v -s entrypoints -m openai
+
+#inputs (note: multimodal tests are skipped)
+pytest -v -s test_inputs.py
+
+#models
+pytest -v -s models -m \"not vlm\"
+
+# misc
+pytest -v -s prefix_caching
+pytest -v -s samplers
+pytest -v -s test_logits_processor.py
+pytest -v -s models -m \"not vlm\"
+pytest -v -s worker
+VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s spec_decode
+# pytest -v -s tensorizer_loader # disabled: requires libsodium
+pytest -v -s metrics
+pytest -v -s quantization