Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions .buildkite/test-nightly.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
steps:
- label: "Omni Model Test with H100"
- label: ":full_moon: Omni Model Test with H100"
timeout_in_minutes: 90
depends_on: upload-nightly-pipeline
if: build.env("NIGHTLY") == "1"
Expand Down Expand Up @@ -41,7 +41,7 @@ steps:
path: /mnt/hf-cache
type: DirectoryOrCreate

- label: "Omni Model Test"
- label: ":full_moon: Omni Model Test"
timeout_in_minutes: 60
depends_on: upload-nightly-pipeline
if: build.env("NIGHTLY") == "1"
Expand All @@ -56,18 +56,22 @@ steps:
always-pull: true
shm-size: "8gb"
propagate-environment: true
shm-size: "8gb"
environment:
- "HF_HOME=/fsx/hf_cache"
volumes:
- "/fsx/hf_cache:/fsx/hf_cache"

- label: "Omni Model Perf Test"
timeout_in_minutes: 120
- label: ":full_moon: Omni Model Perf Test with H100"
key: nightly-performance
timeout_in_minutes: 180
depends_on: upload-nightly-pipeline
if: build.env("NIGHTLY") == "1"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export BENCHMARK_DIR=tests
- pytest -s -v tests/perf/scripts/run_benchmark.py
- buildkite-agent artifact upload "tests/*.json"
agents:
queue: "mithril-h100-pool"
plugins:
Expand Down Expand Up @@ -96,3 +100,18 @@ steps:
hostPath:
path: /mnt/hf-cache
type: DirectoryOrCreate

- label: ":email: Nightly Perf Collection & Email"
key: nightly-perf-distribution
depends_on: nightly-performance
if: build.env("NIGHTLY") == "1"
commands:
- pip install openpyxl
- export DEFAULT_INPUT_DIR=tests
- export DEFAULT_OUTPUT_DIR=tests
- buildkite-agent artifact download "tests/*.json" . --step nightly-performance
- python tools/nightly/generate_nightly_perf_excel.py
- python tools/nightly/send_nightly_perf_email.py
- buildkite-agent artifact upload "tests/*.xlsx"
agents:
queue: "cpu_queue_premerge"
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ dev = [
"imageio[ffmpeg]>=0.6.0",
"opencv-python>=4.12.0.88",
"mooncake-transfer-engine==0.3.8.post1",
"av" # for ComfyUI tests
"av", # for ComfyUI tests
"openpyxl>=3.0.0", # for nightly CI
]

docs = [
Expand Down
17 changes: 9 additions & 8 deletions tests/perf/scripts/run_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
import os

os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "0"

import json
import os
import subprocess
import threading
from datetime import datetime
Expand All @@ -14,6 +10,9 @@

from tests.conftest import OmniServer, modify_stage_config

os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "0"


def load_configs(config_path: str) -> list[dict[str, Any]]:
try:
Expand Down Expand Up @@ -123,6 +122,8 @@ def run_benchmark(args: list, test_name: str, flow, dataset_name: str, num_promp
"--endpoint",
"/v1/chat/completions",
"--save-result",
"--result-dir",
os.environ.get("BENCHMARK_DIR", "tests"),
"--result-filename",
result_filename,
]
Expand All @@ -137,9 +138,9 @@ def run_benchmark(args: list, test_name: str, flow, dataset_name: str, num_promp
for line in iter(process.stderr.readline, ""):
print(line, end=" ")

if "--result-dir" in args:
index = args.index("--result-dir")
result_dir = args[index + 1]
if "--result-dir" in command:
index = command.index("--result-dir")
result_dir = command[index + 1]
else:
result_dir = "./"

Expand Down
Loading