From b74f88fbf7fa632c72f36d6b378761fe64343e11 Mon Sep 17 00:00:00 2001 From: wangyu <410167048@qq.com> Date: Thu, 9 Apr 2026 15:07:28 +0800 Subject: [PATCH] [CI] Update environment variables for test configurations in Buildkite YAML files Signed-off-by: wangyu <410167048@qq.com> --- .buildkite/test-merge.yml | 13 +++++-------- .buildkite/test-nightly-diffusion.yml | 14 +++++--------- .buildkite/test-nightly.yml | 11 +++++------ .buildkite/test-ready.yml | 17 +++++------------ .../models/bagel/test_trajectory_recording.py | 8 ++++---- vllm_omni/diffusion/cache/teacache/backend.py | 9 --------- .../cache/teacache/coefficient_estimator.py | 8 -------- .../models/bagel/bagel_transformer.py | 18 +++++++++--------- .../diffusion/models/bagel/pipeline_bagel.py | 8 ++++---- 9 files changed, 37 insertions(+), 69 deletions(-) diff --git a/.buildkite/test-merge.yml b/.buildkite/test-merge.yml index f98ff171407..7355e2b4c7c 100644 --- a/.buildkite/test-merge.yml +++ b/.buildkite/test-merge.yml @@ -1,3 +1,8 @@ +env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + HF_HUB_DOWNLOAD_TIMEOUT: 300 + HF_HUB_ETAG_TIMEOUT: 60 + steps: - label: "Simple Unit Test" depends_on: upload-merge-pipeline @@ -169,7 +174,6 @@ steps: commands: - | timeout 15m bash -c ' - export VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -s -v tests/engine/test_async_omni_engine_abort.py ' agents: @@ -191,7 +195,6 @@ steps: depends_on: upload-merge-pipeline commands: - export VLLM_LOGGING_LEVEL=DEBUG - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py tests/e2e/online_serving/test_qwen2_5_omni.py -m "advanced_model" --run-level "advanced_model" agents: queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU @@ -212,7 +215,6 @@ steps: - | timeout 20m bash -c ' export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" pytest -s -v tests/e2e/online_serving/test_qwen3_tts_customvoice.py tests/e2e/offline_inference/test_qwen3_tts_customvoice.py -m "advanced_model" --run-level "advanced_model" ' @@ -235,7 +237,6 @@ steps: - | timeout 20m bash -c ' export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" pytest -s -v tests/e2e/online_serving/test_qwen3_tts_base.py tests/e2e/offline_inference/test_qwen3_tts_base.py -m "advanced_model" --run-level "advanced_model" ' @@ -256,7 +257,6 @@ steps: timeout_in_minutes: 30 depends_on: upload-merge-pipeline commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_TEST_CLEAN_GPU_MEMORY="1" - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py tests/e2e/online_serving/test_mimo_audio.py -m "advanced_model" --run-level "advanced_model" agents: @@ -297,7 +297,6 @@ steps: timeout_in_minutes: 20 depends_on: upload-merge-pipeline commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py agents: queue: "mithril-h100-pool" @@ -340,7 +339,6 @@ steps: - | timeout 55m bash -c ' set -e - export VLLM_WORKER_MULTIPROC_METHOD=spawn export VLLM_TEST_CLEAN_GPU_MEMORY=1 export VLLM_IMAGE_FETCH_TIMEOUT=60 pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py -m "advanced_model" --run-level "advanced_model" -k "shared_memory" @@ -387,7 +385,6 @@ steps: - | timeout 20m bash -c ' export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -s -v tests/e2e/online_serving/test_voxtral_tts.py tests/e2e/offline_inference/test_voxtral_tts.py -m "advanced_model" --run-level "advanced_model" ' agents: diff --git a/.buildkite/test-nightly-diffusion.yml b/.buildkite/test-nightly-diffusion.yml index 73bf4551136..742624e8b51 100644 --- a/.buildkite/test-nightly-diffusion.yml +++ b/.buildkite/test-nightly-diffusion.yml @@ -2,6 +2,11 @@ # buildkite-agent pipeline upload .buildkite/test-nightly-diffusion.yml # from test-nightly.yml (step key: nightly-diffusion-model-test). Top-level groups are # foldable in the Buildkite UI (Other / Wan / Qwen-Image). +env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + HF_HUB_DOWNLOAD_TIMEOUT: 300 + HF_HUB_ETAG_TIMEOUT: 60 + steps: - group: ":card_index_dividers: Other Model Test" key: nightly-other-model-test-group @@ -10,7 +15,6 @@ steps: timeout_in_minutes: 120 if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -k "not test_wan22_expansion and not test_wan_2_1_vace_expansion and not test_qwen_image" -m "advanced_model and diffusion and H100" --run-level "advanced_model" agents: queue: "mithril-h100-pool" @@ -50,7 +54,6 @@ steps: timeout_in_minutes: 60 if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and diffusion and L4" --run-level "advanced_model" agents: queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU @@ -70,7 +73,6 @@ steps: timeout_in_minutes: 60 if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_TEST_CLEAN_GPU_MEMORY="1" - pytest -s -v tests/examples/online_serving/test_text_to_image.py tests/examples/offline_inference/test_text_to_image.py -m "advanced_model and example and H100" --run-level "advanced_model" agents: @@ -114,7 +116,6 @@ steps: timeout_in_minutes: 90 if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/online_serving/test_wan22_expansion.py tests/e2e/online_serving/test_wan_2_1_vace_expansion.py -m "advanced_model" --run-level "advanced_model" agents: queue: "mithril-h100-pool" @@ -155,7 +156,6 @@ steps: timeout_in_minutes: 180 if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/accuracy/wan22_i2v/test_wan22_i2v_video_similarity.py --run-level advanced_model agents: queue: "mithril-h100-pool" @@ -198,7 +198,6 @@ steps: timeout_in_minutes: 120 if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/online_serving/test_qwen_image*_expansion.py -m "advanced_model and diffusion and H100" --run-level "advanced_model" agents: queue: "mithril-h100-pool" @@ -239,7 +238,6 @@ steps: timeout_in_minutes: 60 if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/accuracy/test_gebench_h100_smoke.py --run-level advanced_model --gebench-model Qwen/Qwen-Image-2512 --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gebench-port 8093 --accuracy-workers 1 - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gebench_qwen-image-2512/summary*.json" agents: @@ -281,7 +279,6 @@ steps: timeout_in_minutes: 60 if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/accuracy/test_gedit_bench_h100_smoke.py --run-level advanced_model --gedit-model Qwen/Qwen-Image-Edit --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gedit-port 8093 --gedit-samples-per-group 20 --accuracy-workers 1 - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_vie_score_*.csv" - buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_summary_*.json" @@ -326,7 +323,6 @@ steps: timeout_in_minutes: 180 if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - export DIFFUSION_BENCHMARK_DIR=tests/dfx/perf/results - export CACHE_DIT_VERSION=1.3.0 - pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json diff --git a/.buildkite/test-nightly.yml b/.buildkite/test-nightly.yml index 62f6e4dceb6..0d1c8eaccf4 100644 --- a/.buildkite/test-nightly.yml +++ b/.buildkite/test-nightly.yml @@ -1,3 +1,8 @@ +env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + HF_HUB_DOWNLOAD_TIMEOUT: 300 + HF_HUB_ETAG_TIMEOUT: 60 + steps: # Group: collapses under one heading in the Buildkite UI; child steps still run in parallel. - group: ":card_index_dividers: Omni Model Test" @@ -8,7 +13,6 @@ steps: depends_on: upload-nightly-pipeline if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and H100 and omni" --run-level "advanced_model" agents: queue: "mithril-h100-pool" @@ -49,7 +53,6 @@ steps: depends_on: upload-nightly-pipeline if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and L4 and omni" --run-level "advanced_model" agents: @@ -71,7 +74,6 @@ steps: depends_on: upload-nightly-pipeline if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" - pytest -s -v tests/examples/ -m "advanced_model and omni and L4" --run-level "advanced_model" agents: @@ -93,7 +95,6 @@ steps: depends_on: upload-nightly-pipeline if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v tests/examples/ -m "advanced_model and omni and H100" --run-level "advanced_model" agents: queue: "mithril-h100-pool" @@ -135,7 +136,6 @@ steps: depends_on: upload-nightly-pipeline if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - export BENCHMARK_DIR=tests/dfx/perf/results - export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" - pytest -s -v tests/dfx/perf/scripts/run_benchmark.py @@ -193,7 +193,6 @@ steps: depends_on: upload-nightly-pipeline if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test" commands: - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - python tools/nightly/buildkite_testcase_statistics.py -o tests/dfx/perf/results/buildkite_testcase_statistics.html - buildkite-agent artifact upload "tests/dfx/perf/results/*.html" agents: diff --git a/.buildkite/test-ready.yml b/.buildkite/test-ready.yml index 6f3ad6504e8..2f1f05463af 100644 --- a/.buildkite/test-ready.yml +++ b/.buildkite/test-ready.yml @@ -1,3 +1,8 @@ +env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + HF_HUB_DOWNLOAD_TIMEOUT: 300 + HF_HUB_ETAG_TIMEOUT: 60 + steps: - label: "Simple Unit Test" depends_on: upload-ready-pipeline @@ -173,7 +178,6 @@ steps: commands: - | timeout 15m bash -c ' - export VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -s -v tests/engine/test_async_omni_engine_abort.py ' agents: @@ -197,7 +201,6 @@ steps: - | timeout 17m bash -c ' export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -s -v tests/e2e/online_serving/test_qwen2_5_omni.py -m "core_model" --run-level "core_model" ' agents: @@ -218,7 +221,6 @@ steps: commands: - | timeout 20m bash -c ' - export VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py -m "core_model" --run-level "core_model" ' agents: @@ -256,7 +258,6 @@ steps: - | timeout 30m bash -c ' export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -s -v tests/e2e/online_serving/test_mimo_audio.py -m "core_model" --run-level "core_model" ' agents: @@ -299,7 +300,6 @@ steps: - | timeout 20m bash -c ' export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1" pytest -s -v tests/e2e/online_serving/test_qwen3_tts_customvoice.py -m "core_model" --run-level "core_model" ' @@ -324,7 +324,6 @@ steps: - | timeout 20m bash -c ' export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -s -v tests/e2e/online_serving/test_omnivoice.py -m "core_model" --run-level "core_model" ' agents: @@ -347,7 +346,6 @@ steps: - | timeout 20m bash -c ' export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -s -v tests/e2e/online_serving/test_voxtral_tts.py -m "core_model" --run-level "core_model" ' agents: @@ -384,7 +382,6 @@ steps: # commands: # - | # timeout 20m bash -c ' - # export VLLM_WORKER_MULTIPROC_METHOD=spawn # pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py # ' # agents: @@ -421,7 +418,6 @@ steps: commands: - | timeout 30m bash -c ' - export VLLM_WORKER_MULTIPROC_METHOD=spawn export VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py -m "core_model" --run-level "core_model" ' @@ -464,7 +460,6 @@ steps: commands: - | timeout 30m bash -c ' - export VLLM_WORKER_MULTIPROC_METHOD=spawn export VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py -m "core_model" --run-level "core_model" ' @@ -507,7 +502,6 @@ steps: commands: - | timeout 40m bash -c ' - export VLLM_WORKER_MULTIPROC_METHOD=spawn export VLLM_TEST_CLEAN_GPU_MEMORY=1 export VLLM_IMAGE_FETCH_TIMEOUT=60 pytest -s -v tests/e2e/online_serving/test_bagel_online.py -m "core_model" --run-level "core_model" @@ -552,7 +546,6 @@ steps: commands: - | timeout 20m bash -c ' - export VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -s -v tests/e2e/online_serving/test_cosyvoice3_tts.py -m "core_model" --run-level "core_model" ' agents: diff --git a/tests/diffusion/models/bagel/test_trajectory_recording.py b/tests/diffusion/models/bagel/test_trajectory_recording.py index 7518388d28e..80b3f9d9ba7 100644 --- a/tests/diffusion/models/bagel/test_trajectory_recording.py +++ b/tests/diffusion/models/bagel/test_trajectory_recording.py @@ -24,15 +24,15 @@ def _make_mock_bagel(): - """Create a mock Bagel with _forward_flow returning constant velocity.""" + """Create a mock Bagel with forward returning constant velocity.""" mock = MagicMock(spec=Bagel) mock._sp_size = 1 - # _forward_flow returns a small constant velocity so x_t changes each step - def fake_forward_flow(self, x_t, **kwargs): + # forward returns a small constant velocity so x_t changes each step + def fake_forward(self, x_t, **kwargs): return torch.ones_like(x_t) * 0.1 - mock._forward_flow = types.MethodType(fake_forward_flow, mock) + mock.forward = types.MethodType(fake_forward, mock) # _merge_naive_caches is called in the batched CFG path mock._merge_naive_caches = types.MethodType(lambda self, caches: NaiveCache(1), mock) diff --git a/vllm_omni/diffusion/cache/teacache/backend.py b/vllm_omni/diffusion/cache/teacache/backend.py index a5087fe0c24..772dec78913 100644 --- a/vllm_omni/diffusion/cache/teacache/backend.py +++ b/vllm_omni/diffusion/cache/teacache/backend.py @@ -48,16 +48,7 @@ def enable_bagel_teacache(pipeline: Any, config: DiffusionCacheConfig) -> None: coefficients=config.coefficients, ) transformer = pipeline.bagel - original_forward_flow = transformer._forward_flow - - import types - - def forward_alias(self, *args, **kwargs): - return original_forward_flow(*args, **kwargs) - - transformer.forward = types.MethodType(forward_alias, transformer) apply_teacache_hook(transformer, teacache_config) - transformer._forward_flow = transformer.forward pipeline.transformer = transformer logger.info( diff --git a/vllm_omni/diffusion/cache/teacache/coefficient_estimator.py b/vllm_omni/diffusion/cache/teacache/coefficient_estimator.py index f3a278b2174..5dd80718d11 100644 --- a/vllm_omni/diffusion/cache/teacache/coefficient_estimator.py +++ b/vllm_omni/diffusion/cache/teacache/coefficient_estimator.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import types from typing import Any import numpy as np @@ -74,15 +73,8 @@ def get_transformer(pipeline: Any) -> tuple[Any, str]: @staticmethod def install_hook(transformer: Any, hook: DataCollectionHook) -> None: - original_forward_flow = transformer._forward_flow - - def forward_alias(self, *args, **kwargs): - return original_forward_flow(*args, **kwargs) - - transformer.forward = types.MethodType(forward_alias, transformer) registry = HookRegistry.get_or_create(transformer) registry.register_hook(hook._HOOK_NAME, hook) - transformer._forward_flow = transformer.forward class StableAudioAdapter: diff --git a/vllm_omni/diffusion/models/bagel/bagel_transformer.py b/vllm_omni/diffusion/models/bagel/bagel_transformer.py index a04ded37653..f8480775687 100644 --- a/vllm_omni/diffusion/models/bagel/bagel_transformer.py +++ b/vllm_omni/diffusion/models/bagel/bagel_transformer.py @@ -1734,7 +1734,7 @@ def generate_image( packed_seqlens=packed_seqlens, ) - v_t = self._forward_flow_single_branch( + v_t = self.forward_single_branch( **common, packed_indexes=packed_indexes, packed_position_ids=packed_position_ids, @@ -1744,7 +1744,7 @@ def generate_image( ) if cfg_text_scale_ > 1.0: - cfg_text_v_t = self._forward_flow_single_branch( + cfg_text_v_t = self.forward_single_branch( **common, packed_indexes=cfg_text_packed_query_indexes, packed_position_ids=cfg_text_packed_position_ids, @@ -1754,7 +1754,7 @@ def generate_image( ) cfg_img_v_t = None if cfg_img_scale_ > 1.0: - cfg_img_v_t = self._forward_flow_single_branch( + cfg_img_v_t = self.forward_single_branch( **common, packed_indexes=cfg_img_packed_query_indexes, packed_position_ids=cfg_img_packed_position_ids, @@ -1790,7 +1790,7 @@ def generate_image( if use_sp: for i, t in enumerate(timesteps): timestep = torch.tensor([t] * x_t.shape[0], device=x_t.device) - v_t = self._forward_flow_single_branch( + v_t = self.forward_single_branch( x_t=x_t, timestep=timestep, packed_vae_token_indexes=packed_vae_token_indexes, @@ -1883,7 +1883,7 @@ def generate_image( else: cfg_text_scale_ = 1.0 cfg_img_scale_ = 1.0 - v_t = self._forward_flow( + v_t = self.forward( x_t=x_t, timestep=timestep, packed_vae_token_indexes=packed_vae_token_indexes, @@ -2019,7 +2019,7 @@ def _generate_image_parallel( if use_cfg_this_step: # CFG interval: each rank computes its own branch - local_v_t = self._forward_flow_single_branch( + local_v_t = self.forward_single_branch( x_t=x_t, timestep=timestep, packed_vae_token_indexes=packed_vae_token_indexes, @@ -2046,7 +2046,7 @@ def _generate_image_parallel( ) else: # Outside CFG interval: all ranks compute with gen inputs, no comm - v_t = self._forward_flow_single_branch( + v_t = self.forward_single_branch( x_t=x_t, timestep=timestep, packed_vae_token_indexes=packed_vae_token_indexes, @@ -2128,7 +2128,7 @@ def _combine_cfg( return v_t - def _forward_flow_single_branch( + def forward_single_branch( self, x_t: torch.Tensor, timestep: torch.LongTensor, @@ -2258,7 +2258,7 @@ def _forward_flow_single_branch( v_t = v_t[packed_vae_token_indexes] return v_t - def _forward_flow( + def forward( self, x_t: torch.Tensor, timestep: torch.LongTensor, diff --git a/vllm_omni/diffusion/models/bagel/pipeline_bagel.py b/vllm_omni/diffusion/models/bagel/pipeline_bagel.py index 2c72d98908d..13d0cc2093b 100644 --- a/vllm_omni/diffusion/models/bagel/pipeline_bagel.py +++ b/vllm_omni/diffusion/models/bagel/pipeline_bagel.py @@ -159,8 +159,8 @@ def __init__(self, *, od_config: OmniDiffusionConfig, prefix: str = ""): self.od_config = od_config self.device = get_local_device() - self._scheduler: object | None = None - self._scheduler_kwargs: dict = {} + self.scheduler: object | None = None + self.scheduler_kwargs: dict = {} model = od_config.model local_files_only = os.path.exists(model) @@ -654,8 +654,8 @@ def vae_transforms(img): cfg_img_key_values_lens=generation_input_cfg_img["cfg_key_values_lens"], cfg_img_packed_key_value_indexes=generation_input_cfg_img["cfg_packed_key_value_indexes"], return_trajectory_latents=req.sampling_params.return_trajectory_latents, - scheduler=self._scheduler, - scheduler_kwargs=self._scheduler_kwargs, + scheduler=self.scheduler, + scheduler_kwargs=self.scheduler_kwargs, ) img = self._decode_image_from_latent(self.bagel, self.vae, latents[0], image_shape)