From b84b39556b8a68b6c46cff1f8cdc782ba3616ad6 Mon Sep 17 00:00:00 2001 From: Lancer Date: Wed, 18 Feb 2026 20:22:44 +0800 Subject: [PATCH 1/2] [Feat] cache-dit for GLM-Image Signed-off-by: Lancer --- .../diffusion/cache/cache_dit_backend.py | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/vllm_omni/diffusion/cache/cache_dit_backend.py b/vllm_omni/diffusion/cache/cache_dit_backend.py index d7165eda26..f4c0542649 100644 --- a/vllm_omni/diffusion/cache/cache_dit_backend.py +++ b/vllm_omni/diffusion/cache/cache_dit_backend.py @@ -1058,6 +1058,82 @@ def refresh_cache_context(pipeline: Any, num_inference_steps: int, verbose: bool return refresh_cache_context +def enable_cache_for_glm_image(pipeline: Any, cache_config: Any) -> Callable[[int], None]: + """Enable cache-dit for GlmImage pipeline. + + Args: + pipeline: The GlmImage pipeline instance. + cache_config: DiffusionCacheConfig instance with cache configuration. + Returns: + A refresh function that can be called with a new ``num_inference_steps`` + to update the cache context for the pipeline. 
+ """ + # Build DBCacheConfig for transformer + db_cache_config = _build_db_cache_config(cache_config) + + calibrator = None + if cache_config.enable_taylorseer: + taylorseer_order = cache_config.taylorseer_order + calibrator = TaylorSeerCalibratorConfig(taylorseer_order=taylorseer_order) + logger.info(f"TaylorSeer enabled with order={taylorseer_order}") + + # Build ParamsModifier for transformer + modifier = ParamsModifier( + cache_config=db_cache_config, + calibrator_config=calibrator, + ) + + logger.info( + f"Enabling cache-dit on GlmImage transformer with BlockAdapter: " + f"Fn={db_cache_config.Fn_compute_blocks}, " + f"Bn={db_cache_config.Bn_compute_blocks}, " + f"W={db_cache_config.max_warmup_steps}, " + ) + + # Enable cache-dit using BlockAdapter for transformer + # Note: We don't use patch_functor here because it's designed for diffusers' GlmImage, + # and our vllm-omni implementation has a different forward signature. + # We use ForwardPattern.Pattern_0 because our block returns (hidden_states, encoder_hidden_states) + cache_dit.enable_cache( + ( + BlockAdapter( + transformer=pipeline.transformer, + blocks=pipeline.transformer.transformer_blocks, + forward_pattern=ForwardPattern.Pattern_0, + params_modifiers=[modifier], + patch_functor=None, + has_separate_cfg=True, + ) + ), + cache_config=db_cache_config, + ) + + def refresh_cache_context(pipeline: Any, num_inference_steps: int, verbose: bool = True) -> None: + """Refresh cache context for the transformer with new num_inference_steps. + + Args: + pipeline: The GlmImage pipeline instance. + num_inference_steps: New number of inference steps. 
+ """ + if cache_config.scm_steps_mask_policy is None: + cache_dit.refresh_context(pipeline.transformer, num_inference_steps=num_inference_steps, verbose=verbose) + else: + cache_dit.refresh_context( + pipeline.transformer, + cache_config=DBCacheConfig().reset( + num_inference_steps=num_inference_steps, + steps_computation_mask=cache_dit.steps_mask( + mask_policy=cache_config.scm_steps_mask_policy, + total_steps=num_inference_steps, + ), + steps_computation_policy=cache_config.scm_steps_policy, + ), + verbose=verbose, + ) + + return refresh_cache_context + + # Register custom cache-dit enablers after function definitions CUSTOM_DIT_ENABLERS.update( { @@ -1073,6 +1149,7 @@ def refresh_cache_context(pipeline: Any, num_inference_steps: int, verbose: bool "LTX2ImageToVideoPipeline": enable_cache_for_ltx2, "BagelPipeline": enable_cache_for_bagel, "Flux2Pipeline": enable_cache_for_flux2, + "GlmImagePipeline": enable_cache_for_glm_image, } ) From ba0d754ebae2f5b1a030b47b9a650c4fb5edb023 Mon Sep 17 00:00:00 2001 From: Lancer Date: Fri, 17 Apr 2026 11:13:32 +0800 Subject: [PATCH 2/2] [Fix] Remove duplicate GLM-Image cache-dit enabler and registry entry Signed-off-by: Lancer --- .../diffusion/cache/cache_dit_backend.py | 31 ------------------- 1 file changed, 31 deletions(-) diff --git a/vllm_omni/diffusion/cache/cache_dit_backend.py b/vllm_omni/diffusion/cache/cache_dit_backend.py index 3ad0679fb7..db51d33b76 100644 --- a/vllm_omni/diffusion/cache/cache_dit_backend.py +++ b/vllm_omni/diffusion/cache/cache_dit_backend.py @@ -1168,36 +1168,6 @@ def refresh_cache_context(pipeline: Any, num_inference_steps: int, verbose: bool return refresh_cache_context -def enable_cache_for_glm_image(pipeline: Any, cache_config: Any) -> Callable[[int], None]: - """Enable cache-dit for GLM-Image pipeline. - - GLM-Image processes prompt and image by calling the transformer before the - denoising loop. When an input image is provided (editing mode), the cache must - be force-refreshed after the preprocessing step so stale hidden states are - discarded. 
Set force_refresh_step_hint = 1 for editing, None for text-to-image. - """ - db_cache_config = _build_db_cache_config(cache_config) - - calibrator_config = None - if cache_config.enable_taylorseer: - calibrator_config = TaylorSeerCalibratorConfig(taylorseer_order=cache_config.taylorseer_order) - logger.info(f"TaylorSeer enabled with order={cache_config.taylorseer_order}") - - logger.info( - f"Enabling cache-dit on GLM-Image transformer: " - f"Fn={db_cache_config.Fn_compute_blocks}, " - f"Bn={db_cache_config.Bn_compute_blocks}, " - f"W={db_cache_config.max_warmup_steps}, " - f"force_refresh_step_hint={db_cache_config.force_refresh_step_hint}, " - ) - - cache_dit.enable_cache( - pipeline.transformer, - cache_config=db_cache_config, - calibrator_config=calibrator_config, - ) - - def enable_cache_for_flux2(pipeline: Any, cache_config: Any) -> Callable[[int], None]: """Enable cache-dit for Flux.2-dev pipeline. @@ -1368,7 +1338,6 @@ def refresh_cache_context(pipeline: Any, num_inference_steps: int, verbose: bool "BagelPipeline": enable_cache_for_bagel, "GlmImagePipeline": enable_cache_for_glm_image, "Flux2Pipeline": enable_cache_for_flux2, - "GlmImagePipeline": enable_cache_for_glm_image, } )