sgl-project · eitanturok · Feb 8, 2026 · Feb 8, 2026 · Feb 9, 2026 · Feb 9, 2026
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: Apache-2.0
+from dataclasses import dataclass
+
+from sglang.multimodal_gen.configs.sample.sampling_params import CacheParams
+
+
+@dataclass
+class MagCacheParams(CacheParams):
+    """
+    MagCache configuration for magnitude-ratio-based caching.
+
+    MagCache accelerates diffusion inference by skipping forward passes when
+    magnitude ratios of consecutive residuals are predictably similar.
+
+    Attributes:
+        threshold: Accumulated error threshold (default 0.12; the paper uses 0.06).
+                   Lower = higher quality but slower. Higher = faster but lower quality.
+        max_skip_steps: Maximum consecutive skips allowed (default 4).
+                        Prevents infinite skipping even if error is low.
+        skip_start_step: Number of denoising steps at the start where skipping is disabled (default 10).
+        skip_end_step: Number of denoising steps at the end where skipping is disabled (0 = active until last step).
+    """
+
+    cache_type: str = "magcache"
+    threshold: float = 0.12
+    max_skip_steps: int = 4
+    skip_start_step: int = 10
+    skip_end_step: int = 0
+    mag_ratios: list[float] | None = None
@@ -22,6 +22,8 @@
 logger = init_logger(__name__)
 
 if TYPE_CHECKING:
+    from sglang.multimodal_gen.configs.sample.magcache import MagCacheParams
+    from sglang.multimodal_gen.configs.sample.teacache import TeaCacheParams
     from sglang.multimodal_gen.runtime.server_args import ServerArgs
 
 
@@ -154,8 +156,12 @@ class SamplingParams:
     cfg_normalization: float | bool = 0.0
     boundary_ratio: float | None = None
 
-    # TeaCache parameters
+    # Cache acceleration
     enable_teacache: bool = False
+    teacache_params: "TeaCacheParams | None" = None
+    enable_magcache: bool = False
+    magcache_params: "MagCacheParams | None" = None
+    calibrate_cache: bool = False
 
     # Profiling
     profile: bool = False
@@ -601,6 +607,37 @@ def add_cli_args(parser: Any) -> Any:
             "--enable-teacache",
             action="store_true",
             default=SamplingParams.enable_teacache,
+            help="Enable TeaCache acceleration for diffusion inference.",
+        )
+        parser.add_argument(
+            "--teacache-params",
+            type=json.loads,
+            default=None,
+            help=(
+                'TeaCache params as a JSON object, e.g. \'{"teacache_thresh": 0.08, "coefficients": [1.0, 2.0]}\'. '
+                "Fields map directly to TeaCacheParams dataclass fields."
+            ),
+        )
+        parser.add_argument(
+            "--enable-magcache",
+            action="store_true",
+            default=SamplingParams.enable_magcache,
+            help="Enable MagCache acceleration for diffusion inference.",
+        )
+        parser.add_argument(
+            "--magcache-params",
+            type=json.loads,
+            default=None,
+            help=(
+                'MagCache params as a JSON object, e.g. \'{"threshold": 0.12, "max_skip_steps": 4}\'. '
+                "Fields map directly to MagCacheParams dataclass fields."
+            ),
+        )
+        parser.add_argument(
+            "--calibrate-cache",
+            action="store_true",
+            default=SamplingParams.calibrate_cache,
+            help="Run in calibration mode: collect magnitude ratio statistics instead of skipping steps.",
         )
 
         # profiling

@@ -10,34 +10,42 @@
 class TeaCacheParams(CacheParams):
     cache_type: str = "teacache"
     teacache_thresh: float = 0.0
+    skip_start_step: int = 5
+    skip_end_step: int = 0
     coefficients: list[float] = field(default_factory=list)
 
 
 @dataclass
 class WanTeaCacheParams(CacheParams):
-    # Unfortunately, TeaCache is very different for Wan than other models
+    # Default threshold and coefficients are for Wan T2V 1.3B (use_ret_steps=True).
+    # For other Wan variants, override these values via --teacache-params.
     cache_type: str = "teacache"
-    teacache_thresh: float = 0.0
+    teacache_thresh: float = 0.08
+    skip_start_step: int = 5
+    skip_end_step: int = 0
     use_ret_steps: bool = True
-    ret_steps_coeffs: list[float] = field(default_factory=list)
-    non_ret_steps_coeffs: list[float] = field(default_factory=list)
+    ret_steps_coeffs: list[float] = field(
+        default_factory=lambda: [
+            -5.21862437e04,
+            9.23041404e03,
+            -5.28275948e02,
+            1.36987616e01,
+            -4.99875664e-02,
+        ]
+    )
+    non_ret_steps_coeffs: list[float] = field(
+        default_factory=lambda: [
+            2.39676752e03,
+            -1.31110545e03,
+            2.01331979e02,
+            -8.29855975e00,
+            1.37887774e-01,
+        ]
+    )
 
     @property
     def coefficients(self) -> list[float]:
         if self.use_ret_steps:
             return self.ret_steps_coeffs
         else:
             return self.non_ret_steps_coeffs
-
-    @property
-    def ret_steps(self) -> int:
-        if self.use_ret_steps:
-            return 5 * 2
-        else:
-            return 1 * 2
-
-    def get_cutoff_steps(self, num_inference_steps: int) -> int:
-        if self.use_ret_steps:
-            return num_inference_steps * 2
-        else:
-            return num_inference_steps * 2 - 2
@@ -3,9 +3,115 @@
 # SPDX-License-Identifier: Apache-2.0
 from dataclasses import dataclass, field
 
+from sglang.multimodal_gen.configs.sample.magcache import MagCacheParams
 from sglang.multimodal_gen.configs.sample.sampling_params import SamplingParams
 from sglang.multimodal_gen.configs.sample.teacache import WanTeaCacheParams
 
+# Magnitude ratio arrays from the reference implementation:
+# https://github.com/Zehong-Ma/MagCache/blob/df81cb181776c2c61477c08e1d21f87fda1cd938/MagCache4Wan2.1/magcache_generate.py
+T2V_13B_MAG_RATIOS = [
+    1.0,
+    1.0,
+    1.0124,
+    1.02213,
+    1.00166,
+    1.0041,
+    0.99791,
+    1.00061,
+    0.99682,
+    0.99762,
+    0.99634,
+    0.99685,
+    0.99567,
+    0.99586,
+    0.99416,
+    0.99422,
+    0.99578,
+    0.99575,
+    0.9957,
+    0.99563,
+    0.99511,
+    0.99506,
+    0.99535,
+    0.99531,
+    0.99552,
+    0.99549,
+    0.99541,
+    0.99539,
+    0.9954,
+    0.99536,
+    0.99489,
+    0.99485,
+    0.99518,
+    0.99514,
+    0.99484,
+    0.99478,
+    0.99481,
+    0.99479,
+    0.99415,
+    0.99413,
+    0.99419,
+    0.99416,
+    0.99396,
+    0.99393,
+    0.99388,
+    0.99386,
+    0.99349,
+    0.99349,
+    0.99309,
+    0.99304,
+    0.9927,
+    0.9927,
+    0.99228,
+    0.99226,
+    0.99171,
+    0.9917,
+    0.99137,
+    0.99135,
+    0.99068,
+    0.99063,
+    0.99005,
+    0.99003,
+    0.98944,
+    0.98942,
+    0.98849,
+    0.98849,
+    0.98758,
+    0.98757,
+    0.98644,
+    0.98643,
+    0.98504,
+    0.98503,
+    0.9836,
+    0.98359,
+    0.98202,
+    0.98201,
+    0.97977,
+    0.97978,
+    0.97717,
+    0.97718,
+    0.9741,
+    0.97411,
+    0.97003,
+    0.97002,
+    0.96538,
+    0.96541,
+    0.9593,
+    0.95933,
+    0.95086,
+    0.95089,
+    0.94013,
+    0.94019,
+    0.92402,
+    0.92414,
+    0.90241,
+    0.9026,
+    0.86821,
+    0.86868,
+    0.81838,
+    0.81939,
+]
+
 
 @dataclass
 class WanT2V_1_3B_SamplingParams(SamplingParams):
@@ -50,6 +156,16 @@ class WanT2V_1_3B_SamplingParams(SamplingParams):
         )
     )
 
+    magcache_params: MagCacheParams = field(
+        default_factory=lambda: MagCacheParams(
+            threshold=0.12,
+            max_skip_steps=4,
+            skip_start_step=10,
+            skip_end_step=0,
+            mag_ratios=T2V_13B_MAG_RATIOS,
+        )
+    )
+
 
 @dataclass
 class WanT2V_14B_SamplingParams(SamplingParams):

diff --git a/python/sglang/multimodal_gen/runtime/cache/__init__.py b/python/sglang/multimodal_gen/runtime/cache/__init__.py
@@ -6,22 +6,40 @@
 diffusion transformer (DiT) inference:
 
 - TeaCache: Temporal similarity-based caching for diffusion models
+- MagCache: Magnitude-ratio-based caching for diffusion models
 - cache-dit integration: Block-level caching with DBCache and TaylorSeer
 
 """
 
+from sglang.multimodal_gen.runtime.cache.base import DiffusionCache
 from sglang.multimodal_gen.runtime.cache.cache_dit_integration import (
     CacheDitConfig,
     enable_cache_on_dual_transformer,
     enable_cache_on_transformer,
     get_scm_mask,
 )
-from sglang.multimodal_gen.runtime.cache.teacache import TeaCacheContext, TeaCacheMixin
+from sglang.multimodal_gen.runtime.cache.magcache import (
+    MagCacheContext,
+    MagCacheState,
+    MagCacheStrategy,
+)
+from sglang.multimodal_gen.runtime.cache.teacache import (
+    TeaCacheContext,
+    TeaCacheState,
+    TeaCacheStrategy,
+)
 
 __all__ = [
-    # TeaCache (always available)
+    # Base
+    "DiffusionCache",
+    # TeaCache
     "TeaCacheContext",
-    "TeaCacheMixin",
+    "TeaCacheState",
+    "TeaCacheStrategy",
+    # MagCache
+    "MagCacheContext",
+    "MagCacheState",
+    "MagCacheStrategy",
     # cache-dit integration (lazy-loaded, requires cache-dit package)
     "CacheDitConfig",
     "enable_cache_on_transformer",