diff --git a/examples/diffusers/quantization/config.py b/examples/diffusers/quantization/config.py
index e15b8c7ba3c..7b472565a69 100644
--- a/examples/diffusers/quantization/config.py
+++ b/examples/diffusers/quantization/config.py
@@ -16,82 +16,21 @@
 import torch.nn as nn
 from calib.plugin_calib import PercentileCalibrator
 
-FP8_DEFAULT_CONFIG = {
-    "quant_cfg": [
-        {"quantizer_name": "*", "enable": False},
-        {"quantizer_name": "*weight_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-        {"quantizer_name": "*input_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-        {"quantizer_name": "*output_quantizer", "enable": False},
-        {"quantizer_name": "*softmax_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-    ],
-    "algorithm": "max",
-}
+from modelopt.torch.opt.config_loader import load_config
+from modelopt.torch.quantization.config import QuantizeConfig
 
-INT8_DEFAULT_CONFIG = {
-    "quant_cfg": [
-        {"quantizer_name": "*", "enable": False},
-        {"quantizer_name": "*weight_quantizer", "cfg": {"num_bits": 8, "axis": 0}},
-        {"quantizer_name": "*input_quantizer", "cfg": {"num_bits": 8, "axis": None}},
-        {"quantizer_name": "*output_quantizer", "enable": False},
-    ],
-    "algorithm": "max",
-}
-
-NVFP4_DEFAULT_CONFIG = {
-    "quant_cfg": [
-        {"quantizer_name": "*", "enable": False},
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
-                "axis": None,
-            },
-            "enable": True,
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
-                "axis": None,
-            },
-            "enable": True,
-        },
-        {"quantizer_name": "*output_quantizer", "enable": False},
-        {"quantizer_name": "*softmax_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-    ],
-    "algorithm": "max",
-}
-
-NVFP4_FP8_MHA_CONFIG = {
-    "quant_cfg": [
-        {"quantizer_name": "*", "enable": False},
-        {
-            "quantizer_name": "**weight_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
-                "axis": None,
-            },
-            "enable": True,
-        },
-        {
-            "quantizer_name": "**input_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
-                "axis": None,
-            },
-            "enable": True,
-        },
-        {"quantizer_name": "*output_quantizer", "enable": False},
-        {"quantizer_name": "*[qkv]_bmm_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-        {"quantizer_name": "*softmax_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-        {"quantizer_name": "*bmm2_output_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-    ],
-    "algorithm": {"method": "svdquant", "lowrank": 32},
-}
+
+def _load_diffusers_preset(preset_name: str) -> dict:
+    """Load a diffusers PTQ preset YAML and dump only its explicitly set fields."""
+    preset_path = f"configs/ptq/presets/diffusers/{preset_name}"
+    return load_config(preset_path, schema_type=QuantizeConfig).model_dump(exclude_unset=True)
+
+
+# Example presets as plain dicts; each constant comes from its own YAML file.
+FP8_DEFAULT_CONFIG = _load_diffusers_preset("fp8")
+INT8_DEFAULT_CONFIG = _load_diffusers_preset("int8")
+NVFP4_DEFAULT_CONFIG = _load_diffusers_preset("nvfp4")
+NVFP4_FP8_MHA_CONFIG = _load_diffusers_preset("nvfp4_fp8_mha")
 
 
 def set_quant_config_attr(quant_config, trt_high_precision_dtype, quant_algo, **kwargs):
diff --git a/examples/diffusers/quantization/quantize.py b/examples/diffusers/quantization/quantize.py
index 2a3c947a2d6..c719fbd45cc 100644
--- a/examples/diffusers/quantization/quantize.py
+++ b/examples/diffusers/quantization/quantize.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import argparse
+import copy
 import logging
 import sys
 import time as time
@@ -114,19 +115,13 @@ def get_quant_config(self, n_steps: int, backbone: torch.nn.Module) -> Any:
         """
         self.logger.info(f"Building quantization config for {self.config.format.value}")
 
+        apply_int8_percentile_calibrator = False
         if self.config.format == QuantFormat.INT8:
             if self.config.algo == QuantAlgo.SMOOTHQUANT:
                 base_cfg = mtq.INT8_SMOOTHQUANT_CFG
             else:
                 base_cfg = INT8_DEFAULT_CONFIG
-            if self.config.collect_method != CollectMethod.DEFAULT:
-                reset_set_int8_config(
-                    base_cfg,
-                    self.config.percentile,
-                    n_steps,
-                    collect_method=self.config.collect_method.value,
-                    backbone=backbone,
-                )
+            apply_int8_percentile_calibrator = self.config.collect_method != CollectMethod.DEFAULT
         elif self.config.format == QuantFormat.FP8:
             base_cfg = FP8_DEFAULT_CONFIG
         elif self.config.format == QuantFormat.FP4:
@@ -137,7 +132,18 @@ def get_quant_config(self, n_steps: int, backbone: torch.nn.Module) -> Any:
         else:
             raise NotImplementedError(f"Unknown format {self.config.format}")
 
-        # Build a fresh config dict so we never mutate the global constants.
+        # Build a fresh config dict so runtime overrides never mutate the global constants.
+        base_cfg = copy.deepcopy(base_cfg)
+
+        if apply_int8_percentile_calibrator:
+            reset_set_int8_config(
+                base_cfg,
+                self.config.percentile,
+                n_steps,
+                collect_method=self.config.collect_method.value,
+                backbone=backbone,
+            )
+
         quant_cfg_list = list(base_cfg["quant_cfg"])
 
         if self.config.format == QuantFormat.FP4:
diff --git a/examples/llm_autodeploy/run_auto_quantize.py b/examples/llm_autodeploy/run_auto_quantize.py
index ebd7c1090bb..db35e4841fb 100644
--- a/examples/llm_autodeploy/run_auto_quantize.py
+++ b/examples/llm_autodeploy/run_auto_quantize.py
@@ -15,6 +15,7 @@
 
 import argparse
 from collections import defaultdict
+from typing import Any
 
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -24,7 +25,7 @@
 from modelopt.torch.utils import create_forward_loop
 from modelopt.torch.utils.dataset_utils import get_dataset_dataloader
 
-SUPPORT_QUANT_FORMAT = {
+SUPPORT_QUANT_FORMAT: dict[str, dict[str, Any]] = {
     "fp8": mtq.FP8_DEFAULT_CFG,
     "nvfp4": mtq.NVFP4_DEFAULT_CFG,
 }
@@ -87,7 +88,7 @@ def loss_func(output, data):
         data_loader=calib_dataloader,
         forward_step=lambda model, batch: model(**batch),
         loss_func=loss_func,
-        quantization_formats=[SUPPORT_QUANT_FORMAT[format] for format in qformat_list],
+        quantization_formats=[SUPPORT_QUANT_FORMAT[quant_format] for quant_format in qformat_list],
         num_calib_steps=len(calib_dataloader),
         num_score_steps=min(
             len(calib_dataloader), 128 // batch_size
diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py
index 76ed2bb6503..80864523e52 100644
--- a/modelopt/torch/opt/config_loader.py
+++ b/modelopt/torch/opt/config_loader.py
@@ -336,7 +336,19 @@ def _schema_equal(left: Any | None, right: Any | None) -> bool:
 def _list_element_schema(schema_type: Any | None) -> Any | None:
     """Return the element schema for a typed ``list[T]`` annotation."""
     schema_type = _unwrap_schema_type(schema_type)
-    if get_origin(schema_type) is not list:
+    origin = get_origin(schema_type)
+    if origin in (UnionType, Union):
+        element_schemas = []
+        for arg in get_args(schema_type):
+            if arg is NoneType:
+                continue
+            element_schema = _list_element_schema(arg)
+            if element_schema is None:
+                continue
+            if not any(_schema_equal(element_schema, seen) for seen in element_schemas):
+                element_schemas.append(element_schema)
+        return element_schemas[0] if len(element_schemas) == 1 else None
+    if origin is not list:
         return None
     args = get_args(schema_type)
     if len(args) != 1 or args[0] is Any:
@@ -510,6 +522,12 @@ def _resolve_list_import(
         if _schema_equal(imported.schema_type, element_schema):
             return [imported.data]
 
+        element_schema_unwrapped = _unwrap_schema_type(element_schema)
+        if isinstance(imported.data, dict) and (
+            element_schema_unwrapped is dict or get_origin(element_schema_unwrapped) is dict
+        ):
+            return [imported.data]
+
         raise ValueError(
             f"$import {ref_name!r} in list at {context} has schema "
             f"{_schema_label(imported.schema_type, imported.schema)!r}; expected either "
diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py
index b0c3fb859b2..fd95171ce43 100644
--- a/modelopt/torch/quantization/config.py
+++ b/modelopt/torch/quantization/config.py
@@ -150,7 +150,6 @@
 
 """
 
-import copy
 import warnings
 from collections.abc import Mapping, Sequence
 from typing import Any, Literal
@@ -1199,578 +1198,141 @@ class _QuantizeExportConfig(ModeloptBaseConfig):
     """An empty config."""
 
 
-# Shared snippet constants are dumped back to plain dicts before being spliced into
-# the public quant config constants below.  ``load_config`` returns validated
-# ``QuantizerCfgEntry`` instances for schema-tagged files, but the public constants
-# (``INT4_AWQ_CFG``, ``NVFP4_DEFAULT_CFG``, etc.) have always been raw dict/list trees;
-# splatting schema instances into them would surprise callers that serialise the
-# constants or do ``isinstance(entry, dict)`` checks.  ``exclude_unset=True`` keeps the
-# sparse YAML shape (only the explicitly set fields) so the dumped dicts are
-# byte-identical to what authors wrote in the YAML snippets.
-_base_disable_all: list[dict[str, Any]] = [
-    load_config("configs/ptq/units/base_disable_all").model_dump(exclude_unset=True)
-]
-
-_default_disabled_quantizer_cfg: list[dict[str, Any]] = [
-    entry.model_dump(exclude_unset=True)
-    for entry in load_config("configs/ptq/units/default_disabled_quantizers")
-]
-
-_mamba_moe_disabled_quantizer_cfg: list[dict[str, Any]] = [
-    {"quantizer_name": "*fc1_latent_proj*", "enable": False},  # Skip Latent MOE
-    {"quantizer_name": "*fc2_latent_proj*", "enable": False},  # Skip Latent MOE
-    {"quantizer_name": "*q_proj*", "enable": False},  # Skip QKV Linear (HF naming)
-    {"quantizer_name": "*k_proj*", "enable": False},  # Skip QKV Linear (HF naming)
-    {"quantizer_name": "*v_proj*", "enable": False},  # Skip QKV Linear (HF naming)
-    {"quantizer_name": "*o_proj*", "enable": False},  # Skip QKV Output Projection (HF naming)
-    {
-        "quantizer_name": "*self_attention.linear_qkv*",
-        "enable": False,
-    },  # Skip QKV Linear (Mcore naming)
-    {
-        "quantizer_name": "*self_attention.linear_proj*",
-        "enable": False,
-    },  # Skip QKV Output Projection (Mcore naming)
-]
-
-INT8_DEFAULT_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {"quantizer_name": "*weight_quantizer", "cfg": {"num_bits": 8, "axis": 0}},
-        {"quantizer_name": "*input_quantizer", "cfg": {"num_bits": 8, "axis": None}},
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": "max",
-}
-
-INT8_SMOOTHQUANT_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {"quantizer_name": "*weight_quantizer", "cfg": {"num_bits": 8, "axis": 0}},
-        {"quantizer_name": "*input_quantizer", "cfg": {"num_bits": 8, "axis": None}},
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": "smoothquant",
-}
-
-INT8_WEIGHT_ONLY_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {"quantizer_name": "*weight_quantizer", "cfg": {"num_bits": 8, "axis": 0}},
-        {"quantizer_name": "*input_quantizer", "enable": False},
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": "max",
-}
-
-FP8_DEFAULT_CFG: dict[str, Any] = load_config("configs/ptq/presets/model/fp8").model_dump(
-    exclude_unset=True
-)
-
-MAMBA_MOE_FP8_AGGRESSIVE_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {"num_bits": (4, 3), "axis": None},
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {"num_bits": (4, 3), "axis": None},
-        },
-        *_default_disabled_quantizer_cfg,
-        *_mamba_moe_disabled_quantizer_cfg,
-    ],
-    "algorithm": "max",
-}
-
-MAMBA_MOE_FP8_CONSERVATIVE_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {"num_bits": (4, 3), "axis": None},
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {"num_bits": (4, 3), "axis": None},
-        },
-        *_default_disabled_quantizer_cfg,
-        *_mamba_moe_disabled_quantizer_cfg,
-        {"quantizer_name": "*mixer.in_proj*", "enable": False},  # Skip mamba linear
-        {"quantizer_name": "*mixer.out_proj*", "enable": False},  # Skip mamba linear
-    ],
-    "algorithm": "max",
-}
+def _load_quantizer_attribute_dict(config_path: str) -> dict[str, Any]:
+    """Return the QuantizerAttributeConfig YAML at ``config_path`` as a plain dict."""
+    loaded = load_config(config_path, schema_type=QuantizerAttributeConfig)
+    if isinstance(loaded, QuantizerAttributeConfig):
+        return loaded.model_dump(exclude_unset=True)  # dump only explicitly set fields
+    if not isinstance(loaded, Mapping):
+        raise TypeError(f"{config_path} must declare QuantizerAttributeConfig.")
+    return dict(loaded)  # shallow copy of a loader result that is already a mapping
 
-FP8_PER_CHANNEL_PER_TOKEN_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {"quantizer_name": "*weight_quantizer", "cfg": {"num_bits": (4, 3), "axis": 0}},
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-                "type": "dynamic",
-                "block_sizes": {-1: None},
-            },
-        },
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": "max",
-}
 
-# FP8 2D blockwise fake quantization config for deepseek models
-FP8_2D_BLOCKWISE_WEIGHT_ONLY_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-                "block_sizes": {-1: 128, -2: 128},
-            },
-        },
-        {"quantizer_name": "*input_quantizer", "enable": False},
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": "max",
-}
+def _load_quantize_config_dict(config_path: str) -> dict[str, Any]:
+    """Load a QuantizeConfig YAML as a legacy-shape dict holding only explicitly set fields."""
+    return load_config(config_path, schema_type=QuantizeConfig).model_dump(exclude_unset=True)
 
-INT4_BLOCKWISE_WEIGHT_ONLY_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": 4,
-                "block_sizes": {-1: 128},
-            },
-        },
-        {"quantizer_name": "*input_quantizer", "enable": False},
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": "max",
-}
 
+def _load_quantizer_cfg_dict_list(config_path: str) -> list[dict[str, Any]]:
+    """Load a quantizer-cfg snippet (a single entry or a list) as a list of plain dicts."""
+    loaded = load_config(config_path)
+    items = loaded if isinstance(loaded, list) else [loaded]  # normalise a lone entry
+    return [item.model_dump(exclude_unset=True) for item in items]
 
-INT4_AWQ_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": 4,
-                "block_sizes": {-1: 128, "type": "static"},
-            },
-        },
-        {"quantizer_name": "*input_quantizer", "enable": False},
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": {"method": "awq_lite", "alpha_step": 0.1},
-    # "algorithm": {"method": "awq_full", "alpha_step": 0.1, "max_co_batch_size": 1024},
-    # "algorithm": {"method": "awq_clip", "max_co_batch_size": 2048},
-}
 
-# W4A8 currently uses INT4 blockwise quantization (block size = 128) followed by FP8 quantization
-# for weights. This could change in the future
-W4A8_AWQ_BETA_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": [
-                {
-                    "num_bits": 4,
-                    "block_sizes": {-1: 128, "type": "static"},
-                },
-                {
-                    "num_bits": (4, 3),
-                },
-            ],
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-            },
-        },
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": "awq_lite",
-}
+_base_disable_all: list[dict[str, Any]] = _load_quantizer_cfg_dict_list(
+    "configs/ptq/units/base_disable_all"
+)
 
-MXFP8_DEFAULT_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (8, 0)},
-            },
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (8, 0)},
-            },
-        },
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": None,
-}
+_default_disabled_quantizer_cfg: list[dict[str, Any]] = _load_quantizer_cfg_dict_list(
+    "configs/ptq/units/default_disabled_quantizers"
+)
 
-MXFP6_DEFAULT_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": (3, 2),
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (8, 0)},
-            },
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {
-                "num_bits": (3, 2),
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (8, 0)},
-            },
-        },
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": None,
-}
+_mamba_moe_disabled_quantizer_cfg: list[dict[str, Any]] = _load_quantizer_cfg_dict_list(
+    "configs/ptq/units/mamba_moe_disabled_quantizers"
+)
 
-MXFP4_DEFAULT_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (8, 0)},
-            },
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (8, 0)},
-            },
-        },
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": None,
-}
+_nvfp4_cfg: dict[str, Any] = _load_quantizer_attribute_dict("configs/numerics/nvfp4")
 
-W4A8_MXFP4_FP8_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (8, 0)},
-            },
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {"num_bits": (4, 3), "axis": None},
-        },
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": None,
-}
+_nvfp4_cfg_bs32: dict[str, Any] = _load_quantizer_attribute_dict("configs/numerics/nvfp4_bs32")
 
-MXINT8_DEFAULT_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": 8,
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (8, 0)},
-            },
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {
-                "num_bits": 8,
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (8, 0)},
-            },
-        },
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": None,
-}
+INT8_DEFAULT_CFG: dict[str, Any] = _load_quantize_config_dict("configs/ptq/presets/model/int8")
+INT8_SMOOTHQUANT_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/int8_smoothquant"
+)
+INT8_WEIGHT_ONLY_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/int8_weight_only"
+)
+FP8_DEFAULT_CFG: dict[str, Any] = _load_quantize_config_dict("configs/ptq/presets/model/fp8")
+MAMBA_MOE_FP8_AGGRESSIVE_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/mamba_moe_fp8_aggressive"
+)
+MAMBA_MOE_FP8_CONSERVATIVE_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/mamba_moe_fp8_conservative"
+)
+FP8_PER_CHANNEL_PER_TOKEN_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/fp8_per_channel_per_token"
+)
+FP8_2D_BLOCKWISE_WEIGHT_ONLY_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/fp8_2d_blockwise_weight_only"
+)
+INT4_BLOCKWISE_WEIGHT_ONLY_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/int4_blockwise_weight_only"
+)
+INT4_AWQ_CFG: dict[str, Any] = _load_quantize_config_dict("configs/ptq/presets/model/int4_awq")
+W4A8_AWQ_BETA_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/w4a8_awq_beta"
+)
+MXFP8_DEFAULT_CFG: dict[str, Any] = _load_quantize_config_dict("configs/ptq/presets/model/mxfp8")
+MXFP6_DEFAULT_CFG: dict[str, Any] = _load_quantize_config_dict("configs/ptq/presets/model/mxfp6")
+MXFP4_DEFAULT_CFG: dict[str, Any] = _load_quantize_config_dict("configs/ptq/presets/model/mxfp4")
+W4A8_MXFP4_FP8_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/w4a8_mxfp4_fp8"
+)
+MXINT8_DEFAULT_CFG: dict[str, Any] = _load_quantize_config_dict("configs/ptq/presets/model/mxint8")
 
 # KV-cache configs are designed to be merged with a primary quantization config (e.g.
 # FP8_DEFAULT_CFG) that already contains _base_disable_all.  They intentionally omit both
 # _base_disable_all and "algorithm" because these are provided by the primary config.
-FP8_KV_CFG: dict[str, Any] = load_config("configs/ptq/presets/kv/fp8").model_dump(
-    exclude_unset=True
-)
-
-FP8_AFFINE_KV_CFG = {
-    "quant_cfg": [
-        {
-            "quantizer_name": "*[kv]_bmm_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-                "bias": {-2: None, -4: None, "type": "static"},
-            },
-        },
-    ]
-}
-
-_nvfp4_cfg = {
-    "num_bits": (2, 1),
-    "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
-}
-
-_nvfp4_cfg_bs32 = {
-    "num_bits": (2, 1),
-    "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (4, 3)},
-}
+FP8_KV_CFG: dict[str, Any] = _load_quantize_config_dict("configs/ptq/presets/kv/fp8")
+FP8_AFFINE_KV_CFG: dict[str, Any] = _load_quantize_config_dict("configs/ptq/presets/kv/fp8_affine")
 
-
-def _nvfp4_selective_quant_cfg(
-    layer_patterns: list[str],
-    *,
-    quantizer: dict = _nvfp4_cfg,
-    weight_only: bool = False,
-    algorithm: str | dict = "max",
-) -> dict:
-    """Build an NVFP4 config that quantizes only the specified layer patterns."""
-    quant_cfg: list[dict[str, Any]] = []
-    quant_cfg.extend(_base_disable_all)
-    for pattern in layer_patterns:
-        # Deep-copy the quantizer dict so each config constant gets its own instance.
-        quant_cfg.append(
-            {"quantizer_name": f"{pattern}weight_quantizer", "cfg": copy.deepcopy(quantizer)}
-        )
-        if not weight_only:
-            quant_cfg.append(
-                {"quantizer_name": f"{pattern}input_quantizer", "cfg": copy.deepcopy(quantizer)}
-            )
-    quant_cfg.extend(_default_disabled_quantizer_cfg)
-    return {"quant_cfg": quant_cfg, "algorithm": algorithm}
-
-
-NVFP4_DEFAULT_CFG = _nvfp4_selective_quant_cfg(["*"])
-
-NVFP4_W4A4_WEIGHT_MSE_FP8_SWEEP_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 16, "type": "static", "scale_bits": (4, 3)},
-            },
-        },
-        {"quantizer_name": "*input_quantizer", "cfg": _nvfp4_cfg},
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": {
-        "method": "mse",
-        "fp8_scale_sweep": True,
-    },
-}
-
-NVFP4_W4A4_WEIGHT_LOCAL_HESSIAN_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 16, "type": "static", "scale_bits": (4, 3)},
-            },
-        },
-        {"quantizer_name": "*input_quantizer", "cfg": _nvfp4_cfg},
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": {
-        "method": "local_hessian",
-        "fp8_scale_sweep": True,
-    },
-}
-
-MAMBA_MOE_NVFP4_AGGRESSIVE_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {"quantizer_name": "*weight_quantizer", "cfg": _nvfp4_cfg},
-        {"quantizer_name": "*input_quantizer", "cfg": _nvfp4_cfg},
-        *_default_disabled_quantizer_cfg,
-        *_mamba_moe_disabled_quantizer_cfg,
-    ],
-    "algorithm": "max",
-}
-MAMBA_MOE_NVFP4_CONSERVATIVE_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {"quantizer_name": "*weight_quantizer", "cfg": _nvfp4_cfg},
-        {"quantizer_name": "*input_quantizer", "cfg": _nvfp4_cfg},
-        *_default_disabled_quantizer_cfg,
-        *_mamba_moe_disabled_quantizer_cfg,
-        {"quantizer_name": "*mixer.in_proj*", "enable": False},  # Skip mamba linear
-        {"quantizer_name": "*mixer.out_proj*", "enable": False},  # Skip mamba linear
-    ],
-    "algorithm": "max",
-}
-
-NVFP4_AWQ_LITE_CFG = _nvfp4_selective_quant_cfg(["*"], algorithm="awq_lite")
-
-NVFP4_AWQ_CLIP_CFG = _nvfp4_selective_quant_cfg(["*"], algorithm={"method": "awq_clip"})
-
-NVFP4_AWQ_FULL_CFG = _nvfp4_selective_quant_cfg(
-    ["*"], algorithm={"method": "awq_full", "alpha_step": 0.1}
+NVFP4_DEFAULT_CFG: dict[str, Any] = _load_quantize_config_dict("configs/ptq/presets/model/nvfp4")
+NVFP4_W4A4_WEIGHT_MSE_FP8_SWEEP_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/nvfp4_w4a4_weight_mse_fp8_sweep"
 )
-
-# See comment above FP8_KV_CFG — KV-cache configs omit _base_disable_all and "algorithm".
-NVFP4_AFFINE_KV_CFG = {
-    "quant_cfg": [
-        {
-            "quantizer_name": "*[kv]_bmm_quantizer",
-            "cfg": {
-                **_nvfp4_cfg,
-                "bias": {-2: None, -4: None, "type": "static"},
-            },
-        },
-    ]
-}
-
-NVFP4_KV_CFG = {
-    "quant_cfg": [
-        {"quantizer_name": "*[kv]_bmm_quantizer", "cfg": _nvfp4_cfg},
-    ]
-}
-
-# Moved from examples/diffusers/quantization/config.py to here
-NVFP4_FP8_MHA_CONFIG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {"quantizer_name": "*weight_quantizer", "cfg": _nvfp4_cfg},
-        {"quantizer_name": "*input_quantizer", "cfg": _nvfp4_cfg},
-        {"quantizer_name": "*output_quantizer", "enable": False},
-        {
-            "quantizer_name": "*q_bmm_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-            },
-        },
-        {
-            "quantizer_name": "*k_bmm_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-            },
-        },
-        {
-            "quantizer_name": "*v_bmm_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-            },
-        },
-        {
-            "quantizer_name": "*softmax_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-            },
-        },
-        {
-            "quantizer_name": "transformer_blocks*bmm2_output_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-            },
-        },
-    ],
-    "algorithm": "max",
-}
-
-# See comment above FP8_KV_CFG — KV-cache configs omit _base_disable_all and "algorithm".
-NVFP4_KV_ROTATE_CFG = {
-    "quant_cfg": [
-        {
-            # q_bmm is disabled but pre-configured with rotate=True so that downstream
-            # code can inspect the rotate flag even while the quantizer is off.
-            "quantizer_name": "*q_bmm_quantizer",
-            "cfg": {
-                "rotate": True,
-            },
-            "enable": False,
-        },
-        {
-            "quantizer_name": "*k_bmm_quantizer",
-            "cfg": {
-                **_nvfp4_cfg,
-                "rotate": True,
-            },
-        },
-        {"quantizer_name": "*v_bmm_quantizer", "cfg": _nvfp4_cfg},
-    ],
-    "algorithm": "max",
-}
-
-NVFP4_SVDQUANT_DEFAULT_CFG = _nvfp4_selective_quant_cfg(
-    ["*"], algorithm={"method": "svdquant", "lowrank": 32}
+NVFP4_W4A4_WEIGHT_LOCAL_HESSIAN_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/nvfp4_w4a4_weight_local_hessian"
 )
-
-W4A8_NVFP4_FP8_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (4, 3)},
-            },
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {
-                "num_bits": (4, 3),
-            },
-        },
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": "max",
-}
-W4A16_NVFP4_CFG = _nvfp4_selective_quant_cfg(["*"], weight_only=True)
-
-MXFP4_MLP_WEIGHT_ONLY_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*mlp*weight_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (8, 0)},
-            },
-        },
-        {
-            "quantizer_name": "*block_sparse_moe*weight_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 32, "type": "dynamic", "scale_bits": (8, 0)},
-            },
-        },
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": None,
-}
-
-NVFP4_MLP_WEIGHT_ONLY_CFG = _nvfp4_selective_quant_cfg(
-    ["*mlp*", "*block_sparse_moe*"], quantizer=_nvfp4_cfg_bs32, weight_only=True
+MAMBA_MOE_NVFP4_AGGRESSIVE_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/mamba_moe_nvfp4_aggressive"
+)
+MAMBA_MOE_NVFP4_CONSERVATIVE_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/mamba_moe_nvfp4_conservative"
+)
+NVFP4_AWQ_LITE_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/nvfp4_awq_lite"
+)
+NVFP4_AWQ_CLIP_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/nvfp4_awq_clip"
+)
+NVFP4_AWQ_FULL_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/nvfp4_awq_full"
+)
+NVFP4_AFFINE_KV_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/kv/nvfp4_affine"
+)
+NVFP4_KV_CFG: dict[str, Any] = _load_quantize_config_dict("configs/ptq/presets/kv/nvfp4")
+NVFP4_FP8_MHA_CONFIG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/nvfp4_fp8_mha"
+)
+NVFP4_KV_ROTATE_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/kv/nvfp4_rotate"
+)
+NVFP4_SVDQUANT_DEFAULT_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/nvfp4_svdquant"
+)
+W4A8_NVFP4_FP8_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/w4a8_nvfp4_fp8"
+)
+W4A16_NVFP4_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/w4a16_nvfp4"
+)
+MXFP4_MLP_WEIGHT_ONLY_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/mxfp4_mlp_weight_only"
+)
+NVFP4_MLP_WEIGHT_ONLY_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/nvfp4_mlp_weight_only"
+)
+NVFP4_EXPERTS_ONLY_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/nvfp4_experts_only"
+)
+NVFP4_MLP_ONLY_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/nvfp4_mlp_only"
 )
-NVFP4_EXPERTS_ONLY_CFG = _nvfp4_selective_quant_cfg(
-    ["*mlp.experts*", "*block_sparse_moe*", "*.experts.*"]
+NVFP4_OMLP_ONLY_CFG: dict[str, Any] = _load_quantize_config_dict(
+    "configs/ptq/presets/model/nvfp4_omlp_only"
 )
-NVFP4_MLP_ONLY_CFG = _nvfp4_selective_quant_cfg(["*mlp*", "*block_sparse_moe*", "*.experts.*"])
-NVFP4_OMLP_ONLY_CFG = _nvfp4_selective_quant_cfg(["*o_proj*", "*mlp*", "*block_sparse_moe*"])
 
 # DO NOT ADD NEW CONFIGS HERE. If you want to add a new general recipe, add it to
 # modelopt_recipes/general/ptq/ as a yaml file
@@ -1786,6 +1348,7 @@ def _nvfp4_selective_quant_cfg(
     "INT8_SMOOTHQUANT_CFG",
     "INT8_WEIGHT_ONLY_CFG",
     "MXFP4_DEFAULT_CFG",
+    "MXFP6_DEFAULT_CFG",
     "MXFP8_DEFAULT_CFG",
     "MXINT8_DEFAULT_CFG",
     "NVFP4_AFFINE_KV_CFG",
@@ -1810,6 +1373,7 @@ def _nvfp4_selective_quant_cfg(
     "MAMBA_MOE_NVFP4_AGGRESSIVE_CFG",
     "MAMBA_MOE_FP8_CONSERVATIVE_CFG",
     "MAMBA_MOE_FP8_AGGRESSIVE_CFG",
+    "NVFP4_W4A4_WEIGHT_LOCAL_HESSIAN_CFG",
     "NVFP4_W4A4_WEIGHT_MSE_FP8_SWEEP_CFG",
 }
 
diff --git a/modelopt_recipes/configs/numerics/fp8.yaml b/modelopt_recipes/configs/numerics/fp8.yaml
index ab1da6fad5f..7761dd106c0 100644
--- a/modelopt_recipes/configs/numerics/fp8.yaml
+++ b/modelopt_recipes/configs/numerics/fp8.yaml
@@ -13,9 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# FP8 E4M3 quantizer attributes (per-tensor; used for weight/activation/KV).
-# ``axis: null`` is explicit to match the hardcoded ``FP8_DEFAULT_CFG`` shape —
-# downstream code that keys on ``"axis" in cfg`` sees the same dict layout.
+# Per-tensor FP8 E4M3 quantizer attributes.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig
 num_bits: e4m3
diff --git a/modelopt_recipes/configs/numerics/int4_per_block.yaml b/modelopt_recipes/configs/numerics/int4_per_block.yaml
new file mode 100644
index 00000000000..35d9f53a17a
--- /dev/null
+++ b/modelopt_recipes/configs/numerics/int4_per_block.yaml
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Static INT4 quantizer attributes with 128-value blocks on the last dimension.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig
+num_bits: 4
+block_sizes:
+  -1: 128
+  type: static
diff --git a/modelopt_recipes/configs/numerics/int8.yaml b/modelopt_recipes/configs/numerics/int8.yaml
new file mode 100644
index 00000000000..41e8835c374
--- /dev/null
+++ b/modelopt_recipes/configs/numerics/int8.yaml
@@ -0,0 +1,20 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Per-tensor INT8 quantizer attributes.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig
+num_bits: 8
+axis: null
diff --git a/modelopt_recipes/configs/numerics/int8_per_channel.yaml b/modelopt_recipes/configs/numerics/int8_per_channel.yaml
new file mode 100644
index 00000000000..31c10635fc4
--- /dev/null
+++ b/modelopt_recipes/configs/numerics/int8_per_channel.yaml
@@ -0,0 +1,20 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Per-channel INT8 quantizer attributes with axis 0.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig
+num_bits: 8
+axis: 0
diff --git a/modelopt_recipes/configs/numerics/mxfp4.yaml b/modelopt_recipes/configs/numerics/mxfp4.yaml
new file mode 100644
index 00000000000..f32fde304f2
--- /dev/null
+++ b/modelopt_recipes/configs/numerics/mxfp4.yaml
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Dynamic MXFP4 E2M1 block quantizer attributes with E8M0 scales.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig
+num_bits: e2m1
+block_sizes:
+  -1: 32
+  type: dynamic
+  scale_bits: e8m0
diff --git a/modelopt_recipes/configs/numerics/mxfp6.yaml b/modelopt_recipes/configs/numerics/mxfp6.yaml
new file mode 100644
index 00000000000..f8849edd294
--- /dev/null
+++ b/modelopt_recipes/configs/numerics/mxfp6.yaml
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Dynamic MXFP6 E3M2 block quantizer attributes with E8M0 scales.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig
+num_bits: e3m2
+block_sizes:
+  -1: 32
+  type: dynamic
+  scale_bits: e8m0
diff --git a/modelopt_recipes/configs/numerics/mxfp8.yaml b/modelopt_recipes/configs/numerics/mxfp8.yaml
new file mode 100644
index 00000000000..46cb3d9f7c7
--- /dev/null
+++ b/modelopt_recipes/configs/numerics/mxfp8.yaml
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Dynamic MXFP8 E4M3 block quantizer attributes with E8M0 scales.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig
+num_bits: e4m3
+block_sizes:
+  -1: 32
+  type: dynamic
+  scale_bits: e8m0
diff --git a/modelopt_recipes/configs/numerics/mxint8.yaml b/modelopt_recipes/configs/numerics/mxint8.yaml
new file mode 100644
index 00000000000..388b251de67
--- /dev/null
+++ b/modelopt_recipes/configs/numerics/mxint8.yaml
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Dynamic MXINT8 block quantizer attributes with E8M0 scales.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig
+num_bits: 8
+block_sizes:
+  -1: 32
+  type: dynamic
+  scale_bits: e8m0
diff --git a/modelopt_recipes/configs/numerics/nvfp4.yaml b/modelopt_recipes/configs/numerics/nvfp4.yaml
index 68629c009fb..88598e36e85 100644
--- a/modelopt_recipes/configs/numerics/nvfp4.yaml
+++ b/modelopt_recipes/configs/numerics/nvfp4.yaml
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# NVFP4 E2M1 blockwise quantizer attributes with FP8 E4M3 scales (dynamic calibration, the default).
+# Dynamic NVFP4 E2M1 block quantizer attributes with FP8 E4M3 scales.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig
 num_bits: e2m1
diff --git a/modelopt_recipes/configs/numerics/nvfp4_bs32.yaml b/modelopt_recipes/configs/numerics/nvfp4_bs32.yaml
new file mode 100644
index 00000000000..a84b63a91d3
--- /dev/null
+++ b/modelopt_recipes/configs/numerics/nvfp4_bs32.yaml
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Dynamic NVFP4 E2M1 block quantizer attributes with FP8 E4M3 scales and block size 32.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig
+num_bits: e2m1
+block_sizes:
+  -1: 32
+  type: dynamic
+  scale_bits: e4m3
diff --git a/modelopt_recipes/configs/numerics/nvfp4_static.yaml b/modelopt_recipes/configs/numerics/nvfp4_static.yaml
index 32bd247b79a..9f6ac62e11e 100644
--- a/modelopt_recipes/configs/numerics/nvfp4_static.yaml
+++ b/modelopt_recipes/configs/numerics/nvfp4_static.yaml
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# NVFP4 E2M1 blockwise quantizer attributes with FP8 E4M3 scales (used for NVFP4 weights since weight scales can be static).
+# Static NVFP4 E2M1 block quantizer attributes with FP8 E4M3 scales.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig
 num_bits: e2m1
diff --git a/modelopt_recipes/configs/ptq/presets/README.md b/modelopt_recipes/configs/ptq/presets/README.md
index 3ab307fe453..2aeec2d2b33 100644
--- a/modelopt_recipes/configs/ptq/presets/README.md
+++ b/modelopt_recipes/configs/ptq/presets/README.md
@@ -1,8 +1,8 @@
 # PTQ Preset Configs
 
 This directory holds preset quantization configurations that serve as the
-YAML source of truth for the hardcoded `*_CFG` dicts in
-`modelopt.torch.quantization.config` (e.g., `FP8_DEFAULT_CFG`,
+YAML source of truth for the `*_CFG` quantization config constants exposed
+from `modelopt.torch.quantization.config` (e.g., `FP8_DEFAULT_CFG`,
 `FP8_KV_CFG`).
 
 Presets compose from the reusable snippets in `configs/numerics/` and
@@ -25,6 +25,10 @@ own imports have been resolved.
   be merged on top of a `model/` preset via `$import` to produce a
   complete config. Example: `kv/fp8.yaml` (the YAML source of
   `FP8_KV_CFG`).
+- **`diffusers/`** — Diffusers-specific full quantization presets. These
+  files are complete configs used by the Diffusers examples, including
+  attention and softmax quantizer choices that differ from the generic
+  `model/` presets.
 
 **Note:** The main purpose of these presets is to support the existing
 `hf_ptq.py` script's `--qformat` / `--kv_cache_qformat` flags and other
diff --git a/modelopt_recipes/configs/ptq/presets/diffusers/fp8.yaml b/modelopt_recipes/configs/ptq/presets/diffusers/fp8.yaml
new file mode 100644
index 00000000000..7cb89a7bfed
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/diffusers/fp8.yaml
@@ -0,0 +1,36 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Diffusers FP8 preset with per-tensor E4M3 weights, inputs, and softmax quantizers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  fp8: configs/numerics/fp8
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: fp8
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: fp8
+  - quantizer_name: '*output_quantizer'
+    enable: false
+  - quantizer_name: '*softmax_quantizer'
+    cfg:
+      $import: fp8
diff --git a/modelopt_recipes/configs/ptq/presets/diffusers/int8.yaml b/modelopt_recipes/configs/ptq/presets/diffusers/int8.yaml
new file mode 100644
index 00000000000..be12d717451
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/diffusers/int8.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Diffusers INT8 preset with per-channel weights and per-tensor inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  int8_per_channel: configs/numerics/int8_per_channel
+  int8: configs/numerics/int8
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: int8_per_channel
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: int8
+  - quantizer_name: '*output_quantizer'
+    enable: false
diff --git a/modelopt_recipes/configs/ptq/presets/diffusers/nvfp4.yaml b/modelopt_recipes/configs/ptq/presets/diffusers/nvfp4.yaml
new file mode 100644
index 00000000000..691defb2ae2
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/diffusers/nvfp4.yaml
@@ -0,0 +1,37 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Diffusers NVFP4 preset with dynamic E2M1 block quantization and FP8 softmax.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  fp8: configs/numerics/fp8
+  nvfp4: configs/numerics/nvfp4
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: nvfp4
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: nvfp4
+  - quantizer_name: '*output_quantizer'
+    enable: false
+  - quantizer_name: '*softmax_quantizer'
+    cfg:
+      $import: fp8
diff --git a/modelopt_recipes/configs/ptq/presets/diffusers/nvfp4_fp8_mha.yaml b/modelopt_recipes/configs/ptq/presets/diffusers/nvfp4_fp8_mha.yaml
new file mode 100644
index 00000000000..ee8c2704fc5
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/diffusers/nvfp4_fp8_mha.yaml
@@ -0,0 +1,41 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Diffusers Flux SVDQuant preset with dynamic NVFP4 weights/inputs and FP8 attention quantizers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  attention_qkv_fp8: configs/ptq/units/attention_qkv_fp8
+  fp8: configs/numerics/fp8
+  nvfp4: configs/numerics/nvfp4
+
+algorithm:
+  method: svdquant
+  lowrank: 32
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '**weight_quantizer'
+    cfg:
+      $import: nvfp4
+  - quantizer_name: '**input_quantizer'
+    cfg:
+      $import: nvfp4
+  - quantizer_name: '*output_quantizer'
+    enable: false
+  - $import: attention_qkv_fp8
+  - quantizer_name: '*bmm2_output_quantizer'
+    cfg:
+      $import: fp8
diff --git a/modelopt_recipes/configs/ptq/presets/kv/fp8.yaml b/modelopt_recipes/configs/ptq/presets/kv/fp8.yaml
index 7e97f0bc77b..21894ef9c01 100644
--- a/modelopt_recipes/configs/ptq/presets/kv/fp8.yaml
+++ b/modelopt_recipes/configs/ptq/presets/kv/fp8.yaml
@@ -13,10 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# FP8 E4M3 KV cache quantization preset.
-# Equivalent to the hardcoded FP8_KV_CFG in config.py.
-# This is a partial config (no algorithm, no base_disable_all) — designed
-# to be merged with a primary model quantization config.
+# Partial QuantizeConfig that enables FP8 E4M3 KV-cache quantizers.
+# Merge this fragment with a primary model quantization preset.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
 imports:
diff --git a/modelopt_recipes/configs/ptq/presets/kv/fp8_affine.yaml b/modelopt_recipes/configs/ptq/presets/kv/fp8_affine.yaml
new file mode 100644
index 00000000000..4540df34ea9
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/kv/fp8_affine.yaml
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Partial QuantizeConfig that enables affine FP8 E4M3 KV-cache quantizers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  kv_fp8_affine: configs/ptq/units/kv_fp8_affine
+
+quant_cfg:
+  - $import: kv_fp8_affine
diff --git a/modelopt_recipes/configs/ptq/presets/kv/nvfp4.yaml b/modelopt_recipes/configs/ptq/presets/kv/nvfp4.yaml
new file mode 100644
index 00000000000..6d759e2c115
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/kv/nvfp4.yaml
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Partial QuantizeConfig that enables NVFP4 KV-cache quantizers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  kv_nvfp4: configs/ptq/units/kv_nvfp4
+
+quant_cfg:
+  - $import: kv_nvfp4
diff --git a/modelopt_recipes/configs/ptq/presets/kv/nvfp4_affine.yaml b/modelopt_recipes/configs/ptq/presets/kv/nvfp4_affine.yaml
new file mode 100644
index 00000000000..1f2a871010b
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/kv/nvfp4_affine.yaml
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Partial QuantizeConfig that enables affine NVFP4 KV-cache quantizers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  kv_nvfp4_affine: configs/ptq/units/kv_nvfp4_affine
+
+quant_cfg:
+  - $import: kv_nvfp4_affine
diff --git a/modelopt_recipes/configs/ptq/presets/kv/nvfp4_rotate.yaml b/modelopt_recipes/configs/ptq/presets/kv/nvfp4_rotate.yaml
new file mode 100644
index 00000000000..2451ee1a359
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/kv/nvfp4_rotate.yaml
@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Partial QuantizeConfig enabling rotated NVFP4 KV-cache quantizers; it also sets algorithm: max.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  kv_nvfp4_rotate: configs/ptq/units/kv_nvfp4_rotate
+
+algorithm: max
+quant_cfg:
+  - $import: kv_nvfp4_rotate
diff --git a/modelopt_recipes/configs/ptq/presets/model/fp8.yaml b/modelopt_recipes/configs/ptq/presets/model/fp8.yaml
index af80b57fe48..423904a6e18 100644
--- a/modelopt_recipes/configs/ptq/presets/model/fp8.yaml
+++ b/modelopt_recipes/configs/ptq/presets/model/fp8.yaml
@@ -13,8 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# FP8 per-tensor weight and activation (W8A8), max calibration.
-# Equivalent to the hardcoded FP8_DEFAULT_CFG in config.py.
+# QuantizeConfig preset for W8A8 FP8 E4M3 with per-tensor weights and inputs.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
 imports:
diff --git a/modelopt_recipes/configs/ptq/presets/model/fp8_2d_blockwise_weight_only.yaml b/modelopt_recipes/configs/ptq/presets/model/fp8_2d_blockwise_weight_only.yaml
new file mode 100644
index 00000000000..a8d6bbb03f8
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/fp8_2d_blockwise_weight_only.yaml
@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for FP8 E4M3 2D blockwise weight-only quantization.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  fp8: configs/numerics/fp8
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: fp8
+      block_sizes:
+        -1: 128
+        -2: 128
+  - quantizer_name: '*input_quantizer'
+    enable: false
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/fp8_per_channel_per_token.yaml b/modelopt_recipes/configs/ptq/presets/model/fp8_per_channel_per_token.yaml
new file mode 100644
index 00000000000..98a42f49591
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/fp8_per_channel_per_token.yaml
@@ -0,0 +1,37 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for FP8 E4M3 per-channel weights and per-token dynamic inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  fp8: configs/numerics/fp8
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: fp8
+      axis: 0
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: fp8
+      type: dynamic
+      block_sizes:
+        -1: null
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/int4_awq.yaml b/modelopt_recipes/configs/ptq/presets/model/int4_awq.yaml
new file mode 100644
index 00000000000..828aef7d06f
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/int4_awq.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for AWQ-lite INT4 weight-only quantization.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  int4_per_block: configs/numerics/int4_per_block
+
+algorithm:
+  method: awq_lite
+  alpha_step: 0.1
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: int4_per_block
+  - quantizer_name: '*input_quantizer'
+    enable: false
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/int4_blockwise_weight_only.yaml b/modelopt_recipes/configs/ptq/presets/model/int4_blockwise_weight_only.yaml
new file mode 100644
index 00000000000..f55351812c4
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/int4_blockwise_weight_only.yaml
@@ -0,0 +1,32 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for INT4 blockwise weight-only quantization.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  int4_per_block: configs/numerics/int4_per_block
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: int4_per_block
+  - quantizer_name: '*input_quantizer'
+    enable: false
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/int8.yaml b/modelopt_recipes/configs/ptq/presets/model/int8.yaml
new file mode 100644
index 00000000000..1bfc7b95f0c
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/int8.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for INT8 per-channel weights and per-tensor inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  int8_per_channel: configs/numerics/int8_per_channel
+  int8: configs/numerics/int8
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: int8_per_channel
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: int8
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/int8_smoothquant.yaml b/modelopt_recipes/configs/ptq/presets/model/int8_smoothquant.yaml
new file mode 100644
index 00000000000..d75522bfced
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/int8_smoothquant.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for SmoothQuant INT8 per-channel weights and per-tensor inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  int8_per_channel: configs/numerics/int8_per_channel
+  int8: configs/numerics/int8
+
+algorithm: smoothquant
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: int8_per_channel
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: int8
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/int8_weight_only.yaml b/modelopt_recipes/configs/ptq/presets/model/int8_weight_only.yaml
new file mode 100644
index 00000000000..cc475ab6103
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/int8_weight_only.yaml
@@ -0,0 +1,32 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for INT8 per-channel weight-only quantization.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  int8_per_channel: configs/numerics/int8_per_channel
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: int8_per_channel
+  - quantizer_name: '*input_quantizer'
+    enable: false
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/mamba_moe_fp8_aggressive.yaml b/modelopt_recipes/configs/ptq/presets/model/mamba_moe_fp8_aggressive.yaml
new file mode 100644
index 00000000000..a556941c43d
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/mamba_moe_fp8_aggressive.yaml
@@ -0,0 +1,30 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for FP8 W8A8 Mamba-MoE quantization with shared exclusions.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  w8a8_fp8_fp8: configs/ptq/units/w8a8_fp8_fp8
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  mamba_moe_disabled_quantizers: configs/ptq/units/mamba_moe_disabled_quantizers
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w8a8_fp8_fp8
+  - $import: default_disabled_quantizers
+  - $import: mamba_moe_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/mamba_moe_fp8_conservative.yaml b/modelopt_recipes/configs/ptq/presets/model/mamba_moe_fp8_conservative.yaml
new file mode 100644
index 00000000000..f7f693ae673
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/mamba_moe_fp8_conservative.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for FP8 W8A8 Mamba-MoE quantization with mixer projections disabled.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  w8a8_fp8_fp8: configs/ptq/units/w8a8_fp8_fp8
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  mamba_moe_disabled_quantizers: configs/ptq/units/mamba_moe_disabled_quantizers
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w8a8_fp8_fp8
+  - $import: default_disabled_quantizers
+  - $import: mamba_moe_disabled_quantizers
+  - quantizer_name: '*mixer.in_proj*'
+    enable: false
+  - quantizer_name: '*mixer.out_proj*'
+    enable: false
diff --git a/modelopt_recipes/configs/ptq/presets/model/mamba_moe_nvfp4_aggressive.yaml b/modelopt_recipes/configs/ptq/presets/model/mamba_moe_nvfp4_aggressive.yaml
new file mode 100644
index 00000000000..4ad8accce75
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/mamba_moe_nvfp4_aggressive.yaml
@@ -0,0 +1,30 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for NVFP4 W4A4 Mamba-MoE quantization with shared exclusions.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  mamba_moe_disabled_quantizers: configs/ptq/units/mamba_moe_disabled_quantizers
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w4a4_nvfp4_nvfp4
+  - $import: default_disabled_quantizers
+  - $import: mamba_moe_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/mamba_moe_nvfp4_conservative.yaml b/modelopt_recipes/configs/ptq/presets/model/mamba_moe_nvfp4_conservative.yaml
new file mode 100644
index 00000000000..f7420bb7c07
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/mamba_moe_nvfp4_conservative.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for NVFP4 W4A4 Mamba-MoE quantization with mixer projections disabled.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  mamba_moe_disabled_quantizers: configs/ptq/units/mamba_moe_disabled_quantizers
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w4a4_nvfp4_nvfp4
+  - $import: default_disabled_quantizers
+  - $import: mamba_moe_disabled_quantizers
+  - quantizer_name: '*mixer.in_proj*'
+    enable: false
+  - quantizer_name: '*mixer.out_proj*'
+    enable: false
diff --git a/modelopt_recipes/configs/ptq/presets/model/mxfp4.yaml b/modelopt_recipes/configs/ptq/presets/model/mxfp4.yaml
new file mode 100644
index 00000000000..982e22144ec
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/mxfp4.yaml
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for dynamic MXFP4 block quantization on weights and inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  mxfp4: configs/numerics/mxfp4
+
+algorithm:  # empty value parses as YAML null
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: mxfp4
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: mxfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/mxfp4_mlp_weight_only.yaml b/modelopt_recipes/configs/ptq/presets/model/mxfp4_mlp_weight_only.yaml
new file mode 100644
index 00000000000..8d03600e872
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/mxfp4_mlp_weight_only.yaml
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for dynamic MXFP4 block weight-only quantization on MLP/MoE layers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  mxfp4: configs/numerics/mxfp4
+
+algorithm:  # empty value parses as YAML null
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*mlp*weight_quantizer'
+    cfg:
+      $import: mxfp4
+  - quantizer_name: '*block_sparse_moe*weight_quantizer'
+    cfg:
+      $import: mxfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/mxfp6.yaml b/modelopt_recipes/configs/ptq/presets/model/mxfp6.yaml
new file mode 100644
index 00000000000..e8d590f3848
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/mxfp6.yaml
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for dynamic MXFP6 block quantization on weights and inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  mxfp6: configs/numerics/mxfp6
+
+algorithm:  # empty value parses as YAML null
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: mxfp6
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: mxfp6
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/mxfp8.yaml b/modelopt_recipes/configs/ptq/presets/model/mxfp8.yaml
new file mode 100644
index 00000000000..7cf2832311c
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/mxfp8.yaml
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for dynamic MXFP8 block quantization on weights and inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  mxfp8: configs/numerics/mxfp8
+
+algorithm:  # empty value parses as YAML null
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: mxfp8
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: mxfp8
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/mxint8.yaml b/modelopt_recipes/configs/ptq/presets/model/mxint8.yaml
new file mode 100644
index 00000000000..e6ef1ca3d06
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/mxint8.yaml
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for dynamic MXINT8 block quantization on weights and inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  mxint8: configs/numerics/mxint8
+
+algorithm:  # empty value parses as YAML null
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: mxint8
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: mxint8
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4.yaml
new file mode 100644
index 00000000000..ee74eebeccc
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4.yaml
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for dynamic NVFP4 W4A4 quantization on weights and inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w4a4_nvfp4_nvfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4_awq_clip.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4_awq_clip.yaml
new file mode 100644
index 00000000000..4d1d0d5ee9b
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4_awq_clip.yaml
@@ -0,0 +1,29 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for NVFP4 W4A4 quantization with AWQ clip calibration.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+
+algorithm:
+  method: awq_clip
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w4a4_nvfp4_nvfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4_awq_full.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4_awq_full.yaml
new file mode 100644
index 00000000000..d41046d7d37
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4_awq_full.yaml
@@ -0,0 +1,30 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for NVFP4 W4A4 quantization with full AWQ calibration.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+
+algorithm:
+  method: awq_full
+  alpha_step: 0.1
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w4a4_nvfp4_nvfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4_awq_lite.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4_awq_lite.yaml
new file mode 100644
index 00000000000..70313afac08
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4_awq_lite.yaml
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for NVFP4 W4A4 quantization with AWQ-lite calibration.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+
+algorithm: awq_lite
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w4a4_nvfp4_nvfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4_experts_only.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4_experts_only.yaml
new file mode 100644
index 00000000000..fdd18dfe36b
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4_experts_only.yaml
@@ -0,0 +1,30 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for dynamic NVFP4 W4A4 quantization on expert layers only.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  block_sparse_moe_nvfp4: configs/ptq/units/block_sparse_moe_nvfp4
+  experts_nvfp4: configs/ptq/units/experts_nvfp4
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - $import: block_sparse_moe_nvfp4
+  - $import: experts_nvfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4_fp8_mha.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4_fp8_mha.yaml
new file mode 100644
index 00000000000..abebea4917b
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4_fp8_mha.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for Diffusers NVFP4 with FP8 attention quantizers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4
+  attention_qkv_fp8: configs/ptq/units/attention_qkv_fp8
+  fp8: configs/numerics/fp8
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w4a4_nvfp4_nvfp4
+  - quantizer_name: '*output_quantizer'
+    enable: false
+  - $import: attention_qkv_fp8
+  - quantizer_name: 'transformer_blocks*bmm2_output_quantizer'
+    cfg:
+      $import: fp8
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4_mlp_only.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4_mlp_only.yaml
new file mode 100644
index 00000000000..c5d36fd9236
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4_mlp_only.yaml
@@ -0,0 +1,37 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for dynamic NVFP4 W4A4 quantization on MLP/MoE layers only.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  block_sparse_moe_nvfp4: configs/ptq/units/block_sparse_moe_nvfp4
+  experts_nvfp4: configs/ptq/units/experts_nvfp4
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  nvfp4: configs/numerics/nvfp4
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*mlp*weight_quantizer'
+    cfg:
+      $import: nvfp4
+  - quantizer_name: '*mlp*input_quantizer'
+    cfg:
+      $import: nvfp4
+  - $import: block_sparse_moe_nvfp4
+  - $import: experts_nvfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4_mlp_weight_only.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4_mlp_weight_only.yaml
new file mode 100644
index 00000000000..952ea3a90db
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4_mlp_weight_only.yaml
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for NVFP4 block-size-32 weight-only quantization on MLP/MoE layers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  nvfp4_bs32: configs/numerics/nvfp4_bs32
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*mlp*weight_quantizer'
+    cfg:
+      $import: nvfp4_bs32
+  - quantizer_name: '*block_sparse_moe*weight_quantizer'
+    cfg:
+      $import: nvfp4_bs32
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4_omlp_only.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4_omlp_only.yaml
new file mode 100644
index 00000000000..82bf401ea9f
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4_omlp_only.yaml
@@ -0,0 +1,41 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for dynamic NVFP4 W4A4 quantization on output projections and MLP/MoE layers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  block_sparse_moe_nvfp4: configs/ptq/units/block_sparse_moe_nvfp4
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  nvfp4: configs/numerics/nvfp4
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*o_proj*weight_quantizer'
+    cfg:
+      $import: nvfp4
+  - quantizer_name: '*o_proj*input_quantizer'
+    cfg:
+      $import: nvfp4
+  - quantizer_name: '*mlp*weight_quantizer'
+    cfg:
+      $import: nvfp4
+  - quantizer_name: '*mlp*input_quantizer'
+    cfg:
+      $import: nvfp4
+  - $import: block_sparse_moe_nvfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4_svdquant.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4_svdquant.yaml
new file mode 100644
index 00000000000..5acb834db2c
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4_svdquant.yaml
@@ -0,0 +1,30 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for NVFP4 W4A4 quantization with SVDQuant low-rank calibration.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+
+algorithm:
+  method: svdquant
+  lowrank: 32
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w4a4_nvfp4_nvfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4_w4a4_weight_local_hessian.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4_w4a4_weight_local_hessian.yaml
new file mode 100644
index 00000000000..ac6a3094b7c
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4_w4a4_weight_local_hessian.yaml
@@ -0,0 +1,36 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for NVFP4 W4A4 with static weight scales from local-Hessian calibration.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  nvfp4: configs/numerics/nvfp4
+  nvfp4_static: configs/numerics/nvfp4_static
+
+algorithm:
+  method: local_hessian
+  fp8_scale_sweep: true
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: nvfp4_static
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: nvfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/nvfp4_w4a4_weight_mse_fp8_sweep.yaml b/modelopt_recipes/configs/ptq/presets/model/nvfp4_w4a4_weight_mse_fp8_sweep.yaml
new file mode 100644
index 00000000000..3ae22dbc3a6
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/nvfp4_w4a4_weight_mse_fp8_sweep.yaml
@@ -0,0 +1,36 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for NVFP4 W4A4 with static weight scales from MSE FP8-scale sweep.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  nvfp4: configs/numerics/nvfp4
+  nvfp4_static: configs/numerics/nvfp4_static
+
+algorithm:
+  method: mse
+  fp8_scale_sweep: true
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: nvfp4_static
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: nvfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/w4a16_nvfp4.yaml b/modelopt_recipes/configs/ptq/presets/model/w4a16_nvfp4.yaml
new file mode 100644
index 00000000000..7a189858c65
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/w4a16_nvfp4.yaml
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for NVFP4 weight-only quantization on all layers (W4A16).
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  w4_nvfp4: configs/ptq/units/w4_nvfp4
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w4_nvfp4
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/w4a8_awq_beta.yaml b/modelopt_recipes/configs/ptq/presets/model/w4a8_awq_beta.yaml
new file mode 100644
index 00000000000..12073e14601
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/w4a8_awq_beta.yaml
@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for W4A8 AWQ-lite with INT4 block weights and FP8 inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  fp8: configs/numerics/fp8
+  int4_per_block: configs/numerics/int4_per_block
+
+algorithm: awq_lite
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      - $import: int4_per_block
+      - $import: fp8
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: fp8
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/w4a8_mxfp4_fp8.yaml b/modelopt_recipes/configs/ptq/presets/model/w4a8_mxfp4_fp8.yaml
new file mode 100644
index 00000000000..428cb659da5
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/w4a8_mxfp4_fp8.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for W4A8 with MXFP4 block weights and FP8 inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  fp8: configs/numerics/fp8
+  mxfp4: configs/numerics/mxfp4
+
+algorithm:
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: mxfp4
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: fp8
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/presets/model/w4a8_nvfp4_fp8.yaml b/modelopt_recipes/configs/ptq/presets/model/w4a8_nvfp4_fp8.yaml
new file mode 100644
index 00000000000..9b7e541abcc
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/model/w4a8_nvfp4_fp8.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizeConfig preset for W4A8 with NVFP4 block-size-32 weights and FP8 inputs.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  nvfp4_bs32: configs/numerics/nvfp4_bs32
+  fp8: configs/numerics/fp8
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: nvfp4_bs32
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: fp8
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/configs/ptq/units/README.md b/modelopt_recipes/configs/ptq/units/README.md
index b7a7421f9fc..cd738f62626 100644
--- a/modelopt_recipes/configs/ptq/units/README.md
+++ b/modelopt_recipes/configs/ptq/units/README.md
@@ -19,7 +19,15 @@ recipes (under `general/` or `models/`) or presets (under `presets/`).
 | `base_disable_all.yaml` | Deny-all entry: disables all quantizers as the first step |
 | `default_disabled_quantizers.yaml` | Standard exclusions (LM head, routers, BatchNorm, etc.) |
 | `kv_fp8.yaml` | FP8 E4M3 KV cache quantizer entry; supported on Hopper+ GPUs |
+| `kv_fp8_affine.yaml` | FP8 E4M3 affine KV cache quantizer entries; supported on Hopper+ GPUs |
 | `kv_fp8_cast.yaml` | FP8 E4M3 KV cache with constant amax (skips KV calibration); supported on Hopper+ GPUs |
+| `kv_nvfp4.yaml` | NVFP4 KV cache quantizer entry; supported on Blackwell+ GPUs |
+| `kv_nvfp4_affine.yaml` | NVFP4 affine KV cache quantizer entries; supported on Blackwell+ GPUs |
 | `kv_nvfp4_cast.yaml` | NVFP4 KV cache with constant amax (skips KV calibration); supported on Blackwell+ GPUs |
+| `kv_nvfp4_rotate.yaml` | NVFP4 rotated KV cache quantizer entries; supported on Blackwell+ GPUs |
+| `mamba_moe_disabled_quantizers.yaml` | Shared Mamba-MoE quantizer exclusions |
 | `w8a8_fp8_fp8.yaml` | FP8 weight + activation quantizer entries (W8A8); supported on Hopper+ GPUs |
 | `w4a4_nvfp4_nvfp4.yaml` | NVFP4 weight + activation quantizer entries (W4A4); supported on Blackwell+ GPUs |
+| `block_sparse_moe_nvfp4.yaml` | NVFP4 W4A4 on `*block_sparse_moe*` weight/input quantizers |
+| `experts_nvfp4.yaml` | NVFP4 W4A4 on `*.experts.*` weight/input quantizers |
+| `attention_qkv_fp8.yaml` | FP8 E4M3 on attention q/k/v bmm and softmax quantizers |
diff --git a/modelopt_recipes/configs/ptq/units/attention_qkv_fp8.yaml b/modelopt_recipes/configs/ptq/units/attention_qkv_fp8.yaml
new file mode 100644
index 00000000000..4aa1a7d3240
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/units/attention_qkv_fp8.yaml
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizerCfgList snippet that enables per-tensor FP8 E4M3 on attention q/k/v
+# bmm and softmax quantizers. Pair with a model preset to add bmm2-output entries.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
+imports:
+  fp8: configs/numerics/fp8
+---
+  - quantizer_name: '*[qkv]_bmm_quantizer'
+    cfg:
+      $import: fp8
+  - quantizer_name: '*softmax_quantizer'
+    cfg:
+      $import: fp8
diff --git a/modelopt_recipes/configs/ptq/units/base_disable_all.yaml b/modelopt_recipes/configs/ptq/units/base_disable_all.yaml
index 9a520ee207f..ee96d00411c 100644
--- a/modelopt_recipes/configs/ptq/units/base_disable_all.yaml
+++ b/modelopt_recipes/configs/ptq/units/base_disable_all.yaml
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Disable all quantizers by default (deny-all-then-configure pattern).
+# QuantizerCfgEntry snippet that disables every quantizer before selective re-enabling.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgEntry
 quantizer_name: '*'
diff --git a/modelopt_recipes/configs/ptq/units/block_sparse_moe_nvfp4.yaml b/modelopt_recipes/configs/ptq/units/block_sparse_moe_nvfp4.yaml
new file mode 100644
index 00000000000..b39bc50d748
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/units/block_sparse_moe_nvfp4.yaml
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizerCfgList snippet that enables dynamic NVFP4 on weight and input
+# quantizers under ``*block_sparse_moe*`` paths.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
+imports:
+  nvfp4: configs/numerics/nvfp4
+---
+  - quantizer_name: '*block_sparse_moe*weight_quantizer'
+    cfg:
+      $import: nvfp4
+  - quantizer_name: '*block_sparse_moe*input_quantizer'
+    cfg:
+      $import: nvfp4
diff --git a/modelopt_recipes/configs/ptq/units/default_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/units/default_disabled_quantizers.yaml
index 1508f942776..86d5a64c673 100644
--- a/modelopt_recipes/configs/ptq/units/default_disabled_quantizers.yaml
+++ b/modelopt_recipes/configs/ptq/units/default_disabled_quantizers.yaml
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Standard quantizer exclusions: layers that should not be quantized.
+# QuantizerCfgList snippet for standard module patterns that should remain unquantized.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
   - quantizer_name: '*block_sparse_moe.gate*'
diff --git a/modelopt_recipes/configs/ptq/units/experts_nvfp4.yaml b/modelopt_recipes/configs/ptq/units/experts_nvfp4.yaml
new file mode 100644
index 00000000000..31c54e57d9c
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/units/experts_nvfp4.yaml
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizerCfgList snippet that enables dynamic NVFP4 on weight and input
+# quantizers under ``*.experts.*`` paths.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
+imports:
+  nvfp4: configs/numerics/nvfp4
+---
+  - quantizer_name: '*.experts.*weight_quantizer'
+    cfg:
+      $import: nvfp4
+  - quantizer_name: '*.experts.*input_quantizer'
+    cfg:
+      $import: nvfp4
diff --git a/modelopt_recipes/configs/ptq/units/kv_fp8.yaml b/modelopt_recipes/configs/ptq/units/kv_fp8.yaml
index 646be96709f..86156e5e95c 100644
--- a/modelopt_recipes/configs/ptq/units/kv_fp8.yaml
+++ b/modelopt_recipes/configs/ptq/units/kv_fp8.yaml
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# FP8 E4M3 KV cache quantization.
+# QuantizerCfgList snippet that enables FP8 E4M3 KV-cache quantizers.
 #
 # This snippet uses multi-document YAML (separated by ---) because it is a
 # list-valued snippet that also needs to $import another snippet.  YAML only
diff --git a/modelopt_recipes/configs/ptq/units/kv_fp8_affine.yaml b/modelopt_recipes/configs/ptq/units/kv_fp8_affine.yaml
new file mode 100644
index 00000000000..5276aff2d48
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/units/kv_fp8_affine.yaml
@@ -0,0 +1,30 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizerCfgList snippet that enables affine FP8 E4M3 KV-cache quantizers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
+imports:
+  kv_fp8: configs/ptq/units/kv_fp8
+  fp8: configs/numerics/fp8
+---
+  - $import: kv_fp8
+  - quantizer_name: '*[kv]_bmm_quantizer'
+    cfg:
+      $import: fp8
+      bias:
+        -2:
+        -4:
+        type: static
diff --git a/modelopt_recipes/configs/ptq/units/kv_fp8_cast.yaml b/modelopt_recipes/configs/ptq/units/kv_fp8_cast.yaml
index 64cfbd47bc7..606c969ab37 100644
--- a/modelopt_recipes/configs/ptq/units/kv_fp8_cast.yaml
+++ b/modelopt_recipes/configs/ptq/units/kv_fp8_cast.yaml
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# FP8 E4M3 KV cache quantization with constant amax.
+# QuantizerCfgList snippet that enables FP8 E4M3 KV-cache quantizers with constant amax.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
 imports:
diff --git a/modelopt_recipes/configs/ptq/units/kv_nvfp4.yaml b/modelopt_recipes/configs/ptq/units/kv_nvfp4.yaml
new file mode 100644
index 00000000000..a95b854a0aa
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/units/kv_nvfp4.yaml
@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizerCfgList snippet that enables NVFP4 KV-cache quantizers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
+imports:
+  nvfp4: configs/numerics/nvfp4
+---
+  - quantizer_name: '*[kv]_bmm_quantizer'
+    cfg:
+      $import: nvfp4
diff --git a/modelopt_recipes/configs/ptq/units/kv_nvfp4_affine.yaml b/modelopt_recipes/configs/ptq/units/kv_nvfp4_affine.yaml
new file mode 100644
index 00000000000..2122e8b3431
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/units/kv_nvfp4_affine.yaml
@@ -0,0 +1,30 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizerCfgList snippet that enables affine NVFP4 KV-cache quantizers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
+imports:
+  kv_nvfp4: configs/ptq/units/kv_nvfp4
+  nvfp4: configs/numerics/nvfp4
+---
+  - $import: kv_nvfp4
+  - quantizer_name: '*[kv]_bmm_quantizer'
+    cfg:
+      $import: nvfp4
+      bias:
+        -2:
+        -4:
+        type: static
diff --git a/modelopt_recipes/configs/ptq/units/kv_nvfp4_cast.yaml b/modelopt_recipes/configs/ptq/units/kv_nvfp4_cast.yaml
index 3fc5d597aa8..b5658c2ff11 100644
--- a/modelopt_recipes/configs/ptq/units/kv_nvfp4_cast.yaml
+++ b/modelopt_recipes/configs/ptq/units/kv_nvfp4_cast.yaml
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# NVFP4 KV cache quantization with constant amax.
+# QuantizerCfgList snippet that enables NVFP4 KV-cache quantizers with constant amax.
 #
 # The deployment kernel upcasts NVFP4 KV values to FP8 before attention, so the
 # scale must land in the FP8 range.
diff --git a/modelopt_recipes/configs/ptq/units/kv_nvfp4_rotate.yaml b/modelopt_recipes/configs/ptq/units/kv_nvfp4_rotate.yaml
new file mode 100644
index 00000000000..b117edbf1be
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/units/kv_nvfp4_rotate.yaml
@@ -0,0 +1,32 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizerCfgList snippet that enables rotated NVFP4 KV-cache quantizers.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
+imports:
+  nvfp4: configs/numerics/nvfp4
+---
+  - quantizer_name: '*q_bmm_quantizer'
+    cfg:
+      rotate: true
+    enable: false
+  - quantizer_name: '*k_bmm_quantizer'
+    cfg:
+      $import: nvfp4
+      rotate: true
+  - quantizer_name: '*v_bmm_quantizer'
+    cfg:
+      $import: nvfp4
diff --git a/modelopt_recipes/configs/ptq/units/mamba_moe_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/units/mamba_moe_disabled_quantizers.yaml
new file mode 100644
index 00000000000..c9b87f8d212
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/units/mamba_moe_disabled_quantizers.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# QuantizerCfgList snippet with Mamba/MoE-specific exclusion patterns.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
+  - quantizer_name: '*fc1_latent_proj*'
+    enable: false
+  - quantizer_name: '*fc2_latent_proj*'
+    enable: false
+  - quantizer_name: '*q_proj*'
+    enable: false
+  - quantizer_name: '*k_proj*'
+    enable: false
+  - quantizer_name: '*v_proj*'
+    enable: false
+  - quantizer_name: '*o_proj*'
+    enable: false
+  - quantizer_name: '*self_attention.linear_qkv*'
+    enable: false
+  - quantizer_name: '*self_attention.linear_proj*'
+    enable: false
diff --git a/modelopt_recipes/configs/ptq/units/w4a4_nvfp4_nvfp4.yaml b/modelopt_recipes/configs/ptq/units/w4a4_nvfp4_nvfp4.yaml
index 033cdf76697..010d81ab621 100644
--- a/modelopt_recipes/configs/ptq/units/w4a4_nvfp4_nvfp4.yaml
+++ b/modelopt_recipes/configs/ptq/units/w4a4_nvfp4_nvfp4.yaml
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# W4A4 NVFP4: NVFP4 E2M1 dynamic weight and activation quantizers.
+# QuantizerCfgList snippet that enables dynamic NVFP4 on weight and input quantizers.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
 imports:
diff --git a/modelopt_recipes/configs/ptq/units/w8a8_fp8_fp8.yaml b/modelopt_recipes/configs/ptq/units/w8a8_fp8_fp8.yaml
index 07db59ff3b0..068f38d1497 100644
--- a/modelopt_recipes/configs/ptq/units/w8a8_fp8_fp8.yaml
+++ b/modelopt_recipes/configs/ptq/units/w8a8_fp8_fp8.yaml
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# W8A8 FP8: FP8 E4M3 weight and activation quantizers.
+# QuantizerCfgList snippet that enables per-tensor FP8 E4M3 on weight and input quantizers.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
 imports:
diff --git a/modelopt_recipes/general/ptq/fp8_default-kv_fp8.yaml b/modelopt_recipes/general/ptq/fp8_default-kv_fp8.yaml
index 4c6ba99e11f..ea2ac567290 100644
--- a/modelopt_recipes/general/ptq/fp8_default-kv_fp8.yaml
+++ b/modelopt_recipes/general/ptq/fp8_default-kv_fp8.yaml
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Composed PTQ recipe for W8A8 FP8 E4M3 model quantization with FP8 KV-cache quantization.
+
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
   default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
@@ -21,7 +23,8 @@ imports:
 
 metadata:
   recipe_type: ptq
-  description: FP8 per-tensor weight and activation (W8A8), FP8 KV cache, max calibration.
+  description: >-
+    Composes W8A8 FP8 E4M3 model quantization with FP8 KV-cache quantization; uses max calibration.
 quantize:
   algorithm: max
   quant_cfg:
diff --git a/modelopt_recipes/general/ptq/fp8_default-kv_fp8_cast.yaml b/modelopt_recipes/general/ptq/fp8_default-kv_fp8_cast.yaml
index f99a716ced5..4e24bf53274 100644
--- a/modelopt_recipes/general/ptq/fp8_default-kv_fp8_cast.yaml
+++ b/modelopt_recipes/general/ptq/fp8_default-kv_fp8_cast.yaml
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Composed PTQ recipe for W8A8 FP8 E4M3 model quantization with FP8 KV-cache cast mode.
+
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
   default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
@@ -22,8 +24,8 @@ imports:
 metadata:
   recipe_type: ptq
   description: >-
-    FP8 per-tensor weight and activation (W8A8), FP8 KV cache with constant amax
-    (skips KV calibration; amax hardcoded to FP8 E4M3 max 448.0), max calibration.
+    Composes W8A8 FP8 E4M3 model quantization with FP8 KV-cache cast mode using constant amax; uses
+    max calibration.
 quantize:
   algorithm: max
   quant_cfg:
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-kv_fp8.yaml b/modelopt_recipes/general/ptq/nvfp4_default-kv_fp8.yaml
index 63b6d673b94..6a65efef57a 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-kv_fp8.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-kv_fp8.yaml
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Composed PTQ recipe for dynamic NVFP4 W4A4 model quantization with FP8 KV-cache quantization.
+
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
   default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
@@ -21,7 +23,9 @@ imports:
 
 metadata:
   recipe_type: ptq
-  description: NVFP4 W4A4, FP8 KV cache, max calibration.
+  description: >-
+    Composes dynamic NVFP4 W4A4 model quantization with FP8 KV-cache quantization; uses max
+    calibration.
 quantize:
   algorithm: max
   quant_cfg:
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-kv_fp8_cast.yaml b/modelopt_recipes/general/ptq/nvfp4_default-kv_fp8_cast.yaml
index 1504f33d3cc..312cdd16c8d 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-kv_fp8_cast.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-kv_fp8_cast.yaml
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Composed PTQ recipe for dynamic NVFP4 W4A4 model quantization with FP8 KV-cache cast mode.
+
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
   default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
@@ -22,8 +24,8 @@ imports:
 metadata:
   recipe_type: ptq
   description: >-
-    NVFP4 W4A4, FP8 KV cache with constant amax (skips KV calibration; amax
-    hardcoded to FP8 E4M3 max 448.0), max calibration.
+    Composes dynamic NVFP4 W4A4 model quantization with FP8 KV-cache cast mode using constant amax;
+    uses max calibration.
 quantize:
   algorithm: max
   quant_cfg:
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-kv_none-gptq.yaml b/modelopt_recipes/general/ptq/nvfp4_default-kv_none-gptq.yaml
index 6aabb04a150..6dee51857c8 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-kv_none-gptq.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-kv_none-gptq.yaml
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Composed PTQ recipe for NVFP4 W4A4 model quantization with KV quantizers disabled and GPTQ calibration.
+
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
   default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
@@ -21,7 +23,9 @@ imports:
 
 metadata:
   recipe_type: ptq
-  description: NVFP4 weight and activation (W4A4), gptq layerwise calibration.
+  description: >-
+    Applies NVFP4 W4A4 with static weight scales, dynamic inputs, KV quantizers disabled, and GPTQ
+    layerwise calibration.
 quantize:
   algorithm:
     method: gptq
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-kv_nvfp4_cast.yaml b/modelopt_recipes/general/ptq/nvfp4_default-kv_nvfp4_cast.yaml
index d9991e0b9c3..0acdf6050db 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-kv_nvfp4_cast.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-kv_nvfp4_cast.yaml
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Composed PTQ recipe for dynamic NVFP4 W4A4 model quantization with NVFP4 KV-cache cast mode.
+
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
   default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
@@ -22,10 +24,8 @@ imports:
 metadata:
   recipe_type: ptq
   description: >-
-    NVFP4 W4A4, NVFP4 KV cache with constant amax (skips KV calibration; amax
-    hardcoded to FP8 E4M3 max 448.0 — the deployment kernel upcasts NVFP4 KV
-    values to FP8 before attention, so the scale must land in the FP8 range),
-    max calibration.
+    Composes dynamic NVFP4 W4A4 model quantization with NVFP4 KV-cache cast mode using constant
+    amax; uses max calibration.
 quantize:
   algorithm: max
   quant_cfg:
diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-kv_fp8.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-kv_fp8.yaml
index 547cf312863..08864c8a50d 100644
--- a/modelopt_recipes/general/ptq/nvfp4_experts_only-kv_fp8.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-kv_fp8.yaml
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Composed PTQ recipe for expert-only dynamic NVFP4 quantization with FP8 KV-cache quantization.
+
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
   default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
@@ -21,7 +23,9 @@ imports:
 
 metadata:
   recipe_type: ptq
-  description: NVFP4 static weight and dynamic activation for expert layers only (W4A4), FP8 KV cache, max calibration.
+  description: >-
+    Applies dynamic NVFP4 only to expert-layer weight and input quantizers, plus FP8 KV-cache
+    quantization; uses max calibration.
 quantize:
   algorithm:
     method: max
diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only_mse-kv_fp8_cast.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only_mse-kv_fp8_cast.yaml
index 5db1666402d..5bf9a36dc31 100644
--- a/modelopt_recipes/general/ptq/nvfp4_experts_only_mse-kv_fp8_cast.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_experts_only_mse-kv_fp8_cast.yaml
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Composed PTQ recipe for expert-only NVFP4 quantization with MSE weight calibration and FP8 KV-cache cast mode.
+
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
   default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
@@ -22,7 +24,9 @@ imports:
 
 metadata:
   recipe_type: ptq
-  description: NVFP4 static weight (MSE FP8-scale sweep) and dynamic activation for expert layers only (W4A4), FP8 KV cache with constant amax.
+  description: >-
+    Applies static NVFP4 weight scales from an MSE FP8-scale sweep and dynamic NVFP4 inputs to
+    expert layers only, plus FP8 KV-cache cast mode.
 quantize:
   algorithm:
     method: mse
diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-kv_fp8.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-kv_fp8.yaml
index 60cba464e0c..a4cf71a1dbd 100644
--- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-kv_fp8.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-kv_fp8.yaml
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Composed PTQ recipe for MLP/MoE-only dynamic NVFP4 quantization with FP8 KV-cache quantization.
+
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
   default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
@@ -21,7 +23,9 @@ imports:
 
 metadata:
   recipe_type: ptq
-  description: NVFP4 static weight and dynamic activation for all linear layers (W4A4), FP8 KV cache, max calibration.
+  description: >-
+    Applies dynamic NVFP4 only to MLP/MoE weight and input quantizers, plus FP8 KV-cache
+    quantization; uses max calibration.
 quantize:
   algorithm: max
   quant_cfg:
diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-kv_fp8_cast.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-kv_fp8_cast.yaml
index 875fb47c9b3..2ea2c0ab13e 100644
--- a/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-kv_fp8_cast.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-kv_fp8_cast.yaml
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Composed PTQ recipe for MLP/MoE-only NVFP4 quantization with MSE weight calibration and FP8 KV-cache cast mode.
+
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
   default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
@@ -22,7 +24,9 @@ imports:
 
 metadata:
   recipe_type: ptq
-  description: NVFP4 static weight (MSE FP8-scale sweep) and dynamic activation for MLP/MoE linear layers (W4A4), FP8 KV cache with constant amax.
+  description: >-
+    Applies static NVFP4 weight scales from an MSE FP8-scale sweep and dynamic NVFP4 inputs to
+    MLP/MoE layers, plus FP8 KV-cache cast mode.
 quantize:
   algorithm:
     method: mse
diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-kv_fp8.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-kv_fp8.yaml
index 13c7cac0797..5348e8c7123 100644
--- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-kv_fp8.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-kv_fp8.yaml
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Composed PTQ recipe for output-projection and MLP/MoE dynamic NVFP4 quantization with FP8 KV-cache quantization.
+
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
   default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
@@ -21,7 +23,9 @@ imports:
 
 metadata:
   recipe_type: ptq
-  description: NVFP4 static weight and dynamic activation for all linear layers including output projections, FP8 KV cache, max calibration.
+  description: >-
+    Applies dynamic NVFP4 to output-projection and MLP/MoE weight and input quantizers, plus
+    FP8 KV-cache quantization; uses max calibration.
 quantize:
   algorithm: max
   quant_cfg:
diff --git a/modelopt_recipes/general/speculative_decoding/dflash.yaml b/modelopt_recipes/general/speculative_decoding/dflash.yaml
index a38b24d05d6..021cccd475d 100644
--- a/modelopt_recipes/general/speculative_decoding/dflash.yaml
+++ b/modelopt_recipes/general/speculative_decoding/dflash.yaml
@@ -1,5 +1,4 @@
-# Base config for DFlash training. A full modelopt recipe; override fields via
-# OmegaConf dotlist on the CLI (e.g. `model.model_name_or_path=...`).
+# DFlash speculative-decoding training recipe. Override fields via OmegaConf dotlist on the CLI.
 
 metadata:
   recipe_type: speculative_dflash
diff --git a/modelopt_recipes/general/speculative_decoding/eagle3.yaml b/modelopt_recipes/general/speculative_decoding/eagle3.yaml
index 78767ad1ebb..34448182ae2 100644
--- a/modelopt_recipes/general/speculative_decoding/eagle3.yaml
+++ b/modelopt_recipes/general/speculative_decoding/eagle3.yaml
@@ -1,5 +1,4 @@
-# Base config for EAGLE3 training. A full modelopt recipe; override fields via
-# OmegaConf dotlist on the CLI (e.g. `model.model_name_or_path=...`).
+# EAGLE3 speculative-decoding training recipe. Override fields via OmegaConf dotlist on the CLI.
 
 metadata:
   recipe_type: speculative_eagle
diff --git a/modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml b/modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml
index c00aff7d44f..d0adbe00479 100644
--- a/modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml
+++ b/modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml
@@ -13,50 +13,37 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Model-specific PTQ recipe for Step3.5-Flash NVFP4 MLP/MoE quantization with FP8 KV cache.
+
+imports:
+  fp8: configs/numerics/fp8
+  nvfp4: configs/numerics/nvfp4
+
 metadata:
   recipe_type: ptq
-  description: NVFP4 static weight and dynamic activation for MoE/MLP projections (W4A4), FP8 KV cache, max calibration.
+  description: >-
+    Step3.5-Flash PTQ recipe that enables dynamic NVFP4 on MoE/MLP weight and input quantizers,
+    enables FP8 KV-cache quantizers, and leaves other quantizers disabled.
 quantize:
   algorithm: max
   quant_cfg:
     - quantizer_name: '*'
       enable: false
     - quantizer_name: '*moe*weight_quantizer'
-      enable: true
       cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+        $import: nvfp4
     - quantizer_name: '*moe*input_quantizer'
-      enable: true
       cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+        $import: nvfp4
     - quantizer_name: '*mlp*weight_quantizer'
-      enable: true
       cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+        $import: nvfp4
     - quantizer_name: '*mlp*input_quantizer'
-      enable: true
       cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+        $import: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
-      enable: true
       cfg:
-        num_bits: e4m3
+        $import: fp8
     - quantizer_name: '*share_expert*'
       enable: false
     - quantizer_name: '*moe.gate.*'
diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py
index ce241150a3b..4c4e2d07ded 100644
--- a/tests/unit/recipe/test_loader.py
+++ b/tests/unit/recipe/test_loader.py
@@ -16,6 +16,7 @@
 """Unit tests for modelopt.recipe.loader and modelopt.recipe.loader.load_config."""
 
 import re
+from importlib.resources import files
 
 import pytest
 
@@ -85,6 +86,13 @@ def _write_quantizer_cfg_list(path, body: str):
     path.write_text(QUANTIZER_CFG_LIST_SCHEMA + body)
 
 
+def _cfg_to_dict(cfg):
+    """Dump a config or list of configs via ``model_dump(exclude_unset=True)`` for comparison."""
+    if isinstance(cfg, list):
+        return [item.model_dump(exclude_unset=True) for item in cfg]
+    return cfg.model_dump(exclude_unset=True)
+
+
 # ---------------------------------------------------------------------------
 # Directory-format YAML fixtures
 # ---------------------------------------------------------------------------
@@ -1336,20 +1344,20 @@ def test_import_cross_file_same_name_no_conflict(tmp_path):
 # ---------------------------------------------------------------------------
 
 
-_BUILTIN_CONFIG_SNIPPETS = [
-    "configs/numerics/fp8",
-    "configs/numerics/nvfp4",
-    "configs/numerics/nvfp4_static",
-    "configs/ptq/units/base_disable_all",
-    "configs/ptq/units/default_disabled_quantizers",
-    "configs/ptq/units/kv_fp8",
-    "configs/ptq/units/kv_fp8_cast",
-    "configs/ptq/units/kv_nvfp4_cast",
-    "configs/ptq/units/w4a4_nvfp4_nvfp4",
-    "configs/ptq/units/w8a8_fp8_fp8",
-    "configs/ptq/presets/kv/fp8",
-    "configs/ptq/presets/model/fp8",
-]
+def _iter_builtin_config_snippets(root):
+    """Recursively yield YAML files under ``root`` whose text contains ``modelopt-schema:``."""
+    for child in sorted(root.iterdir(), key=lambda path: path.name):
+        if child.is_dir():
+            yield from _iter_builtin_config_snippets(child)
+        elif child.name.endswith((".yaml", ".yml")) and "modelopt-schema:" in child.read_text(
+            encoding="utf-8"
+        ):
+            yield child
+
+
+_BUILTIN_CONFIG_SNIPPETS = list(
+    _iter_builtin_config_snippets(files("modelopt_recipes").joinpath("configs"))
+)
 
 
 @pytest.mark.parametrize("config_path", _BUILTIN_CONFIG_SNIPPETS)
@@ -1428,6 +1436,66 @@ def test_modelopt_schema_comment_validates_after_import_resolution(tmp_path):
     }
 
 
+def test_import_dict_snippet_imports_in_union_typed_list_field(tmp_path):
+    """Two dict snippets can each be imported as items of QuantizerCfgEntry.cfg's list branch."""
+    (tmp_path / "int4.yaml").write_text(
+        "# modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig\n"
+        "num_bits: 4\n"
+        "block_sizes:\n"
+        "  -1: 128\n"
+        "  type: static\n"
+    )
+    (tmp_path / "fp8.yaml").write_text(
+        "# modelopt-schema: modelopt.torch.quantization.config.QuantizerAttributeConfig\n"
+        "num_bits: e4m3\n"
+    )
+    config_file = tmp_path / "config.yaml"
+    config_file.write_text(
+        f"# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig\n"
+        f"imports:\n"
+        f"  int4: {tmp_path / 'int4.yaml'}\n"
+        f"  fp8: {tmp_path / 'fp8.yaml'}\n"
+        f"algorithm: awq_lite\n"
+        f"quant_cfg:\n"
+        f"  - quantizer_name: '*weight_quantizer'\n"
+        f"    cfg:\n"
+        f"      - $import: int4\n"
+        f"      - $import: fp8\n"
+    )
+
+    data = load_config(config_file)
+
+    assert _cfg_to_dict(data["quant_cfg"][0]["cfg"]) == [
+        {"num_bits": 4, "block_sizes": {-1: 128, "type": "static"}},
+        {"num_bits": (4, 3)},
+    ]
+
+
+def test_import_dict_snippet_in_union_typed_list_field_with_inline_item(tmp_path):
+    """A dict snippet import and an inline item can share one QuantizerCfgEntry.cfg list."""
+    _write_quantizer_attribute(
+        tmp_path / "int4.yaml",
+        "num_bits: 4\nblock_sizes:\n  -1: 128\n  type: static\n",
+    )
+    config_file = tmp_path / "config.yaml"
+    config_file.write_text(
+        f"# modelopt-schema: modelopt.torch.quantization.config.QuantizeConfig\n"
+        f"imports:\n"
+        f"  int4: {tmp_path / 'int4.yaml'}\n"
+        f"algorithm: awq_lite\n"
+        f"quant_cfg:\n"
+        f"  - quantizer_name: '*weight_quantizer'\n"
+        f"    cfg:\n"
+        f"      - $import: int4\n"
+        f"      - num_bits: e4m3\n"
+    )
+    data = load_config(config_file)
+    assert _cfg_to_dict(data["quant_cfg"][0]["cfg"]) == [
+        {"num_bits": 4, "block_sizes": {-1: 128, "type": "static"}},
+        {"num_bits": (4, 3)},
+    ]
+
+
 # ---------------------------------------------------------------------------
 # Coverage: _load_raw_config edge cases
 # ---------------------------------------------------------------------------