diff --git a/data/models/generated/v0.5.8/qwen35.yaml b/data/models/generated/v0.5.8/qwen35.yaml index 53afb4de..136dbe9e 100644 --- a/data/models/generated/v0.5.8/qwen35.yaml +++ b/data/models/generated/v0.5.8/qwen35.yaml @@ -132,3 +132,285 @@ families: - '4' prefill: null decode: null + - name: Qwen3.5-397B-A17B-FP8 + model_path: Qwen/Qwen3.5-397B-A17B-FP8 + attributes: + llm: + thinking_capability: hybrid + tool_parser: qwen3_coder + reasoning_parser: qwen3 + chat_template: null + hardware: + H100: + configurations: + - name: default + attributes: + nodes: single + optimization: balanced + quantization: fp8 + quantized_model_path: null + engine: + env_vars: {} + tp: 8 + dp: null + ep: null + enable_dp_attention: null + extra_args: [] + prefill: null + decode: null + - name: speculative-mtp + attributes: + nodes: single + optimization: low-latency + quantization: fp8 + quantized_model_path: null + engine: + env_vars: {} + tp: 8 + dp: null + ep: null + enable_dp_attention: null + extra_args: + - --speculative-algo + - NEXTN + - --speculative-num-steps + - '3' + - --speculative-eagle-topk + - '1' + - --speculative-num-draft-tokens + - '4' + prefill: null + decode: null + H200: + configurations: + - name: default + attributes: + nodes: single + optimization: balanced + quantization: fp8 + quantized_model_path: null + engine: + env_vars: {} + tp: 4 + dp: null + ep: null + enable_dp_attention: null + extra_args: [] + prefill: null + decode: null + - name: speculative-mtp + attributes: + nodes: single + optimization: low-latency + quantization: fp8 + quantized_model_path: null + engine: + env_vars: {} + tp: 4 + dp: null + ep: null + enable_dp_attention: null + extra_args: + - --speculative-algo + - NEXTN + - --speculative-num-steps + - '3' + - --speculative-eagle-topk + - '1' + - --speculative-num-draft-tokens + - '4' + prefill: null + decode: null + B200: + configurations: + - name: default + attributes: + nodes: single + optimization: balanced + quantization: fp8 + 
quantized_model_path: null + engine: + env_vars: {} + tp: 4 + dp: null + ep: null + enable_dp_attention: null + extra_args: [] + prefill: null + decode: null + - name: speculative-mtp + attributes: + nodes: single + optimization: low-latency + quantization: fp8 + quantized_model_path: null + engine: + env_vars: {} + tp: 4 + dp: null + ep: null + enable_dp_attention: null + extra_args: + - --speculative-algo + - NEXTN + - --speculative-num-steps + - '3' + - --speculative-eagle-topk + - '1' + - --speculative-num-draft-tokens + - '4' + prefill: null + decode: null + B300: + configurations: + - name: default + attributes: + nodes: single + optimization: balanced + quantization: fp8 + quantized_model_path: null + engine: + env_vars: {} + tp: 2 + dp: null + ep: null + enable_dp_attention: null + extra_args: [] + prefill: null + decode: null + - name: speculative-mtp + attributes: + nodes: single + optimization: low-latency + quantization: fp8 + quantized_model_path: null + engine: + env_vars: {} + tp: 2 + dp: null + ep: null + enable_dp_attention: null + extra_args: + - --speculative-algo + - NEXTN + - --speculative-num-steps + - '3' + - --speculative-eagle-topk + - '1' + - --speculative-num-draft-tokens + - '4' + prefill: null + decode: null + - name: Qwen3.5-397B-A17B-NVFP4 + model_path: nvidia/Qwen3.5-397B-A17B-NVFP4 + attributes: + llm: + thinking_capability: hybrid + tool_parser: qwen3_coder + reasoning_parser: qwen3 + chat_template: null + hardware: + B200: + configurations: + - name: default + attributes: + nodes: single + optimization: balanced + quantization: fp4 + quantized_model_path: null + engine: + env_vars: {} + tp: 4 + dp: null + ep: null + enable_dp_attention: null + extra_args: + - --attention-backend + - trtllm_mha + - --moe-runner-backend + - flashinfer_trtllm + - --fp4-gemm-backend + - flashinfer_cutlass + prefill: null + decode: null + - name: speculative-mtp + attributes: + nodes: single + optimization: low-latency + quantization: fp4 + 
quantized_model_path: null + engine: + env_vars: {} + tp: 4 + dp: null + ep: null + enable_dp_attention: null + extra_args: + - --attention-backend + - trtllm_mha + - --moe-runner-backend + - flashinfer_trtllm + - --fp4-gemm-backend + - flashinfer_cutlass + - --speculative-algo + - NEXTN + - --speculative-num-steps + - '3' + - --speculative-eagle-topk + - '1' + - --speculative-num-draft-tokens + - '4' + prefill: null + decode: null + B300: + configurations: + - name: default + attributes: + nodes: single + optimization: balanced + quantization: fp4 + quantized_model_path: null + engine: + env_vars: {} + tp: 2 + dp: null + ep: null + enable_dp_attention: null + extra_args: + - --attention-backend + - trtllm_mha + - --moe-runner-backend + - flashinfer_trtllm + - --fp4-gemm-backend + - flashinfer_cutlass + prefill: null + decode: null + - name: speculative-mtp + attributes: + nodes: single + optimization: low-latency + quantization: fp4 + quantized_model_path: null + engine: + env_vars: {} + tp: 2 + dp: null + ep: null + enable_dp_attention: null + extra_args: + - --attention-backend + - trtllm_mha + - --moe-runner-backend + - flashinfer_trtllm + - --fp4-gemm-backend + - flashinfer_cutlass + - --speculative-algo + - NEXTN + - --speculative-num-steps + - '3' + - --speculative-eagle-topk + - '1' + - --speculative-num-draft-tokens + - '4' + prefill: null + decode: null diff --git a/data/models/src/v0.5.8/qwen35.yaml b/data/models/src/v0.5.8/qwen35.yaml index 0b63f00a..9e1893d3 100644 --- a/data/models/src/v0.5.8/qwen35.yaml +++ b/data/models/src/v0.5.8/qwen35.yaml @@ -1,10 +1,18 @@ # Qwen3.5 Model Configurations (Simplified Format) # This file is compiled to data/models/generated/qwen35.yaml # -# GPU requirements: +# GPU requirements (BF16): # H100: tp=16 (model ~800GB in BF16, each rank needs ~100GB > 80GB) # H200: tp=8 # B200: tp=8 +# +# GPU requirements (FP8): +# H100: tp=8 (model ~400GB in FP8, each rank needs ~50GB < 80GB) +# H200: tp=4 +# B200: tp=4 +# +# GPU 
requirements (FP4): +# B200 (183GB) and B300 (275GB) - FP4 requires Blackwell architecture vendor: qwen @@ -41,3 +49,34 @@ families: models: - name: Qwen3.5-397B-A17B quantization: bf16 + + - name: Qwen3.5-397B-A17B-FP8 + quantization: fp8 + hardware: + H100: { tp: 8 } + H200: { tp: 4 } + B200: { tp: 4 } + B300: { tp: 2 } + + - name: Qwen3.5-397B-A17B-NVFP4 + quantization: fp4 + model_path: nvidia/Qwen3.5-397B-A17B-NVFP4 + hardware: + B200: + tp: 4 + extra_args: + - --attention-backend + - trtllm_mha + - --moe-runner-backend + - flashinfer_trtllm + - --fp4-gemm-backend + - flashinfer_cutlass + B300: + tp: 2 + extra_args: + - --attention-backend + - trtllm_mha + - --moe-runner-backend + - flashinfer_trtllm + - --fp4-gemm-backend + - flashinfer_cutlass diff --git a/docs/autoregressive/Qwen/Qwen3.5.md b/docs/autoregressive/Qwen/Qwen3.5.md index 34c937bd..22beaa56 100644 --- a/docs/autoregressive/Qwen/Qwen3.5.md +++ b/docs/autoregressive/Qwen/Qwen3.5.md @@ -18,6 +18,8 @@ Qwen3.5 features a Gated Delta Networks combined with sparse Mixture-of-Experts **Available Models:** - **BF16 (Full precision)**: [Qwen/Qwen3.5-397B-A17B](https://huggingface.co/Qwen/Qwen3.5-397B-A17B) +- **FP8 (8-bit Quantized)**: [Qwen/Qwen3.5-397B-A17B-FP8](https://huggingface.co/Qwen/Qwen3.5-397B-A17B-FP8) +- **FP4 (4-bit Quantized)**: [nvidia/Qwen3.5-397B-A17B-NVFP4](https://huggingface.co/nvidia/Qwen3.5-397B-A17B-NVFP4) **License:** Apache 2.0 @@ -49,22 +51,34 @@ import Qwen35ConfigGenerator from '@site/src/components/autoregressive/Qwen35Con ### 3.2 Configuration Tips -- The model has ~397B parameters in BF16, requiring ~800GB of GPU memory for weights alone. -- **H100 (80GB)** requires tp=16 (2 nodes) since each rank needs ~100GB at tp=8. -- **H200 (141GB)** and **B200 (192GB)** can run with tp=8 on a single node. - Speculative decoding (MTP) can significantly reduce latency for interactive use cases. 
- The `--mem-fraction-static` flag is recommended for optimal memory utilization, adjust it based on your hardware and workload. - Context length defaults to 262,144 tokens. If you encounter OOM errors, consider reducing it, but maintain at least 128K to preserve thinking capabilities. - To speed up weight loading for this large model, add `--model-loader-extra-config='{"enable_multithread_load": "true","num_threads": 64}'` to the launch command. - **CUDA IPC Transport**: Add `SGLANG_USE_CUDA_IPC_TRANSPORT=1` as an environment variable to use CUDA IPC for transferring multimodal features, significantly improving TTFT (Time To First Token). Note: this consumes additional memory proportional to image size, so you may need to lower `--mem-fraction-static` or `--max-running-requests`. -- **Multimodal Attention Backend**: Use `--mm-attention-backend fa3` on H100/H200 for better vision performance, or `--mm-attention-backend fa4` on B200. +- **Multimodal Attention Backend**: Use `--mm-attention-backend fa3` on H100/H200 for better vision performance, or `--mm-attention-backend fa4` on B200/B300. - For processing large images or videos, you may need to lower `--mem-fraction-static` to leave room for image feature tensors. - -| Hardware | TP | -| -------- | -- | -| H100 | 16 | -| H200 | 8 | -| B200 | 8 | +- Hardware requirements: + - **BF16**: ~397B parameters require ~800GB of GPU memory for weights. + - **H100 (80GB)** requires tp=16 (2 nodes) since each rank needs ~100GB at tp=8. + - **H200 (141GB)** runs with tp=8. + - **B200 (183GB)** runs with tp=8. + - **B300 (275GB)** runs with tp=4. + - **FP8**: The FP8 quantized model requires ~400GB for weights, cutting memory in half. + - **H100 (80GB)** runs with tp=8. + - **H200 (141GB)** runs with tp=4. + - **B200 (183GB)** runs with tp=4. + - **B300 (275GB)** runs with tp=2. + - **FP4**: The FP4 quantized model requires ~250GB for weights, cutting memory by almost 4x. Only compatible with B200/B300 (Blackwell architecture). 
+ - **B200 (183GB)** runs with tp=4. + - **B300 (275GB)** runs with tp=2. + +| Hardware | Memory | BF16 TP | FP8 TP | FP4 TP | +| -------- | ------ | ------- | ------ | --------------- | +| H100 | 80GB | 16 | 8 | N/A | +| H200 | 141GB | 8 | 4 | N/A | +| B200 | 183GB | 8 | 4 | 4 | +| B300 | 275GB | 4 | 2 | 2 | ## 4. Model Invocation diff --git a/docs/intro.md b/docs/intro.md index 134f5c6d..0c8ba70f 100644 --- a/docs/intro.md +++ b/docs/intro.md @@ -28,7 +28,7 @@ Each recipe provides step-by-step instructions to help you quickly implement SGL #### Qwen -- [ ] [Qwen3.5](./autoregressive/Qwen/Qwen3.5.md) NEW +- [x] [Qwen3.5](./autoregressive/Qwen/Qwen3.5.md) NEW - [x] [Qwen3](./autoregressive/Qwen/Qwen3.md) - [x] [Qwen3-Next](./autoregressive/Qwen/Qwen3-Next.md) - [x] [Qwen3-VL](./autoregressive/Qwen/Qwen3-VL.md) diff --git a/src/components/autoregressive/Qwen35ConfigGenerator/index.js b/src/components/autoregressive/Qwen35ConfigGenerator/index.js index 871088c4..713d015b 100644 --- a/src/components/autoregressive/Qwen35ConfigGenerator/index.js +++ b/src/components/autoregressive/Qwen35ConfigGenerator/index.js @@ -4,12 +4,23 @@ import ConfigGenerator from '../../base/ConfigGenerator'; /** * Qwen3.5-397B-A17B Configuration Generator * Supports Qwen3.5 397B (17B active) MoE VLM deployment configuration - * with reasoning parser, tool calling, and speculative decoding + * with reasoning parser, tool calling, speculative decoding, and quantization options * - * GPU requirements: + * GPU requirements (BF16): * H100: tp=16 (model ~800GB in BF16, each rank needs ~100GB > 80GB) * H200: tp=8 * B200: tp=8 + * B300: tp=4 + * + * GPU requirements (FP8): + * H100: tp=8 (model ~400GB in FP8, each rank needs ~50GB < 80GB) + * H200: tp=4 + * B200: tp=4 + * B300: tp=2 + * + * GPU requirements (FP4): + * B200: tp=4 (FP4 requires Blackwell) + * B300: tp=2 (FP4 requires Blackwell) */ const Qwen35ConfigGenerator = () => { const config = { @@ -19,10 +30,23 @@ const Qwen35ConfigGenerator = 
() => { hardware: { name: 'hardware', title: 'Hardware Platform', + getDynamicItems: (values) => { + const isNvfp4 = values.quantization === 'fp4'; + return [ + { id: 'h100', label: 'H100', default: !isNvfp4, disabled: isNvfp4 }, + { id: 'h200', label: 'H200', default: false, disabled: isNvfp4 }, + { id: 'b200', label: 'B200', default: false, disabled: false }, + { id: 'b300', label: 'B300', default: isNvfp4, disabled: false } + ]; + } + }, + quantization: { + name: 'quantization', + title: 'Quantization', items: [ - { id: 'h200', label: 'H200', default: true }, - { id: 'b200', label: 'B200', default: false }, - { id: 'h100', label: 'H100', default: false } + { id: 'bf16', label: 'BF16', default: false }, + { id: 'fp8', label: 'FP8', default: true }, + { id: 'fp4', label: 'FP4', default: false } ] }, reasoning: { @@ -55,17 +79,29 @@ const Qwen35ConfigGenerator = () => { }, modelConfigs: { - h100: { bf16: { tp: 16, mem: 0.8 } }, - h200: { bf16: { tp: 8, mem: 0.8 } }, - b200: { bf16: { tp: 8, mem: 0.82 } } + h100: { bf16: { tp: 16, mem: 0.8 }, fp8: { tp: 8, mem: 0.8 } }, + h200: { bf16: { tp: 8, mem: 0.8 }, fp8: { tp: 4, mem: 0.8 } }, + b200: { bf16: { tp: 8, mem: 0.8 }, fp8: { tp: 4, mem: 0.8 }, fp4: { tp: 4, mem: 0.8 } }, + b300: { bf16: { tp: 4, mem: 0.8 }, fp8: { tp: 2, mem: 0.8 }, fp4: { tp: 2, mem: 0.8 } } }, generateCommand: function (values) { - const { hardware, speculative } = values; + const { hardware, quantization, speculative } = values; + + // Validate hardware supports the quantization + const hwConfig = this.modelConfigs[hardware]?.[quantization]; + if (!hwConfig) { + return '# Please select compatible hardware for the chosen quantization\n# FP4 requires B200/B300 (Blackwell)'; + } - const modelName = `${this.modelFamily}/Qwen3.5-397B-A17B`; + let modelName; + if (quantization === 'fp4') { + modelName = 'nvidia/Qwen3.5-397B-A17B-NVFP4'; + } else { + const quantSuffix = quantization === 'fp8' ? 
'-FP8' : ''; + modelName = `${this.modelFamily}/Qwen3.5-397B-A17B${quantSuffix}`; + } - const hwConfig = this.modelConfigs[hardware].bf16; const tpValue = hwConfig.tp; const memFraction = hwConfig.mem; @@ -74,8 +110,9 @@ const Qwen35ConfigGenerator = () => { cmd += ` --model ${modelName}`; cmd += ` \\\n --tp ${tpValue}`; - // Apply commandRule from all options + // Apply commandRule from all options except quantization (handled via model name) Object.entries(this.options).forEach(([key, option]) => { + if (key === 'quantization') return; if (option.commandRule) { const rule = option.commandRule(values[key]); if (rule) { @@ -84,18 +121,27 @@ const Qwen35ConfigGenerator = () => { } }); - // Append B200-specific backend configurations - if (hardware === 'b200') { + // Enable allreduce fusion for all Qwen3.5 configs. + cmd += ` \\\n --enable-flashinfer-allreduce-fusion`; + + // Append backend configurations + if (hardware === 'b200' || hardware === 'b300') { cmd += ` \\\n --attention-backend trtllm_mha`; - cmd += ` \\\n --moe-runner-backend flashinfer_trtllm`; - cmd += ` \\\n --disable-radix-cache`; - cmd += ` \\\n --enable-flashinfer-allreduce-fusion`; + } + + // Append B200/B300-specific backend configurations + if (hardware === 'b200' || hardware === 'b300') { if (speculative === 'disabled') { cmd += ` \\\n --tokenizer-worker-num 6`; } } - // Add memory fraction + // FP4-specific backend settings + if (quantization === 'fp4') { + cmd += ' \\\n --moe-runner-backend flashinfer_trtllm \\\n --fp4-gemm-backend flashinfer_cutlass \\\n --kv-cache-dtype fp8_e4m3'; + } + + // Add memory fraction last cmd += ` \\\n --mem-fraction-static ${memFraction}`; return cmd;