diff --git a/src/python/py/models/README.md b/src/python/py/models/README.md
index 60d5ecf3e4..3af38c3894 100644
--- a/src/python/py/models/README.md
+++ b/src/python/py/models/README.md
@@ -20,6 +20,7 @@ This folder contains the model builder for quickly creating optimized and quanti
   - [Exclude Language Modeling Head](#exclude-language-modeling-head)
   - [Include Last Hidden States Output](#include-last-hidden-states-output)
   - [Enable Shared Embeddings](#enable-shared-embeddings)
+  - [Disable QKV Projections Fusion](#disable-qkv-projections-fusion)
   - [Enable CUDA Graph](#enable-cuda-graph)
   - [Use 8 Bits Quantization in QMoE](#use-8-bits-quantization-in-qmoe)
   - [Use QDQ Pattern for Quantization](#use-qdq-pattern-for-quantization)
@@ -253,6 +254,18 @@ python3 -m onnxruntime_genai.models.builder -m model_name -o path_to_output_fold
 python3 builder.py -m model_name -o path_to_output_folder -p fp16 -e cuda --extra_options shared_embeddings=true
 ```

+#### Disable QKV Projections Fusion
+
+This scenario is for when you want to keep the Q/K/V projections in the attention layers as separate MatMuls instead of fusing them into a single packed MatMul.
+
+```
+# From wheel:
+python3 -m onnxruntime_genai.models.builder -i path_to_local_folder_on_disk -o path_to_output_folder -p precision -e execution_provider -c cache_dir_to_store_temp_files --extra_options disable_qkv_fusion=true
+
+# From source:
+python3 builder.py -i path_to_local_folder_on_disk -o path_to_output_folder -p precision -e execution_provider -c cache_dir_to_store_temp_files --extra_options disable_qkv_fusion=true
+```
+
 #### Enable CUDA Graph

 This scenario is for when you want to enable CUDA graph for your ONNX model.
diff --git a/src/python/py/models/builder.py b/src/python/py/models/builder.py
index 02635e4060..9cf463af91 100644
--- a/src/python/py/models/builder.py
+++ b/src/python/py/models/builder.py
@@ -61,6 +61,7 @@ def check_extra_options(kv_pairs, execution_provider):
         "use_cuda_bf16",
         "shared_embeddings",
         "hf_remote",
+        "disable_qkv_fusion",
     ]
     for key in bools:
         if key in kv_pairs:
diff --git a/src/python/py/models/builders/base.py b/src/python/py/models/builders/base.py
index 0bb545e501..205cdec652 100644
--- a/src/python/py/models/builders/base.py
+++ b/src/python/py/models/builders/base.py
@@ -490,11 +490,14 @@ def make_attention_init(self):

         # Some EPs don't support packed Q/K/V for GQA yet
         # Packed MatMul with LoRA/QLoRA is not currently supported
+        # use_packed_matmul can be overridden by the upstream quantization choice
+        # (e.g., when q_proj, k_proj, and v_proj have different quantization settings)
         self.attention_attrs["use_packed_matmul"] = (
             self.ep not in ["dml"]
             and not self.matmul_attrs["use_lora"]
             and not self.attention_attrs["q_norm"]
             and not self.attention_attrs["k_norm"]
+            and not self.extra_options.get("disable_qkv_fusion", False)
         )

         # Some EPs don't support fusing rotary embeddings inside GQA yet
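For context on what the fusion means, here is a minimal NumPy sketch; the shapes and variable names are illustrative assumptions, not the builder's actual tensors. Packing concatenates the Q/K/V weights so a single MatMul produces all three projections, while `disable_qkv_fusion=true` keeps three separate MatMuls.

```
# Minimal sketch of packed vs. separate Q/K/V projections (illustrative shapes only).
import numpy as np

hidden, q_dim, kv_dim = 64, 64, 16                      # assumed sizes, e.g. GQA with fewer KV heads
x = np.random.rand(2, hidden).astype(np.float32)        # (batch, hidden)
Wq = np.random.rand(hidden, q_dim).astype(np.float32)
Wk = np.random.rand(hidden, kv_dim).astype(np.float32)
Wv = np.random.rand(hidden, kv_dim).astype(np.float32)

# Fused path: one packed MatMul over the concatenated weights, then a split.
W_packed = np.concatenate([Wq, Wk, Wv], axis=1)         # (hidden, q_dim + 2 * kv_dim)
q, k, v = np.split(x @ W_packed, [q_dim, q_dim + kv_dim], axis=1)

# Unfused path (disable_qkv_fusion=true): three separate MatMuls, so each
# projection can carry its own quantization settings.
q2, k2, v2 = x @ Wq, x @ Wk, x @ Wv

assert np.allclose(q, q2) and np.allclose(k, k2) and np.allclose(v, v2)
```

Numerically the two paths match; the difference is in the exported graph. A packed MatMul uses one shared weight tensor, so it cannot represent per-projection quantization settings, which is why the builder skips the fusion when q_proj, k_proj, and v_proj are quantized differently or when `disable_qkv_fusion=true` is passed.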