diff --git a/unsloth_zoo/vllm_utils.py b/unsloth_zoo/vllm_utils.py
index de855162b..35525ac36 100644
--- a/unsloth_zoo/vllm_utils.py
+++ b/unsloth_zoo/vllm_utils.py
@@ -397,12 +397,13 @@ def patch_vllm():
     # Temporary patch to disable multiprocessing for vLLM
     # Allows accessing model_executor
     os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
+    os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
     patch_bitsandbytes_quant_state()
     patch_vllm_bitsandbytes()
     patch_vllm_lora_tokenizer()
     patch_vllm_lora_load_tensors()
     global LORA_REQUEST_ID
-    LORA_REQUEST_ID = 0
+    LORA_REQUEST_ID = 1
 pass
 
 
@@ -519,6 +520,7 @@ def get_state_dict(prefix, kk, state_dict, proj):
     quant_state_dict["model.embed_tokens.weight"] = state_dict["model.embed_tokens.weight"]
 
     # All layers
+    skipped_layernorms = []
     for kk in range(len(vllm_internals.model.layers)):
         proj = vllm_internals.model.layers[kk].self_attn.qkv_proj
         get_state_dict(f"model.layers.{kk}.self_attn.q_proj", 0, state_dict, proj)
@@ -547,9 +549,11 @@ def get_state_dict(prefix, kk, state_dict, proj):
             vllm_name = f"vllm_internals.{vllm_name}"
             try:
                 layernorm = eval(vllm_name).state_dict()["weight"]
-                state_dict[layernorm_name + ".weight"] = layernorm
-            except:
-                print(f"vllm_internals.{layernorm_name}")
+                layernorm_name = layernorm_name + ".weight"
+                state_dict[layernorm_name] = layernorm
+                quant_state_dict[layernorm_name] = state_dict[layernorm_name]
+            except Exception as e:
+                skipped_layernorms.append(layernorm_name.split(".")[-1])
         pass
     pass
 
@@ -569,6 +573,9 @@ def get_state_dict(prefix, kk, state_dict, proj):
         quant_state_dict["lm_head.weight"] = state_dict["lm_head.weight"]
     pass
 
+    if len(skipped_layernorms) != 0:
+        print(f"Unsloth: Just some info: will skip parsing {list(set(skipped_layernorms))}")
+
     if not return_state_dict: state_dict = None
     return state_dict, quant_state_dict
 pass
@@ -683,9 +690,14 @@ def _override_to(self, *args, **kwargs):
         except: return self
     pass
 
+    skipped_layernorms = []
     for kk in range(config.num_hidden_layers):
         for layer_name in layer_names:
             layer_name = layer_name.format(kk = kk)
+            if f"{layer_name}.weight" not in quant_state_dict:
+                skipped_layernorms.append(layer_name.split(".")[-1])
+                continue
+            pass
             weight = quant_state_dict[f"{layer_name}.weight"]
 
             if f"{layer_name}.bias" in quant_state_dict:
@@ -723,15 +735,8 @@ def _override_to(self, *args, **kwargs):
                 # Layernorms
                 weight = torch.nn.Parameter(weight, requires_grad = False)
                 layer_name = re.sub(r"\.([\d]{1,})\.", r"[\1].", layer_name)
-                try:
-                    # We first must access if the layernorm / item exists
-                    exec(f"new_model.{layer_name}")
-
-                    # If it succeeds, then try will enter the below:
-                    exec(f"new_model.{layer_name}.weight = None")
-                    exec(f"new_model.{layer_name}.weight = weight")
-                except:
-                    pass
+                exec(f"new_model.{layer_name}.weight = None")
+                exec(f"new_model.{layer_name}.weight = weight")
                 continue
             pass
             
@@ -799,6 +804,9 @@ def _override_to(self, *args, **kwargs):
     for _ in range(3):
         gc.collect()
         torch.cuda.empty_cache()
+
+    if len(skipped_layernorms) != 0:
+        print(f"Unsloth: Just some info: will skip parsing {list(set(skipped_layernorms))}")
     return new_model
 pass
 
@@ -1100,7 +1108,8 @@ def load_vllm(
         disable_log_stats      = disable_log_stats,
         enable_prefix_caching  = enable_prefix_caching,
         # enable_chunked_prefill = True, # LoRA fails with chunked prefill as at Feb 2025
-        max_seq_len_to_capture = min(8192, max_seq_length + 256), # Default is 8192 for CUDAGraphs
+        # max_seq_len_to_capture fails for V1
+        # max_seq_len_to_capture = min(8192, max_seq_length + 256), # Default is 8192 for CUDAGraphs
         compilation_config     = compilation_config, # 0, 1, 2, 3
         enforce_eager          = enforce_eager,
         swap_space             = swap_space, # Low memory devices like Colab (13GB) default 4GB
@@ -1381,10 +1390,10 @@ def load_lora(model, save_directory, load_tensors = False):
 
     # All Unsloth Zoo code licensed under LGPLv3
     global LORA_REQUEST_ID
-    if LORA_REQUEST_ID is None: LORA_REQUEST_ID = 0
+    if LORA_REQUEST_ID is None: LORA_REQUEST_ID = 1
 
     # Check if path exists
-    if not os.path.exists(save_directory) or LORA_REQUEST_ID == 0:
+    if not os.path.exists(save_directory) or LORA_REQUEST_ID == 1:
         if load_tensors:
             # We need to save and load the config file once!
             model.peft_config["default"].save_pretrained(save_directory)