diff --git a/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb
index 5a0469e4..8b259223 100644
--- a/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb
+++ b/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb
@@ -1270,7 +1270,7 @@
     "from trl import GRPOConfig, GRPOTrainer\n",
     "training_args = GRPOConfig(\n",
     "    learning_rate = 5e-6,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Falcon_H1_(0.5B)-Alpaca.ipynb b/nb/Falcon_H1_(0.5B)-Alpaca.ipynb
index c74d1b66..86a999b3 100644
--- a/nb/Falcon_H1_(0.5B)-Alpaca.ipynb
+++ b/nb/Falcon_H1_(0.5B)-Alpaca.ipynb
@@ -419,7 +419,7 @@
     "        learning_rate = 2e-4,\n",
     "        logging_steps = 1,\n",
     "        optim = \"adamw_8bit\",\n",
-    "        weight_decay = 0.01,\n",
+    "        weight_decay = 0.001,\n",
     "        lr_scheduler_type = \"linear\",\n",
     "        seed = 3407,\n",
     "        output_dir = \"outputs\",\n",
diff --git a/nb/Gemma2_(2B)-Alpaca.ipynb b/nb/Gemma2_(2B)-Alpaca.ipynb
index 5a60d92a..a139add1 100644
--- a/nb/Gemma2_(2B)-Alpaca.ipynb
+++ b/nb/Gemma2_(2B)-Alpaca.ipynb
@@ -563,7 +563,7 @@
     "        learning_rate = 2e-4,\n",
     "        logging_steps = 1,\n",
     "        optim = \"adamw_8bit\",\n",
-    "        weight_decay = 0.01,\n",
+    "        weight_decay = 0.001,\n",
     "        lr_scheduler_type = \"linear\",\n",
     "        seed = 3407,\n",
     "        output_dir = \"outputs\",\n",
diff --git a/nb/Gemma3_(1B)-GRPO.ipynb b/nb/Gemma3_(1B)-GRPO.ipynb
index 802fc0b9..84cbea0d 100644
--- a/nb/Gemma3_(1B)-GRPO.ipynb
+++ b/nb/Gemma3_(1B)-GRPO.ipynb
@@ -1076,7 +1076,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_torch_fused\",\n",
diff --git a/nb/Gemma3_(4B)-Vision-GRPO.ipynb b/nb/Gemma3_(4B)-Vision-GRPO.ipynb
index 53802c27..4604e72e 100644
--- a/nb/Gemma3_(4B)-Vision-GRPO.ipynb
+++ b/nb/Gemma3_(4B)-Vision-GRPO.ipynb
@@ -1149,7 +1149,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb
index efa2cb8d..65c87883 100644
--- a/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb	
+++ b/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb	
@@ -832,7 +832,7 @@
     "from trl import GRPOConfig, GRPOTrainer\n",
     "training_args = GRPOConfig(\n",
     "    learning_rate = 5e-6,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_torch_fused\",\n",
diff --git a/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb
index 76235ec7..139cf1bb 100644
--- a/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb	
+++ b/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb	
@@ -1270,7 +1270,7 @@
     "from trl import GRPOConfig, GRPOTrainer\n",
     "training_args = GRPOConfig(\n",
     "    learning_rate = 5e-6,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb b/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb
index 5e45472d..412b9fbc 100644
--- a/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb	
+++ b/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb	
@@ -1078,7 +1078,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_torch_fused\",\n",
diff --git a/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb b/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb
index 21587611..708390f4 100644
--- a/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb	
+++ b/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb	
@@ -1151,7 +1151,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb b/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb
index 29549985..7d34be62 100644
--- a/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb	
+++ b/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb	
@@ -769,7 +769,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/nb/HuggingFace Course-Llama_FP8_GRPO.ipynb b/nb/HuggingFace Course-Llama_FP8_GRPO.ipynb
index d38a7060..9b9f30d5 100644
--- a/nb/HuggingFace Course-Llama_FP8_GRPO.ipynb	
+++ b/nb/HuggingFace Course-Llama_FP8_GRPO.ipynb	
@@ -1307,7 +1307,7 @@
     "        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs\n",
     "        logging_steps = 5,\n",
     "        optim = \"adamw_8bit\",\n",
-    "        weight_decay = 0.01,\n",
+    "        weight_decay = 0.001,\n",
     "        lr_scheduler_type = \"linear\",\n",
     "        seed = 3407,\n",
     "        report_to = \"none\", # Use this for WandB etc\n",
@@ -2520,7 +2520,7 @@
     "    vllm_sampling_params = vllm_sampling_params,\n",
     "    temperature = 1.0,\n",
     "    learning_rate = 5e-6,\n",
-    "    weight_decay = 0.01,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"linear\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb b/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb
index e8d53937..adaff0e3 100644
--- a/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb	
+++ b/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb	
@@ -613,7 +613,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb b/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb
index 9693df93..93b9becb 100644
--- a/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb	
+++ b/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb	
@@ -391,7 +391,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb b/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb
index eae7b9b5..003742e8 100644
--- a/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb	
+++ b/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb	
@@ -908,7 +908,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb b/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb
index 85cfbed9..b7f50bcc 100644
--- a/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb	
+++ b/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb	
@@ -1402,7 +1402,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/HuggingFace Course-Qwen3_8B_FP8_GRPO.ipynb b/nb/HuggingFace Course-Qwen3_8B_FP8_GRPO.ipynb
index 4492acd0..24fb30f9 100644
--- a/nb/HuggingFace Course-Qwen3_8B_FP8_GRPO.ipynb	
+++ b/nb/HuggingFace Course-Qwen3_8B_FP8_GRPO.ipynb	
@@ -1412,7 +1412,7 @@
     "        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs\n",
     "        logging_steps = 5,\n",
     "        optim = \"adamw_8bit\",\n",
-    "        weight_decay = 0.01,\n",
+    "        weight_decay = 0.001,\n",
     "        lr_scheduler_type = \"linear\",\n",
     "        seed = 3407,\n",
     "        report_to = \"none\", # Use this for WandB etc\n",
@@ -2617,7 +2617,7 @@
     "    vllm_sampling_params = vllm_sampling_params,\n",
     "    temperature = 1.0,\n",
     "    learning_rate = 5e-6,\n",
-    "    weight_decay = 0.01,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"linear\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb b/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb
index 752d4f96..cf795aff 100644
--- a/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb	
+++ b/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb	
@@ -1351,7 +1351,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/HuggingFace Course-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb b/nb/HuggingFace Course-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb
index f9cdb301..4bc81645 100644
--- a/nb/HuggingFace Course-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb	
+++ b/nb/HuggingFace Course-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb	
@@ -838,7 +838,7 @@
         "training_args = GRPOConfig(\n",
         "    temperature = 1.0,\n",
         "    learning_rate = 5e-5,\n",
-        "    weight_decay = 0.01,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"linear\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb
index e98c9a30..ed56a653 100644
--- a/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb
+++ b/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb
@@ -821,7 +821,7 @@
     "from trl import GRPOConfig, GRPOTrainer\n",
     "training_args = GRPOConfig(\n",
     "    learning_rate = 5e-6,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_torch_fused\",\n",
diff --git a/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb
index a9d7067a..87a88444 100644
--- a/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb
+++ b/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb
@@ -1261,7 +1261,7 @@
     "from trl import GRPOConfig, GRPOTrainer\n",
     "training_args = GRPOConfig(\n",
     "    learning_rate = 5e-6,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Kaggle-Falcon_H1_(0.5B)-Alpaca.ipynb b/nb/Kaggle-Falcon_H1_(0.5B)-Alpaca.ipynb
index 272f2ab0..5e4c995b 100644
--- a/nb/Kaggle-Falcon_H1_(0.5B)-Alpaca.ipynb
+++ b/nb/Kaggle-Falcon_H1_(0.5B)-Alpaca.ipynb
@@ -419,7 +419,7 @@
     "        learning_rate = 2e-4,\n",
     "        logging_steps = 1,\n",
     "        optim = \"adamw_8bit\",\n",
-    "        weight_decay = 0.01,\n",
+    "        weight_decay = 0.001,\n",
     "        lr_scheduler_type = \"linear\",\n",
     "        seed = 3407,\n",
     "        output_dir = \"outputs\",\n",
diff --git a/nb/Kaggle-Gemma2_(2B)-Alpaca.ipynb b/nb/Kaggle-Gemma2_(2B)-Alpaca.ipynb
index ee681e07..31281cc6 100644
--- a/nb/Kaggle-Gemma2_(2B)-Alpaca.ipynb
+++ b/nb/Kaggle-Gemma2_(2B)-Alpaca.ipynb
@@ -563,7 +563,7 @@
     "        learning_rate = 2e-4,\n",
     "        logging_steps = 1,\n",
     "        optim = \"adamw_8bit\",\n",
-    "        weight_decay = 0.01,\n",
+    "        weight_decay = 0.001,\n",
     "        lr_scheduler_type = \"linear\",\n",
     "        seed = 3407,\n",
     "        output_dir = \"outputs\",\n",
diff --git a/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb b/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb
index bef4b60d..92e723bb 100644
--- a/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb
+++ b/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb
@@ -1069,7 +1069,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_torch_fused\",\n",
diff --git a/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb b/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb
index 45f73345..134d66e9 100644
--- a/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb
+++ b/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb
@@ -1142,7 +1142,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb b/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb
index ea796dd7..6ebffb9f 100644
--- a/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb
+++ b/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb
@@ -760,7 +760,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/nb/Kaggle-Llama_FP8_GRPO.ipynb b/nb/Kaggle-Llama_FP8_GRPO.ipynb
index 6d3b324f..657e09c4 100644
--- a/nb/Kaggle-Llama_FP8_GRPO.ipynb
+++ b/nb/Kaggle-Llama_FP8_GRPO.ipynb
@@ -1298,7 +1298,7 @@
     "        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs\n",
     "        logging_steps = 5,\n",
     "        optim = \"adamw_8bit\",\n",
-    "        weight_decay = 0.01,\n",
+    "        weight_decay = 0.001,\n",
     "        lr_scheduler_type = \"linear\",\n",
     "        seed = 3407,\n",
     "        report_to = \"none\", # Use this for WandB etc\n",
@@ -2513,7 +2513,7 @@
     "    vllm_sampling_params = vllm_sampling_params,\n",
     "    temperature = 1.0,\n",
     "    learning_rate = 5e-6,\n",
-    "    weight_decay = 0.01,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"linear\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb b/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb
index 13225662..db9f1a9b 100644
--- a/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb
+++ b/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb
@@ -604,7 +604,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb b/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb
index 333f2efe..dd82a2fd 100644
--- a/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb
+++ b/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb
@@ -382,7 +382,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb b/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb
index 95a7b129..39357ea8 100644
--- a/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb
+++ b/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb
@@ -899,7 +899,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb b/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb
index b1a8d904..66f05958 100644
--- a/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb
+++ b/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb
@@ -1393,7 +1393,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Kaggle-Qwen3_8B_FP8_GRPO.ipynb b/nb/Kaggle-Qwen3_8B_FP8_GRPO.ipynb
index 213d1dd3..d256cff0 100644
--- a/nb/Kaggle-Qwen3_8B_FP8_GRPO.ipynb
+++ b/nb/Kaggle-Qwen3_8B_FP8_GRPO.ipynb
@@ -1403,7 +1403,7 @@
     "        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs\n",
     "        logging_steps = 5,\n",
     "        optim = \"adamw_8bit\",\n",
-    "        weight_decay = 0.01,\n",
+    "        weight_decay = 0.001,\n",
     "        lr_scheduler_type = \"linear\",\n",
     "        seed = 3407,\n",
     "        report_to = \"none\", # Use this for WandB etc\n",
@@ -2610,7 +2610,7 @@
     "    vllm_sampling_params = vllm_sampling_params,\n",
     "    temperature = 1.0,\n",
     "    learning_rate = 5e-6,\n",
-    "    weight_decay = 0.01,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"linear\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb b/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb
index eb11ea5e..2d2cf932 100644
--- a/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb
+++ b/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb
@@ -1342,7 +1342,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Kaggle-TinyLlama_(1.1B)-Alpaca.ipynb b/nb/Kaggle-TinyLlama_(1.1B)-Alpaca.ipynb
index fadc8c64..0b0da122 100644
--- a/nb/Kaggle-TinyLlama_(1.1B)-Alpaca.ipynb
+++ b/nb/Kaggle-TinyLlama_(1.1B)-Alpaca.ipynb
@@ -614,7 +614,7 @@
     "        learning_rate = 2e-5,\n",
     "        logging_steps = 1,\n",
     "        optim = \"adamw_8bit\",\n",
-    "        weight_decay = 0.1,\n",
+    "        weight_decay = 0.001,\n",
     "        lr_scheduler_type = \"linear\",\n",
     "        seed = 3407,\n",
     "        output_dir = \"outputs\",\n",
diff --git a/nb/Kaggle-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb b/nb/Kaggle-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb
index f9cdb301..4bc81645 100644
--- a/nb/Kaggle-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb
+++ b/nb/Kaggle-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb
@@ -838,7 +838,7 @@
         "training_args = GRPOConfig(\n",
         "    temperature = 1.0,\n",
         "    learning_rate = 5e-5,\n",
-        "    weight_decay = 0.01,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"linear\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Llama3.1_(8B)-GRPO.ipynb b/nb/Llama3.1_(8B)-GRPO.ipynb
index 31d2ca32..2488c3d7 100644
--- a/nb/Llama3.1_(8B)-GRPO.ipynb
+++ b/nb/Llama3.1_(8B)-GRPO.ipynb
@@ -767,7 +767,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/nb/Llama_FP8_GRPO.ipynb b/nb/Llama_FP8_GRPO.ipynb
index 512bb9c0..e0b9988a 100644
--- a/nb/Llama_FP8_GRPO.ipynb
+++ b/nb/Llama_FP8_GRPO.ipynb
@@ -1305,7 +1305,7 @@
         "        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs\n",
         "        logging_steps = 5,\n",
         "        optim = \"adamw_8bit\",\n",
-        "        weight_decay = 0.01,\n",
+        "        weight_decay = 0.001,\n",
         "        lr_scheduler_type = \"linear\",\n",
         "        seed = 3407,\n",
         "        report_to = \"none\", # Use this for WandB etc\n",
@@ -2520,7 +2520,7 @@
         "    vllm_sampling_params = vllm_sampling_params,\n",
         "    temperature = 1.0,\n",
         "    learning_rate = 5e-6,\n",
-        "    weight_decay = 0.01,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"linear\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Mistral_v0.3_(7B)-GRPO.ipynb b/nb/Mistral_v0.3_(7B)-GRPO.ipynb
index bb3b9fdd..c85cead8 100644
--- a/nb/Mistral_v0.3_(7B)-GRPO.ipynb
+++ b/nb/Mistral_v0.3_(7B)-GRPO.ipynb
@@ -611,7 +611,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/nb/Phi_4_(14B)-GRPO.ipynb b/nb/Phi_4_(14B)-GRPO.ipynb
index 9a44a9b3..3e73adb4 100644
--- a/nb/Phi_4_(14B)-GRPO.ipynb
+++ b/nb/Phi_4_(14B)-GRPO.ipynb
@@ -389,7 +389,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/nb/Qwen2.5_(3B)-GRPO.ipynb b/nb/Qwen2.5_(3B)-GRPO.ipynb
index 442f1c58..1cf9cb60 100644
--- a/nb/Qwen2.5_(3B)-GRPO.ipynb
+++ b/nb/Qwen2.5_(3B)-GRPO.ipynb
@@ -906,7 +906,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Qwen2_5_7B_VL_GRPO.ipynb b/nb/Qwen2_5_7B_VL_GRPO.ipynb
index 166ddb77..d35c4c14 100644
--- a/nb/Qwen2_5_7B_VL_GRPO.ipynb
+++ b/nb/Qwen2_5_7B_VL_GRPO.ipynb
@@ -1401,7 +1401,7 @@
         "    learning_rate = 5e-6,\n",
         "    adam_beta1 = 0.9,\n",
         "    adam_beta2 = 0.99,\n",
-        "    weight_decay = 0.1,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"cosine\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/nb/Qwen3_8B_FP8_GRPO.ipynb b/nb/Qwen3_8B_FP8_GRPO.ipynb
index e93294b5..8d14d7a3 100644
--- a/nb/Qwen3_8B_FP8_GRPO.ipynb
+++ b/nb/Qwen3_8B_FP8_GRPO.ipynb
@@ -1411,7 +1411,7 @@
         "        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs\n",
         "        logging_steps = 5,\n",
         "        optim = \"adamw_8bit\",\n",
-        "        weight_decay = 0.01,\n",
+        "        weight_decay = 0.001,\n",
         "        lr_scheduler_type = \"linear\",\n",
         "        seed = 3407,\n",
         "        report_to = \"none\", # Use this for WandB etc\n",
@@ -2618,7 +2618,7 @@
         "    vllm_sampling_params = vllm_sampling_params,\n",
         "    temperature = 1.0,\n",
         "    learning_rate = 5e-6,\n",
-        "    weight_decay = 0.01,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"linear\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/nb/TinyLlama_(1.1B)-Alpaca.ipynb b/nb/TinyLlama_(1.1B)-Alpaca.ipynb
index bb2d1df1..da0707e2 100644
--- a/nb/TinyLlama_(1.1B)-Alpaca.ipynb
+++ b/nb/TinyLlama_(1.1B)-Alpaca.ipynb
@@ -614,7 +614,7 @@
     "        learning_rate = 2e-5,\n",
     "        logging_steps = 1,\n",
     "        optim = \"adamw_8bit\",\n",
-    "        weight_decay = 0.1,\n",
+    "        weight_decay = 0.001,\n",
     "        lr_scheduler_type = \"linear\",\n",
     "        seed = 3407,\n",
     "        output_dir = \"outputs\",\n",
diff --git a/nb/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb b/nb/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb
index f9cdb301..4bc81645 100644
--- a/nb/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb
+++ b/nb/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb
@@ -838,7 +838,7 @@
         "training_args = GRPOConfig(\n",
         "    temperature = 1.0,\n",
         "    learning_rate = 5e-5,\n",
-        "    weight_decay = 0.01,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"linear\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb
index b2a7b2d7..99ddd45a 100644
--- a/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb
+++ b/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb
@@ -805,7 +805,7 @@
     "from trl import GRPOConfig, GRPOTrainer\n",
     "training_args = GRPOConfig(\n",
     "    learning_rate = 5e-6,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_torch_fused\",\n",
diff --git a/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb
index b1b0709c..ca952e47 100644
--- a/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb
+++ b/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb
@@ -1287,7 +1287,7 @@
     "from trl import GRPOConfig, GRPOTrainer\n",
     "training_args = GRPOConfig(\n",
     "    learning_rate = 5e-6,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/original_template/Falcon_H1_(0.5B)-Alpaca.ipynb b/original_template/Falcon_H1_(0.5B)-Alpaca.ipynb
index 7db57aa1..3e2c7147 100644
--- a/original_template/Falcon_H1_(0.5B)-Alpaca.ipynb
+++ b/original_template/Falcon_H1_(0.5B)-Alpaca.ipynb
@@ -408,7 +408,7 @@
     "        learning_rate=2e-4,\n",
     "        logging_steps=1,\n",
     "        optim=\"adamw_8bit\",\n",
-    "        weight_decay=0.01,\n",
+    "        weight_decay=0.001,\n",
     "        lr_scheduler_type=\"linear\",\n",
     "        seed=3407,\n",
     "        output_dir=\"outputs\",\n",
diff --git a/original_template/Gemma2_(2B)-Alpaca.ipynb b/original_template/Gemma2_(2B)-Alpaca.ipynb
index e5322132..67521ccb 100644
--- a/original_template/Gemma2_(2B)-Alpaca.ipynb
+++ b/original_template/Gemma2_(2B)-Alpaca.ipynb
@@ -536,7 +536,7 @@
     "        learning_rate=2e-4,\n",
     "        logging_steps=1,\n",
     "        optim=\"adamw_8bit\",\n",
-    "        weight_decay=0.01,\n",
+    "        weight_decay=0.001,\n",
     "        lr_scheduler_type=\"linear\",\n",
     "        seed=3407,\n",
     "        output_dir=\"outputs\",\n",
diff --git a/original_template/Gemma3_(1B)-GRPO.ipynb b/original_template/Gemma3_(1B)-GRPO.ipynb
index df3b895d..8eac68d5 100644
--- a/original_template/Gemma3_(1B)-GRPO.ipynb
+++ b/original_template/Gemma3_(1B)-GRPO.ipynb
@@ -1051,7 +1051,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_torch_fused\",\n",
diff --git a/original_template/Gemma3_(4B)-Vision-GRPO.ipynb b/original_template/Gemma3_(4B)-Vision-GRPO.ipynb
index bbde825f..e7bf6a7a 100644
--- a/original_template/Gemma3_(4B)-Vision-GRPO.ipynb
+++ b/original_template/Gemma3_(4B)-Vision-GRPO.ipynb
@@ -1125,7 +1125,7 @@
         "    learning_rate = 5e-6,\n",
         "    adam_beta1 = 0.9,\n",
         "    adam_beta2 = 0.99,\n",
-        "    weight_decay = 0.1,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"cosine\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/original_template/Llama3.1_(8B)-GRPO.ipynb b/original_template/Llama3.1_(8B)-GRPO.ipynb
index dacb80e5..6eab7ca0 100644
--- a/original_template/Llama3.1_(8B)-GRPO.ipynb
+++ b/original_template/Llama3.1_(8B)-GRPO.ipynb
@@ -742,7 +742,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/original_template/Llama_FP8_GRPO.ipynb b/original_template/Llama_FP8_GRPO.ipynb
index f4aa1f3e..db07ab79 100644
--- a/original_template/Llama_FP8_GRPO.ipynb
+++ b/original_template/Llama_FP8_GRPO.ipynb
@@ -1280,7 +1280,7 @@
         "        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs\n",
         "        logging_steps = 5,\n",
         "        optim = \"adamw_8bit\",\n",
-        "        weight_decay = 0.01,\n",
+        "        weight_decay = 0.001,\n",
         "        lr_scheduler_type = \"linear\",\n",
         "        seed = 3407,\n",
         "        report_to = \"none\", # Use this for WandB etc\n",
@@ -2495,7 +2495,7 @@
         "    vllm_sampling_params = vllm_sampling_params,\n",
         "    temperature = 1.0,\n",
         "    learning_rate = 5e-6,\n",
-        "    weight_decay = 0.01,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"linear\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/original_template/Mistral_v0.3_(7B)-GRPO.ipynb b/original_template/Mistral_v0.3_(7B)-GRPO.ipynb
index c6c773b5..d9a6c4c9 100644
--- a/original_template/Mistral_v0.3_(7B)-GRPO.ipynb
+++ b/original_template/Mistral_v0.3_(7B)-GRPO.ipynb
@@ -586,7 +586,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/original_template/Phi_4_(14B)-GRPO.ipynb b/original_template/Phi_4_(14B)-GRPO.ipynb
index e7f20c7e..16b4523e 100644
--- a/original_template/Phi_4_(14B)-GRPO.ipynb
+++ b/original_template/Phi_4_(14B)-GRPO.ipynb
@@ -364,7 +364,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"paged_adamw_8bit\",\n",
diff --git a/original_template/Qwen2.5_(3B)-GRPO.ipynb b/original_template/Qwen2.5_(3B)-GRPO.ipynb
index 185f4cd8..5120b42d 100644
--- a/original_template/Qwen2.5_(3B)-GRPO.ipynb
+++ b/original_template/Qwen2.5_(3B)-GRPO.ipynb
@@ -881,7 +881,7 @@
     "    learning_rate = 5e-6,\n",
     "    adam_beta1 = 0.9,\n",
     "    adam_beta2 = 0.99,\n",
-    "    weight_decay = 0.1,\n",
+    "    weight_decay = 0.001,\n",
     "    warmup_ratio = 0.1,\n",
     "    lr_scheduler_type = \"cosine\",\n",
     "    optim = \"adamw_8bit\",\n",
diff --git a/original_template/Qwen2_5_7B_VL_GRPO.ipynb b/original_template/Qwen2_5_7B_VL_GRPO.ipynb
index d42ac874..68516dec 100644
--- a/original_template/Qwen2_5_7B_VL_GRPO.ipynb
+++ b/original_template/Qwen2_5_7B_VL_GRPO.ipynb
@@ -1376,7 +1376,7 @@
         "    learning_rate = 5e-6,\n",
         "    adam_beta1 = 0.9,\n",
         "    adam_beta2 = 0.99,\n",
-        "    weight_decay = 0.1,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"cosine\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/original_template/Qwen3_8B_FP8_GRPO.ipynb b/original_template/Qwen3_8B_FP8_GRPO.ipynb
index 2be098fe..197bcfce 100644
--- a/original_template/Qwen3_8B_FP8_GRPO.ipynb
+++ b/original_template/Qwen3_8B_FP8_GRPO.ipynb
@@ -1386,7 +1386,7 @@
         "        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs\n",
         "        logging_steps = 5,\n",
         "        optim = \"adamw_8bit\",\n",
-        "        weight_decay = 0.01,\n",
+        "        weight_decay = 0.001,\n",
         "        lr_scheduler_type = \"linear\",\n",
         "        seed = 3407,\n",
         "        report_to = \"none\", # Use this for WandB etc\n",
@@ -2593,7 +2593,7 @@
         "    vllm_sampling_params = vllm_sampling_params,\n",
         "    temperature = 1.0,\n",
         "    learning_rate = 5e-6,\n",
-        "    weight_decay = 0.01,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"linear\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb b/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb
index 292e2bbd..93a27ed3 100644
--- a/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb
+++ b/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb
@@ -1325,7 +1325,7 @@
         "    learning_rate = 5e-6,\n",
         "    adam_beta1 = 0.9,\n",
         "    adam_beta2 = 0.99,\n",
-        "    weight_decay = 0.1,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"cosine\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/original_template/TinyLlama_(1.1B)-Alpaca.ipynb b/original_template/TinyLlama_(1.1B)-Alpaca.ipynb
index 0a2f2953..4502115c 100644
--- a/original_template/TinyLlama_(1.1B)-Alpaca.ipynb
+++ b/original_template/TinyLlama_(1.1B)-Alpaca.ipynb
@@ -587,7 +587,7 @@
     "        learning_rate = 2e-5,\n",
     "        logging_steps = 1,\n",
     "        optim = \"adamw_8bit\",\n",
-    "        weight_decay = 0.1,\n",
+    "        weight_decay = 0.001,\n",
     "        lr_scheduler_type = \"linear\",\n",
     "        seed = 3407,\n",
     "        output_dir = \"outputs\",\n",
diff --git a/original_template/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb b/original_template/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb
index 9b519ca0..9ae3510f 100644
--- a/original_template/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb
+++ b/original_template/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.ipynb
@@ -838,7 +838,7 @@
         "training_args = GRPOConfig(\n",
         "    temperature = 1.0,\n",
         "    learning_rate = 5e-5,\n",
-        "    weight_decay = 0.01,\n",
+        "    weight_decay = 0.001,\n",
         "    warmup_ratio = 0.1,\n",
         "    lr_scheduler_type = \"linear\",\n",
         "    optim = \"adamw_8bit\",\n",
diff --git a/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py
index 6ca0a036..d8935ea3 100644
--- a/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py
+++ b/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py
@@ -352,7 +352,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
 from trl import GRPOConfig, GRPOTrainer
 training_args = GRPOConfig(
     learning_rate = 5e-6,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/Falcon_H1_(0.5B)-Alpaca.py b/python_scripts/Falcon_H1_(0.5B)-Alpaca.py
index 637b35e5..af2a6f40 100644
--- a/python_scripts/Falcon_H1_(0.5B)-Alpaca.py
+++ b/python_scripts/Falcon_H1_(0.5B)-Alpaca.py
@@ -176,7 +176,7 @@ def formatting_prompts_func(examples):
         learning_rate = 2e-4,
         logging_steps = 1,
         optim = "adamw_8bit",
-        weight_decay = 0.01,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         output_dir = "outputs",
diff --git a/python_scripts/Gemma2_(2B)-Alpaca.py b/python_scripts/Gemma2_(2B)-Alpaca.py
index 7bc95cce..d577200d 100644
--- a/python_scripts/Gemma2_(2B)-Alpaca.py
+++ b/python_scripts/Gemma2_(2B)-Alpaca.py
@@ -166,7 +166,7 @@ def formatting_prompts_func(examples):
         learning_rate = 2e-4,
         logging_steps = 1,
         optim = "adamw_8bit",
-        weight_decay = 0.01,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         output_dir = "outputs",
diff --git a/python_scripts/Gemma3_(1B)-GRPO.py b/python_scripts/Gemma3_(1B)-GRPO.py
index 6763ba50..06881b13 100644
--- a/python_scripts/Gemma3_(1B)-GRPO.py
+++ b/python_scripts/Gemma3_(1B)-GRPO.py
@@ -348,7 +348,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_torch_fused",
diff --git a/python_scripts/Gemma3_(4B)-Vision-GRPO.py b/python_scripts/Gemma3_(4B)-Vision-GRPO.py
index a7df8d58..5074ff7a 100644
--- a/python_scripts/Gemma3_(4B)-Vision-GRPO.py
+++ b/python_scripts/Gemma3_(4B)-Vision-GRPO.py
@@ -312,7 +312,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py b/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py
index 05efe9a8..a1554858 100644
--- a/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py	
+++ b/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py	
@@ -352,7 +352,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
 from trl import GRPOConfig, GRPOTrainer
 training_args = GRPOConfig(
     learning_rate = 5e-6,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_torch_fused",
diff --git a/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py
index 6ca0a036..d8935ea3 100644
--- a/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py	
+++ b/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py	
@@ -352,7 +352,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
 from trl import GRPOConfig, GRPOTrainer
 training_args = GRPOConfig(
     learning_rate = 5e-6,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py b/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py
index 5af24a72..5484158b 100644
--- a/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py	
+++ b/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py	
@@ -350,7 +350,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_torch_fused",
diff --git a/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py b/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py
index 6fcfda06..460f97d4 100644
--- a/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py	
+++ b/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py	
@@ -314,7 +314,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py b/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py
index bf0b8b4f..0a541979 100644
--- a/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py	
+++ b/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py	
@@ -212,7 +212,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "paged_adamw_8bit",
diff --git a/python_scripts/HuggingFace Course-Llama_FP8_GRPO.py b/python_scripts/HuggingFace Course-Llama_FP8_GRPO.py
index 62bb1221..2514a3f6 100644
--- a/python_scripts/HuggingFace Course-Llama_FP8_GRPO.py	
+++ b/python_scripts/HuggingFace Course-Llama_FP8_GRPO.py	
@@ -308,7 +308,7 @@ def format_dataset(x):
         learning_rate = 2e-4, # Reduce to 2e-5 for long training runs
         logging_steps = 5,
         optim = "adamw_8bit",
-        weight_decay = 0.01,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         report_to = "none", # Use this for WandB etc
@@ -669,7 +669,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
     vllm_sampling_params = vllm_sampling_params,
     temperature = 1.0,
     learning_rate = 5e-6,
-    weight_decay = 0.01,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "linear",
     optim = "adamw_8bit",
diff --git a/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py b/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py
index 5f3528de..2a828b8c 100644
--- a/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py	
+++ b/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py	
@@ -212,7 +212,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "paged_adamw_8bit",
diff --git a/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py b/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py
index 882102f7..93cd2a42 100644
--- a/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py	
+++ b/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py	
@@ -208,7 +208,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "paged_adamw_8bit",
diff --git a/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py b/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py
index d0ba14c0..90ea0e0c 100644
--- a/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py	
+++ b/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py	
@@ -211,7 +211,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py b/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py
index d14d9fa2..7af6dddb 100644
--- a/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py	
+++ b/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py	
@@ -329,7 +329,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/HuggingFace Course-Qwen3_8B_FP8_GRPO.py b/python_scripts/HuggingFace Course-Qwen3_8B_FP8_GRPO.py
index de115cea..092041ba 100644
--- a/python_scripts/HuggingFace Course-Qwen3_8B_FP8_GRPO.py	
+++ b/python_scripts/HuggingFace Course-Qwen3_8B_FP8_GRPO.py	
@@ -311,7 +311,7 @@ def format_dataset(x):
         learning_rate = 2e-4, # Reduce to 2e-5 for long training runs
         logging_steps = 5,
         optim = "adamw_8bit",
-        weight_decay = 0.01,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         report_to = "none", # Use this for WandB etc
@@ -670,7 +670,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
     vllm_sampling_params = vllm_sampling_params,
     temperature = 1.0,
     learning_rate = 5e-6,
-    weight_decay = 0.01,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "linear",
     optim = "adamw_8bit",
diff --git a/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py b/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py
index 4a1d1b49..5744d2de 100644
--- a/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py	
+++ b/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py	
@@ -334,7 +334,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/HuggingFace Course-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py b/python_scripts/HuggingFace Course-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py
index d067d698..a17a6221 100644
--- a/python_scripts/HuggingFace Course-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py	
+++ b/python_scripts/HuggingFace Course-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py	
@@ -741,7 +741,7 @@ def generate_game_states(num_samples = 1000, rows = 6, cols = 6, num_mines = 5,
 training_args = GRPOConfig(
     temperature = 1.0,
     learning_rate = 5e-5,
-    weight_decay = 0.01,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "linear",
     optim = "adamw_8bit",
diff --git a/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py b/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py
index c50ae135..3b865ba5 100644
--- a/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py
+++ b/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py
@@ -327,7 +327,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
 from trl import GRPOConfig, GRPOTrainer
 training_args = GRPOConfig(
     learning_rate = 5e-6,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_torch_fused",
diff --git a/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py
index 0ae3725c..83aab641 100644
--- a/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py
+++ b/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py
@@ -329,7 +329,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
 from trl import GRPOConfig, GRPOTrainer
 training_args = GRPOConfig(
     learning_rate = 5e-6,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/Kaggle-Falcon_H1_(0.5B)-Alpaca.py b/python_scripts/Kaggle-Falcon_H1_(0.5B)-Alpaca.py
index 4adb07ff..75f20929 100644
--- a/python_scripts/Kaggle-Falcon_H1_(0.5B)-Alpaca.py
+++ b/python_scripts/Kaggle-Falcon_H1_(0.5B)-Alpaca.py
@@ -176,7 +176,7 @@ def formatting_prompts_func(examples):
         learning_rate = 2e-4,
         logging_steps = 1,
         optim = "adamw_8bit",
-        weight_decay = 0.01,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         output_dir = "outputs",
diff --git a/python_scripts/Kaggle-Gemma2_(2B)-Alpaca.py b/python_scripts/Kaggle-Gemma2_(2B)-Alpaca.py
index 90662b60..2adf77ff 100644
--- a/python_scripts/Kaggle-Gemma2_(2B)-Alpaca.py
+++ b/python_scripts/Kaggle-Gemma2_(2B)-Alpaca.py
@@ -166,7 +166,7 @@ def formatting_prompts_func(examples):
         learning_rate = 2e-4,
         logging_steps = 1,
         optim = "adamw_8bit",
-        weight_decay = 0.01,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         output_dir = "outputs",
diff --git a/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py b/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py
index a71119c2..c36ffa6b 100644
--- a/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py
+++ b/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py
@@ -325,7 +325,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_torch_fused",
diff --git a/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py b/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py
index a42f086f..9c885808 100644
--- a/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py
+++ b/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py
@@ -289,7 +289,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py b/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py
index 1f9d0b8e..4769a292 100644
--- a/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py
+++ b/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py
@@ -187,7 +187,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "paged_adamw_8bit",
diff --git a/python_scripts/Kaggle-Llama_FP8_GRPO.py b/python_scripts/Kaggle-Llama_FP8_GRPO.py
index b31d380f..c852772c 100644
--- a/python_scripts/Kaggle-Llama_FP8_GRPO.py
+++ b/python_scripts/Kaggle-Llama_FP8_GRPO.py
@@ -283,7 +283,7 @@ def format_dataset(x):
         learning_rate = 2e-4, # Reduce to 2e-5 for long training runs
         logging_steps = 5,
         optim = "adamw_8bit",
-        weight_decay = 0.01,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         report_to = "none", # Use this for WandB etc
@@ -644,7 +644,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
     vllm_sampling_params = vllm_sampling_params,
     temperature = 1.0,
     learning_rate = 5e-6,
-    weight_decay = 0.01,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "linear",
     optim = "adamw_8bit",
diff --git a/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py b/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py
index 02f5598a..30e8135a 100644
--- a/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py
+++ b/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py
@@ -187,7 +187,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "paged_adamw_8bit",
diff --git a/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py b/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py
index 9ddc8e5a..512bad08 100644
--- a/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py
+++ b/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py
@@ -183,7 +183,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "paged_adamw_8bit",
diff --git a/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py b/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py
index 43c090f9..bd5a8a21 100644
--- a/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py
+++ b/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py
@@ -186,7 +186,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py b/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py
index deb69ce0..5cf61cf3 100644
--- a/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py
+++ b/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py
@@ -304,7 +304,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/Kaggle-Qwen3_8B_FP8_GRPO.py b/python_scripts/Kaggle-Qwen3_8B_FP8_GRPO.py
index cceb48f0..ccffa313 100644
--- a/python_scripts/Kaggle-Qwen3_8B_FP8_GRPO.py
+++ b/python_scripts/Kaggle-Qwen3_8B_FP8_GRPO.py
@@ -286,7 +286,7 @@ def format_dataset(x):
         learning_rate = 2e-4, # Reduce to 2e-5 for long training runs
         logging_steps = 5,
         optim = "adamw_8bit",
-        weight_decay = 0.01,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         report_to = "none", # Use this for WandB etc
@@ -645,7 +645,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
     vllm_sampling_params = vllm_sampling_params,
     temperature = 1.0,
     learning_rate = 5e-6,
-    weight_decay = 0.01,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "linear",
     optim = "adamw_8bit",
diff --git a/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py b/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py
index ed5fd6e6..4876d1e0 100644
--- a/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py
+++ b/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py
@@ -309,7 +309,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/Kaggle-TinyLlama_(1.1B)-Alpaca.py b/python_scripts/Kaggle-TinyLlama_(1.1B)-Alpaca.py
index a86d3ac5..e8b1bafa 100644
--- a/python_scripts/Kaggle-TinyLlama_(1.1B)-Alpaca.py
+++ b/python_scripts/Kaggle-TinyLlama_(1.1B)-Alpaca.py
@@ -161,7 +161,7 @@ def formatting_prompts_func(examples):
         learning_rate = 2e-5,
         logging_steps = 1,
         optim = "adamw_8bit",
-        weight_decay = 0.1,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         output_dir = "outputs",
diff --git a/python_scripts/Kaggle-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py b/python_scripts/Kaggle-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py
index d067d698..a17a6221 100644
--- a/python_scripts/Kaggle-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py
+++ b/python_scripts/Kaggle-gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py
@@ -741,7 +741,7 @@ def generate_game_states(num_samples = 1000, rows = 6, cols = 6, num_mines = 5,
 training_args = GRPOConfig(
     temperature = 1.0,
     learning_rate = 5e-5,
-    weight_decay = 0.01,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "linear",
     optim = "adamw_8bit",
diff --git a/python_scripts/Llama3.1_(8B)-GRPO.py b/python_scripts/Llama3.1_(8B)-GRPO.py
index d83e4a05..4ee74315 100644
--- a/python_scripts/Llama3.1_(8B)-GRPO.py
+++ b/python_scripts/Llama3.1_(8B)-GRPO.py
@@ -210,7 +210,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "paged_adamw_8bit",
diff --git a/python_scripts/Llama_FP8_GRPO.py b/python_scripts/Llama_FP8_GRPO.py
index db6060d3..0511fabf 100644
--- a/python_scripts/Llama_FP8_GRPO.py
+++ b/python_scripts/Llama_FP8_GRPO.py
@@ -306,7 +306,7 @@ def format_dataset(x):
         learning_rate = 2e-4, # Reduce to 2e-5 for long training runs
         logging_steps = 5,
         optim = "adamw_8bit",
-        weight_decay = 0.01,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         report_to = "none", # Use this for WandB etc
@@ -667,7 +667,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
     vllm_sampling_params = vllm_sampling_params,
     temperature = 1.0,
     learning_rate = 5e-6,
-    weight_decay = 0.01,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "linear",
     optim = "adamw_8bit",
diff --git a/python_scripts/Mistral_v0.3_(7B)-GRPO.py b/python_scripts/Mistral_v0.3_(7B)-GRPO.py
index b3ca59ac..d0196d07 100644
--- a/python_scripts/Mistral_v0.3_(7B)-GRPO.py
+++ b/python_scripts/Mistral_v0.3_(7B)-GRPO.py
@@ -210,7 +210,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "paged_adamw_8bit",
diff --git a/python_scripts/Phi_4_(14B)-GRPO.py b/python_scripts/Phi_4_(14B)-GRPO.py
index 50301864..e39ca85c 100644
--- a/python_scripts/Phi_4_(14B)-GRPO.py
+++ b/python_scripts/Phi_4_(14B)-GRPO.py
@@ -206,7 +206,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "paged_adamw_8bit",
diff --git a/python_scripts/Qwen2.5_(3B)-GRPO.py b/python_scripts/Qwen2.5_(3B)-GRPO.py
index d3d33926..2c58444d 100644
--- a/python_scripts/Qwen2.5_(3B)-GRPO.py
+++ b/python_scripts/Qwen2.5_(3B)-GRPO.py
@@ -209,7 +209,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]:
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/Qwen2_5_7B_VL_GRPO.py b/python_scripts/Qwen2_5_7B_VL_GRPO.py
index 2276ca5d..d9a267c9 100644
--- a/python_scripts/Qwen2_5_7B_VL_GRPO.py
+++ b/python_scripts/Qwen2_5_7B_VL_GRPO.py
@@ -327,7 +327,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa
     learning_rate = 5e-6,
     adam_beta1 = 0.9,
     adam_beta2 = 0.99,
-    weight_decay = 0.1,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "cosine",
     optim = "adamw_8bit",
diff --git a/python_scripts/Qwen3_8B_FP8_GRPO.py b/python_scripts/Qwen3_8B_FP8_GRPO.py
index 1a5b55ea..da44c4ea 100644
--- a/python_scripts/Qwen3_8B_FP8_GRPO.py
+++ b/python_scripts/Qwen3_8B_FP8_GRPO.py
@@ -309,7 +309,7 @@ def format_dataset(x):
         learning_rate = 2e-4, # Reduce to 2e-5 for long training runs
         logging_steps = 5,
         optim = "adamw_8bit",
-        weight_decay = 0.01,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         report_to = "none", # Use this for WandB etc
@@ -668,7 +668,7 @@ def check_numbers(prompts, completions, answer, **kwargs):
     vllm_sampling_params = vllm_sampling_params,
     temperature = 1.0,
     learning_rate = 5e-6,
-    weight_decay = 0.01,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "linear",
     optim = "adamw_8bit",
diff --git a/python_scripts/TinyLlama_(1.1B)-Alpaca.py b/python_scripts/TinyLlama_(1.1B)-Alpaca.py
index dc5ab2fc..e013ea6c 100644
--- a/python_scripts/TinyLlama_(1.1B)-Alpaca.py
+++ b/python_scripts/TinyLlama_(1.1B)-Alpaca.py
@@ -161,7 +161,7 @@ def formatting_prompts_func(examples):
         learning_rate = 2e-5,
         logging_steps = 1,
         optim = "adamw_8bit",
-        weight_decay = 0.1,
+        weight_decay = 0.001,
         lr_scheduler_type = "linear",
         seed = 3407,
         output_dir = "outputs",
diff --git a/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py b/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py
index d067d698..a17a6221 100644
--- a/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py
+++ b/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_GRPO_Minesweeper_Game_BF16.py
@@ -741,7 +741,7 @@ def generate_game_states(num_samples = 1000, rows = 6, cols = 6, num_mines = 5,
 training_args = GRPOConfig(
     temperature = 1.0,
     learning_rate = 5e-5,
-    weight_decay = 0.01,
+    weight_decay = 0.001,
     warmup_ratio = 0.1,
     lr_scheduler_type = "linear",
     optim = "adamw_8bit",