diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.yaml
index a1aa9a62fc..f9551a954e 100644
--- a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.yaml
+++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.yaml
@@ -2,7 +2,7 @@ defaults: ../../distillation_math.yaml
 distillation:
   num_prompts_per_step: 64
   max_num_steps: 20
-  val_batch_size: 32
+  val_batch_size: 256
   val_period: 10
   max_val_samples: 256
 loss_fn:
@@ -11,43 +11,15 @@ checkpointing:
   checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-base-dynamicbatch
 policy:
   model_name: Qwen/Qwen3-4B-Base
-  train_global_batch_size: 32
-  generation_batch_size: 32
   dtensor_cfg:
     context_parallel_size: 1
   make_sequence_length_divisible_by: 2
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 20
-  - name: torch.optim.lr_scheduler.ConstantLR
-    kwargs:
-      factor: 1.0
-      total_iters: 10000000000
-  - milestones:
-    - 20
 teacher:
   model_name: Qwen/Qwen3-32B
-  train_global_batch_size: 32
-  generation_batch_size: 32
   dtensor_cfg:
     tensor_parallel_size: 8
     context_parallel_size: 1
   make_sequence_length_divisible_by: 2
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 20
-  - name: torch.optim.lr_scheduler.ConstantLR
-    kwargs:
-      factor: 1.0
-      total_iters: 10000000000
-  - milestones:
-    - 20
 logger:
   log_dir: logs/distillation-qwen3-32b-to-4b-base-dynamicbatch
   wandb:
diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.yaml
index 0f7ebfae4d..d2b4ec620f 100644
--- a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.yaml
+++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.yaml
@@ -2,58 +2,22 @@ defaults: ../../distillation_math.yaml
 distillation:
   num_prompts_per_step: 64
   max_num_steps: 500
-  val_batch_size: 32
+  val_batch_size: 512
   val_period: 50
-  max_val_samples: 256
+loss_fn:
+  kl_type: reverse
 checkpointing:
   checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-base-long
-  save_period: 50
+  save_period: 10
 policy:
   model_name: Qwen/Qwen3-4B-Base
-  train_global_batch_size: 32
-  generation_batch_size: 32
-  max_total_sequence_length: 32768
-  dynamic_batching:
-    enabled: false
-  make_sequence_length_divisible_by: 2
-  optimizer:
-    kwargs:
-      lr: 1.0e-05
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 100
-  - name: torch.optim.lr_scheduler.CosineAnnealingLR
-    kwargs:
-      T_max: 900
-      eta_min: 1.0e-07
-  - milestones:
-    - 100
+  max_total_sequence_length: 20480
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 2
 teacher:
   model_name: Qwen/Qwen3-32B
-  train_global_batch_size: 32
-  generation_batch_size: 32
-  max_total_sequence_length: 32768
-  dynamic_batching:
-    enabled: false
-  make_sequence_length_divisible_by: 2
-  optimizer:
-    kwargs:
-      lr: 1.0e-05
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 100
-  - name: torch.optim.lr_scheduler.CosineAnnealingLR
-    kwargs:
-      T_max: 900
-      eta_min: 1.0e-07
-  - milestones:
-    - 100
+  max_total_sequence_length: 20480
 logger:
   log_dir: logs/distillation-qwen3-32b-to-4b-base-long
   wandb:
diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.yaml
new file mode 100644
index 0000000000..9d7b8746dc
--- /dev/null
+++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.yaml
@@ -0,0 +1,37 @@
+defaults: ../../distillation_math.yaml
+distillation:
+  num_prompts_per_step: 64
+  max_num_steps: 20
+  val_batch_size: 256
+  val_period: 10
+  max_val_samples: 256
+loss_fn:
+  kl_type: reverse
+checkpointing:
+  checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-base-seqpack
+policy:
+  model_name: Qwen/Qwen3-4B-Base
+  dtensor_cfg:
+    context_parallel_size: 1
+  dynamic_batching:
+    enabled: false
+  sequence_packing:
+    enabled: true
+  make_sequence_length_divisible_by: 2
+teacher:
+  model_name: Qwen/Qwen3-32B
+  dtensor_cfg:
+    tensor_parallel_size: 8
+    context_parallel_size: 1
+  dynamic_batching:
+    enabled: false
+  sequence_packing:
+    enabled: true
+  make_sequence_length_divisible_by: 2
+logger:
+  log_dir: logs/distillation-qwen3-32b-to-4b-base-seqpack
+  wandb:
+    project: nemo-rl
+    name: distillation-qwen3-32b-to-4b-base-seqpack
+cluster:
+  num_nodes: 2
diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.yaml
index 5adcbe9cab..8f1d235d69 100644
--- a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.yaml
+++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.yaml
@@ -2,7 +2,7 @@ defaults: ../../distillation_math.yaml
 distillation:
   num_prompts_per_step: 64
   max_num_steps: 20
-  val_batch_size: 32
+  val_batch_size: 256
   val_period: 10
   max_val_samples: 256
 loss_fn:
@@ -12,29 +12,10 @@ checkpointing:
   save_period: 50
 policy:
   model_name: Qwen/Qwen3-4B-Base
-  train_global_batch_size: 32
-  generation_batch_size: 32
   dtensor_cfg:
     tensor_parallel_size: 8
     context_parallel_size: 1
-  dynamic_batching:
-    enabled: false
   make_sequence_length_divisible_by: 2
-  optimizer:
-    kwargs:
-      lr: 1.0e-05
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 100
-  - name: torch.optim.lr_scheduler.CosineAnnealingLR
-    kwargs:
-      T_max: 900
-      eta_min: 1.0e-07
-  - milestones:
-    - 100
   generation:
     colocated:
       enabled: false
@@ -43,29 +24,10 @@ policy:
         num_nodes: 1
 teacher:
   model_name: Qwen/Qwen3-32B
-  train_global_batch_size: 32
-  generation_batch_size: 32
   dtensor_cfg:
     tensor_parallel_size: 8
     context_parallel_size: 1
-  dynamic_batching:
-    enabled: false
   make_sequence_length_divisible_by: 2
-  optimizer:
-    kwargs:
-      lr: 1.0e-05
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 100
-  - name: torch.optim.lr_scheduler.CosineAnnealingLR
-    kwargs:
-      T_max: 900
-      eta_min: 1.0e-07
-  - milestones:
-    - 100
   generation:
     colocated:
       enabled: false
diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-instruct-2n8g-fsdp2tp2-seqpack.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-instruct-2n8g-fsdp2tp2-seqpack.v1.yaml
deleted file mode 100644
index b11b27fd54..0000000000
--- a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-instruct-2n8g-fsdp2tp2-seqpack.v1.yaml
+++ /dev/null
@@ -1,65 +0,0 @@
-defaults: ../../distillation_math.yaml
-distillation:
-  num_prompts_per_step: 64
-  max_num_steps: 20
-  val_batch_size: 32
-  val_period: 10
-  max_val_samples: 256
-loss_fn:
-  kl_type: reverse
-checkpointing:
-  checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-instruct-seqpack
-policy:
-  model_name: Qwen/Qwen3-4B-Instruct
-  train_global_batch_size: 32
-  generation_batch_size: 32
-  dtensor_cfg:
-    context_parallel_size: 1
-  dynamic_batching:
-    enabled: false
-  sequence_packing:
-    enabled: true
-  make_sequence_length_divisible_by: 2
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 20
-  - name: torch.optim.lr_scheduler.ConstantLR
-    kwargs:
-      factor: 1.0
-      total_iters: 10000000000
-  - milestones:
-    - 20
-teacher:
-  model_name: Qwen/Qwen3-32B
-  train_global_batch_size: 32
-  generation_batch_size: 32
-  dtensor_cfg:
-    tensor_parallel_size: 8
-    context_parallel_size: 1
-  dynamic_batching:
-    enabled: false
-  sequence_packing:
-    enabled: true
-  make_sequence_length_divisible_by: 2
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 20
-  - name: torch.optim.lr_scheduler.ConstantLR
-    kwargs:
-      factor: 1.0
-      total_iters: 10000000000
-  - milestones:
-    - 20
-logger:
-  log_dir: logs/distillation-qwen3-32b-to-4b-instruct-seqpack
-  wandb:
-    project: nemo-rl
-    name: distillation-qwen3-32b-to-4b-instruct-seqpack
-cluster:
-  num_nodes: 2
diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-8b-base-2n8g-fsdp2tp2.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-8b-base-2n8g-fsdp2tp2.v1.yaml
deleted file mode 100644
index 6dd08a3f66..0000000000
--- a/examples/configs/recipes/llm/distillation-qwen3-32b-to-8b-base-2n8g-fsdp2tp2.v1.yaml
+++ /dev/null
@@ -1,57 +0,0 @@
-defaults: ../../distillation_math.yaml
-distillation:
-  num_prompts_per_step: 64
-  max_num_steps: 100
-  val_batch_size: 32
-  val_period: 10
-  max_val_samples: 256
-loss_fn:
-  kl_type: reverse
-checkpointing:
-  checkpoint_dir: checkpoints/distillation-qwen3-32b-to-8b-base
-policy:
-  model_name: Qwen/Qwen3-8B-Base
-  train_global_batch_size: 32
-  generation_batch_size: 32
-  max_total_sequence_length: 16384
-  make_sequence_length_divisible_by: 2
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 20
-  - name: torch.optim.lr_scheduler.ConstantLR
-    kwargs:
-      factor: 1.0
-      total_iters: 10000000000
-  - milestones:
-    - 20
-teacher:
-  model_name: Qwen/Qwen3-32B
-  train_global_batch_size: 32
-  generation_batch_size: 32
-  max_total_sequence_length: 16384
-  dtensor_cfg:
-    tensor_parallel_size: 8
-    context_parallel_size: 1
-  make_sequence_length_divisible_by: 2
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 20
-  - name: torch.optim.lr_scheduler.ConstantLR
-    kwargs:
-      factor: 1.0
-      total_iters: 10000000000
-  - milestones:
-    - 20
-logger:
-  log_dir: logs/distillation-qwen3-32b-to-8b-base
-  wandb:
-    project: nemo-rl
-    name: distillation-qwen3-32b-to-8b-base
-cluster:
-  num_nodes: 2
diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-8b-base-4n8g-fsdp2tp8-long.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-8b-base-4n8g-fsdp2tp8-long.v1.yaml
deleted file mode 100644
index 1da1e231b6..0000000000
--- a/examples/configs/recipes/llm/distillation-qwen3-32b-to-8b-base-4n8g-fsdp2tp8-long.v1.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-defaults: ../../distillation_math.yaml
-distillation:
-  num_prompts_per_step: 64
-  max_num_steps: 500
-  val_batch_size: 32
-  val_period: 50
-  max_val_samples: 256
-loss_fn:
-  kl_type: reverse
-checkpointing:
-  checkpoint_dir: checkpoints/distillation-qwen3-32b-to-8b-base-long
-  save_period: 50
-policy:
-  model_name: Qwen/Qwen3-8B-Base
-  train_global_batch_size: 32
-  generation_batch_size: 32
-  max_total_sequence_length: 32768
-  dtensor_cfg:
-    tensor_parallel_size: 8
-    context_parallel_size: 1
-  make_sequence_length_divisible_by: 4
-  optimizer:
-    kwargs:
-      lr: 1.0e-05
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 150
-  - name: torch.optim.lr_scheduler.CosineAnnealingLR
-    kwargs:
-      T_max: 1350
-      eta_min: 1.0e-07
-  - milestones:
-    - 150
-teacher:
-  model_name: Qwen/Qwen3-32B
-  train_global_batch_size: 32
-  generation_batch_size: 32
-  max_total_sequence_length: 32768
-  make_sequence_length_divisible_by: 4
-  optimizer:
-    kwargs:
-      lr: 1.0e-05
-  scheduler:
-  - name: torch.optim.lr_scheduler.LinearLR
-    kwargs:
-      start_factor: 0.1
-      end_factor: 1.0
-      total_iters: 150
-  - name: torch.optim.lr_scheduler.CosineAnnealingLR
-    kwargs:
-      T_max: 1350
-      eta_min: 1.0e-07
-  - milestones:
-    - 150
-logger:
-  log_dir: logs/distillation-qwen3-32b-to-8b-base-long
-  wandb:
-    project: nemo-rl
-    name: distillation-qwen3-32b-to-8b-base-long
-cluster:
-  num_nodes: 4
diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.sh
index 0573d0bcba..52f17c2c28 100755
--- a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.sh
+++ b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.sh
@@ -7,7 +7,7 @@ NUM_NODES=1
 STEPS_PER_RUN=20
 MAX_STEPS=20
 NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
-NUM_MINUTES=240
+NUM_MINUTES=120
 # ===== END CONFIG =====
 
 exit_if_max_steps_reached
@@ -35,7 +35,7 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
 if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
     uv run tests/check_metrics.py $JSON_METRICS \
         'data["train/loss"]["1"] < 1.5' \
-        'data["train/loss"]["20"] < 0.5' \
-        'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \
+        'data["train/loss"]["20"] < 0.3' \
+        'data["validation/accuracy"]["20"] > 0.1' \
         'mean(data["timing/train/total_step_time"], -6, -1) < 1000'
 fi
diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.sh
index 0b759078a3..cd4b635e72 100755
--- a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.sh
+++ b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.sh
@@ -4,10 +4,10 @@ source $SCRIPT_DIR/common.env
 
 # ===== BEGIN CONFIG =====
 NUM_NODES=2
-STEPS_PER_RUN=100
-MAX_STEPS=500
+STEPS_PER_RUN=50
+MAX_STEPS=100
 NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
-NUM_MINUTES=1200
+NUM_MINUTES=240
 # ===== END CONFIG =====
 
 exit_if_max_steps_reached
@@ -35,7 +35,7 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
 if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
     uv run tests/check_metrics.py $JSON_METRICS \
         'data["train/loss"]["1"] < 1.5' \
-        'data["train/loss"]["100"] < 0.3' \
-        'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \
+        'data["train/loss"]["100"] < 0.25' \
+        'data["validation/accuracy"]["100"] > 0.2' \
         'mean(data["timing/train/total_step_time"], -6, -1) < 1600'
 fi
diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-instruct-2n8g-fsdp2tp2-seqpack.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.sh
similarity index 91%
rename from tests/test_suites/llm/distillation-qwen3-32b-to-4b-instruct-2n8g-fsdp2tp2-seqpack.v1.sh
rename to tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.sh
index a366f77ac6..df8d6daed7 100755
--- a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-instruct-2n8g-fsdp2tp2-seqpack.v1.sh
+++ b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.sh
@@ -7,7 +7,7 @@ NUM_NODES=2
 STEPS_PER_RUN=20
 MAX_STEPS=20
 NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
-NUM_MINUTES=300
+NUM_MINUTES=120
 # ===== END CONFIG =====
 
 exit_if_max_steps_reached
@@ -35,7 +35,7 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
 if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
     uv run tests/check_metrics.py $JSON_METRICS \
         'data["train/loss"]["1"] < 1.5' \
-        'data["train/loss"]["20"] < 0.5' \
-        'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \
+        'data["train/loss"]["20"] < 0.3' \
+        'data["validation/accuracy"]["20"] > 0.1' \
         'mean(data["timing/train/total_step_time"], -6, -1) < 1000'
 fi
diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.sh
index 463dc3c3b1..df8d6daed7 100755
--- a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.sh
+++ b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.sh
@@ -7,7 +7,7 @@ NUM_NODES=2
 STEPS_PER_RUN=20
 MAX_STEPS=20
 NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
-NUM_MINUTES=240
+NUM_MINUTES=120
 # ===== END CONFIG =====
 
 exit_if_max_steps_reached
@@ -35,7 +35,7 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
 if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
     uv run tests/check_metrics.py $JSON_METRICS \
         'data["train/loss"]["1"] < 1.5' \
-        'data["train/loss"]["20"] < 0.5' \
-        'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \
+        'data["train/loss"]["20"] < 0.3' \
+        'data["validation/accuracy"]["20"] > 0.1' \
         'mean(data["timing/train/total_step_time"], -6, -1) < 1000'
 fi
diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-8b-base-2n8g-fsdp2tp2.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-8b-base-2n8g-fsdp2tp2.v1.sh
deleted file mode 100755
index 9705c8e155..0000000000
--- a/tests/test_suites/llm/distillation-qwen3-32b-to-8b-base-2n8g-fsdp2tp2.v1.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
-source $SCRIPT_DIR/common.env
-
-# ===== BEGIN CONFIG =====
-NUM_NODES=2
-STEPS_PER_RUN=100
-MAX_STEPS=100
-NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
-NUM_MINUTES=240
-# ===== END CONFIG =====
-
-exit_if_max_steps_reached
-
-# Run the experiment
-cd $PROJECT_ROOT
-uv run examples/run_distillation_math.py \
-    --config $CONFIG_PATH \
-    distillation.max_num_steps=$MAX_STEPS \
-    logger.log_dir=$LOG_DIR \
-    logger.wandb_enabled=True \
-    logger.wandb.project=nemo-rl-distillation \
-    logger.wandb.name=$EXP_NAME \
-    logger.monitor_gpus=True \
-    logger.tensorboard_enabled=True \
-    checkpointing.enabled=True \
-    checkpointing.checkpoint_dir=$CKPT_DIR \
-    $@ \
-    2>&1 | tee $RUN_LOG
-
-# Convert tensorboard logs to json
-uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
-
-# Only run metrics if the target step is reached
-if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
-    uv run tests/check_metrics.py $JSON_METRICS \
-        'data["train/loss"]["1"] < 1.5' \
-        'data["train/loss"]["100"] < 0.5' \
-        'max(data["ray/node.0.gpu.0.mem_gb"]) < 80' \
-        'mean(data["timing/train/total_step_time"], -6, -1) < 500'
-fi
diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-8b-base-4n8g-fsdp2tp8-long.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-8b-base-4n8g-fsdp2tp8-long.v1.sh
deleted file mode 100755
index 2686ebe281..0000000000
--- a/tests/test_suites/llm/distillation-qwen3-32b-to-8b-base-4n8g-fsdp2tp8-long.v1.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
-source $SCRIPT_DIR/common.env
-
-# ===== BEGIN CONFIG =====
-NUM_NODES=4
-STEPS_PER_RUN=100
-MAX_STEPS=500
-NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
-NUM_MINUTES=1200
-# ===== END CONFIG =====
-
-exit_if_max_steps_reached
-
-# Run the experiment
-cd $PROJECT_ROOT
-uv run examples/run_distillation_math.py \
-    --config $CONFIG_PATH \
-    distillation.max_num_steps=$MAX_STEPS \
-    logger.log_dir=$LOG_DIR \
-    logger.wandb_enabled=True \
-    logger.wandb.project=nemo-rl-distillation \
-    logger.wandb.name=$EXP_NAME \
-    logger.monitor_gpus=True \
-    logger.tensorboard_enabled=True \
-    checkpointing.enabled=True \
-    checkpointing.checkpoint_dir=$CKPT_DIR \
-    $@ \
-    2>&1 | tee $RUN_LOG
-
-# Convert tensorboard logs to json
-uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
-
-# Only run metrics if the target step is reached
-if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
-    uv run tests/check_metrics.py $JSON_METRICS \
-        'data["train/loss"]["1"] < 1.5' \
-        'data["train/loss"]["100"] < 0.3' \
-        'max(data["ray/node.0.gpu.0.mem_gb"]) < 80' \
-        'mean(data["timing/train/total_step_time"], -6, -1) < 1600'
-fi
diff --git a/tests/test_suites/release.txt b/tests/test_suites/release.txt
index 83cf4009ba..2eddf0011d 100644
--- a/tests/test_suites/release.txt
+++ b/tests/test_suites/release.txt
@@ -40,14 +40,10 @@ tests/test_suites/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.sh
 # Distillation #
 ################
 
-# 100 step 4b convergence
-tests/test_suites/llm/distillation-qwen3-32b-to-8b-base-2n8g-fsdp2tp2.v1.sh
- 
-# Long 4b and 8b convergence
+# Long 4b convergence
 tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.sh
-tests/test_suites/llm/distillation-qwen3-32b-to-8b-base-4n8g-fsdp2tp8-long.v1.sh
  
-# 20 step functional tests on dynamic batching, non-colocated and seqence packing features
+# 20 step functional tests on dynamic batching, non-colocated and sequence packing features
 tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.sh
 tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.sh
-tests/test_suites/llm/distillation-qwen3-32b-to-4b-instruct-2n8g-fsdp2tp2-seqpack.v1.sh
+tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.sh